diff --git a/cuda_core/cuda/core/__init__.py b/cuda_core/cuda/core/__init__.py index 67a815d1de..f22bbc7f16 100644 --- a/cuda_core/cuda/core/__init__.py +++ b/cuda_core/cuda/core/__init__.py @@ -61,4 +61,9 @@ ) from cuda.core._module import Kernel, ObjectCode # noqa: E402 from cuda.core._program import Program, ProgramOptions # noqa: E402 -from cuda.core._stream import Stream, StreamOptions # noqa: E402 +from cuda.core._stream import ( # noqa: E402 + LEGACY_DEFAULT_STREAM, + PER_THREAD_DEFAULT_STREAM, + Stream, + StreamOptions, +) diff --git a/cuda_core/cuda/core/_stream.pyx b/cuda_core/cuda/core/_stream.pyx index 2b5cdfabd2..d03fc80590 100644 --- a/cuda_core/cuda/core/_stream.pyx +++ b/cuda_core/cuda/core/_stream.pyx @@ -107,49 +107,13 @@ cdef class Stream: return s @classmethod - def legacy_default(cls): - """Return the legacy default stream. - - The legacy default stream is an implicit stream which synchronizes - with all other streams in the same CUDA context except for non-blocking - streams. When any operation is launched on the legacy default stream, - it waits for all previously launched operations in blocking streams to - complete, and all subsequent operations in blocking streams wait for - the legacy default stream operation to complete. - - Returns - ------- - Stream - The legacy default stream instance for the current context. - - See Also - -------- - per_thread_default : Per-thread default stream alternative. - - """ + def _legacy_default(cls): + """Return the legacy default stream (supports subclassing).""" return Stream._from_handle(cls, get_legacy_stream()) @classmethod - def per_thread_default(cls): - """Return the per-thread default stream. - - The per-thread default stream is local to both the calling thread and - the CUDA context. Unlike the legacy default stream, it does not - synchronize with other streams and behaves like an explicitly created - non-blocking stream. This allows for better concurrency in multi-threaded - applications. 
- - Returns - ------- - Stream - The per-thread default stream instance for the current thread - and context. - - See Also - -------- - legacy_default : Legacy default stream alternative. - - """ + def _per_thread_default(cls): + """Return the per-thread default stream (supports subclassing).""" return Stream._from_handle(cls, get_per_thread_stream()) @classmethod @@ -404,8 +368,8 @@ cdef class Stream: # c-only python objects, not public -cdef Stream C_LEGACY_DEFAULT_STREAM = Stream.legacy_default() -cdef Stream C_PER_THREAD_DEFAULT_STREAM = Stream.per_thread_default() +cdef Stream C_LEGACY_DEFAULT_STREAM = Stream._legacy_default() +cdef Stream C_PER_THREAD_DEFAULT_STREAM = Stream._per_thread_default() # standard python objects, public LEGACY_DEFAULT_STREAM = C_LEGACY_DEFAULT_STREAM diff --git a/cuda_core/docs/source/release/0.6.0-notes.rst b/cuda_core/docs/source/release/0.6.0-notes.rst new file mode 100644 index 0000000000..375394abaf --- /dev/null +++ b/cuda_core/docs/source/release/0.6.0-notes.rst @@ -0,0 +1,33 @@ +.. SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +.. SPDX-License-Identifier: Apache-2.0 + +.. currentmodule:: cuda.core + +``cuda.core`` 0.6.0 Release Notes +================================== + +New features +------------ + +- Added public access to default CUDA streams via module-level constants ``LEGACY_DEFAULT_STREAM`` and ``PER_THREAD_DEFAULT_STREAM``. + + Users can now access default streams directly from the ``cuda.core`` namespace: + + .. code-block:: python + + from cuda.core import LEGACY_DEFAULT_STREAM, PER_THREAD_DEFAULT_STREAM + + # Use legacy default stream (synchronizes with all blocking streams) + LEGACY_DEFAULT_STREAM.sync() + + # Use per-thread default stream (thread-local; still synchronizes with the legacy default stream) + PER_THREAD_DEFAULT_STREAM.sync() + + The legacy default stream synchronizes with all blocking streams in the same CUDA context, ensuring strict ordering but potentially limiting concurrency. 
The per-thread default stream is local to the calling thread and does not synchronize with other streams (except the legacy default stream, if both are used), enabling concurrent execution in multi-threaded applications. + + This replaces the previous undocumented workaround of using ``Stream.from_handle(0)`` to access the legacy default stream. + +Fixes and enhancements +----------------------- + +None. diff --git a/cuda_core/tests/test_stream.py b/cuda_core/tests/test_stream.py index a40910dbf4..925daa7cd5 100644 --- a/cuda_core/tests/test_stream.py +++ b/cuda_core/tests/test_stream.py @@ -117,7 +117,7 @@ def test_stream_legacy_default_subclassing(): class MyStream(Stream): pass - stream = MyStream.legacy_default() + stream = MyStream._legacy_default() assert isinstance(stream, MyStream) @@ -125,26 +125,10 @@ def test_stream_per_thread_default_subclassing(): class MyStream(Stream): pass - stream = MyStream.per_thread_default() + stream = MyStream._per_thread_default() assert isinstance(stream, MyStream) -def test_stream_legacy_default_public_api(init_cuda): - """Test public legacy_default() method.""" - stream = Stream.legacy_default() - assert isinstance(stream, Stream) - # Verify it's the same as LEGACY_DEFAULT_STREAM - assert stream == LEGACY_DEFAULT_STREAM - - -def test_stream_per_thread_default_public_api(init_cuda): - """Test public per_thread_default() method.""" - stream = Stream.per_thread_default() - assert isinstance(stream, Stream) - # Verify it's the same as PER_THREAD_DEFAULT_STREAM - assert stream == PER_THREAD_DEFAULT_STREAM - - # ============================================================================ # Stream Equality Tests # ============================================================================