From 98bf0d490a7db8106a3ccfebba557f0856e3560e Mon Sep 17 00:00:00 2001 From: Monishver Chandrasekaran Date: Wed, 14 Jan 2026 20:00:33 -0500 Subject: [PATCH 1/2] fix: Make legacy_default() and per_thread_default() return singletons - Fixes #1494 Signed-off-by: Monishver Chandrasekaran --- cuda_core/cuda/core/_stream.pyx | 51 +++++++++++++++++++++++++++++---- cuda_core/tests/test_stream.py | 36 ++++++++++++++++------- 2 files changed, 72 insertions(+), 15 deletions(-) diff --git a/cuda_core/cuda/core/_stream.pyx b/cuda_core/cuda/core/_stream.pyx index 05cbcce76a..65ca362121 100644 --- a/cuda_core/cuda/core/_stream.pyx +++ b/cuda_core/cuda/core/_stream.pyx @@ -117,17 +117,38 @@ cdef class Stream: complete, and all subsequent operations in blocking streams wait for the legacy default stream operation to complete. + This stream is useful for ensuring strict ordering of operations but + may limit concurrency. For better performance in concurrent scenarios, + consider using per_thread_default() or creating explicit streams. + + This method returns the same singleton instance on every call for the + base Stream class. Subclasses will receive new instances of the subclass + type that wrap the same underlying CUDA stream. + Returns ------- Stream - The legacy default stream instance for the current context. + The legacy default stream singleton instance for the current context. See Also -------- per_thread_default : Per-thread default stream alternative. + from_handle : Create stream from existing handle. + Examples + -------- + >>> from cuda.core import Stream + >>> stream1 = Stream.legacy_default() + >>> stream2 = Stream.legacy_default() + >>> stream1 is stream2 # True - returns same singleton + True """ - return Stream._from_handle(cls, get_legacy_stream()) + # Return the singleton for the base Stream class + if cls is Stream: + return C_LEGACY_DEFAULT_STREAM + # For subclasses, create a new instance of the subclass type + else: + return Stream._from_handle(cls, get_legacy_stream()) @classmethod def per_thread_default(cls): @@ -139,18 +160,38 @@ cdef class Stream: non-blocking stream. This allows for better concurrency in multi-threaded applications. + Each thread has its own per-thread default stream, enabling true + concurrent execution without implicit synchronization barriers. + + This method returns the same singleton instance on every call for the + base Stream class. Subclasses will receive new instances of the subclass + type that wrap the same underlying CUDA stream. + Returns ------- Stream - The per-thread default stream instance for the current thread - and context. + The per-thread default stream singleton instance for the current + thread and context. See Also -------- legacy_default : Legacy default stream alternative. + from_handle : Create stream from existing handle. + Examples + -------- + >>> from cuda.core import Stream + >>> stream1 = Stream.per_thread_default() + >>> stream2 = Stream.per_thread_default() + >>> stream1 is stream2 # True - returns same singleton + True """ - return Stream._from_handle(cls, get_per_thread_stream()) + # Return the singleton for the base Stream class + if cls is Stream: + return C_PER_THREAD_DEFAULT_STREAM + # For subclasses, create a new instance of the subclass type + else: + return Stream._from_handle(cls, get_per_thread_stream()) @classmethod def _init(cls, obj: IsStreamT | None = None, options=None, device_id: int = None, diff --git a/cuda_core/tests/test_stream.py b/cuda_core/tests/test_stream.py index a40910dbf4..dff6a3e3a4 100644 --- a/cuda_core/tests/test_stream.py +++ b/cuda_core/tests/test_stream.py @@ -130,19 +130,35 @@ class MyStream(Stream): def test_stream_legacy_default_public_api(init_cuda): - """Test public legacy_default() method.""" - stream = Stream.legacy_default() - assert isinstance(stream, Stream) - # Verify it's the same as LEGACY_DEFAULT_STREAM - assert stream == LEGACY_DEFAULT_STREAM + """Test public legacy_default() method returns singleton.""" + stream1 = Stream.legacy_default() + stream2 = Stream.legacy_default() + + assert isinstance(stream1, Stream) + assert isinstance(stream2, Stream) + + # Verify singleton behavior - same Python object + assert stream1 is stream2, "Should return same singleton instance" + + # Verify it's the same as the module constant + assert stream1 is LEGACY_DEFAULT_STREAM, "Should be the same object as LEGACY_DEFAULT_STREAM" + assert stream2 is LEGACY_DEFAULT_STREAM, "Should be the same object as LEGACY_DEFAULT_STREAM" def test_stream_per_thread_default_public_api(init_cuda): - """Test public per_thread_default() method.""" - stream = Stream.per_thread_default() - assert isinstance(stream, Stream) - # Verify it's the same as PER_THREAD_DEFAULT_STREAM - assert stream == PER_THREAD_DEFAULT_STREAM + """Test public per_thread_default() method returns singleton.""" + stream1 = Stream.per_thread_default() + stream2 = Stream.per_thread_default() + + assert isinstance(stream1, Stream) + assert isinstance(stream2, Stream) + + # Verify singleton behavior - same Python object + assert stream1 is stream2, "Should return same singleton instance" + + # Verify it's the same as the module constant + assert stream1 is PER_THREAD_DEFAULT_STREAM, "Should be the same object as PER_THREAD_DEFAULT_STREAM" + assert stream2 is PER_THREAD_DEFAULT_STREAM, "Should be the same object as PER_THREAD_DEFAULT_STREAM" # ============================================================================ From ff9b8af3986cb32fbba0ac3a5588ddcaaa587228 Mon Sep 17 00:00:00 2001 From: Monishver Chandrasekaran Date: Wed, 14 Jan 2026 22:50:52 -0500 Subject: [PATCH 2/2] fix to avoid the circular initialization issue Signed-off-by: Monishver Chandrasekaran --- cuda_core/cuda/core/_stream.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cuda_core/cuda/core/_stream.pyx b/cuda_core/cuda/core/_stream.pyx index 65ca362121..c3093e674a 100644 --- a/cuda_core/cuda/core/_stream.pyx +++ b/cuda_core/cuda/core/_stream.pyx @@ -455,8 +455,8 @@ cdef class Stream: # c-only python objects, not public -cdef Stream C_LEGACY_DEFAULT_STREAM = Stream.legacy_default() -cdef Stream C_PER_THREAD_DEFAULT_STREAM = Stream.per_thread_default() +cdef Stream C_LEGACY_DEFAULT_STREAM = Stream._from_handle(Stream, get_legacy_stream()) +cdef Stream C_PER_THREAD_DEFAULT_STREAM = Stream._from_handle(Stream, get_per_thread_stream()) # standard python objects, public LEGACY_DEFAULT_STREAM = C_LEGACY_DEFAULT_STREAM