diff --git a/.github/workflows/cibuildwheel.yml b/.github/workflows/cibuildwheel.yml index d054472..724214d 100644 --- a/.github/workflows/cibuildwheel.yml +++ b/.github/workflows/cibuildwheel.yml @@ -14,7 +14,6 @@ jobs: contents: write actions: read with: - pure-python: false + wheel-name-pattern: 'cymem-*.whl' secrets: gh-token: ${{ secrets.GITHUB_TOKEN }} - diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index b73968c..65c00e3 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -24,7 +24,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - python_version: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python_version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14", "3.14t"] exclude: # The 3.9/3.10 arm64 Python binaries that `setup-python` tries to use # don't work on macOS 15 anymore, so skip those. diff --git a/README.md b/README.md index 9e21837..6997a26 100644 --- a/README.md +++ b/README.md @@ -195,3 +195,28 @@ objects, but we'd still like the laziness of the `Pool`. from cymem.cymem cimport Pool, WrapMalloc, WrapFree cdef Pool mem = Pool(WrapMalloc(priv_malloc), WrapFree(priv_free)) ``` + +## Thread Safety + +As of version 2.0.12, `cymem.Pool` is thread-safe when used with CPython 3.13+ +free-threaded builds (PEP 703). All operations on the Pool, including `alloc()`, +`free()`, and `realloc()`, can be safely called from multiple threads concurrently. + +**Key guarantees:** +- Multiple threads can safely call `alloc()`, `free()`, and `realloc()` on the + same `Pool` instance. +- The Pool's internal bookkeeping (`addresses` dict and `size` accounting) is + protected from race conditions. Reading the internal state without + holding a lock on the `Pool` instance via a critical section is not + thread-safe. + +**Important notes:** +- Individual Pool instances are thread-safe, but you are still responsible for + proper synchronization when accessing the memory contents themselves. Note + that holding a lock on the Pool itself is typically not the right approach: + since malloc is thread-safe, memory allocated by separate calls to `alloc` + can be safely accessed concurrently without locking. You should only + synchronize access to specific memory regions that are being shared across + threads, using fine-grained locks appropriate to your use case rather than a + coarse-grained lock on the entire `Pool`. +- Custom memory allocators need to be thread-safe themselves. diff --git a/cymem/cymem.pyx b/cymem/cymem.pyx index e2470fb..5657d3a 100644 --- a/cymem/cymem.pyx +++ b/cymem/cymem.pyx @@ -1,7 +1,7 @@ -# cython: embedsignature=True +# cython: embedsignature=True, freethreading_compatible=True +cimport cython from cpython.mem cimport PyMem_Malloc, PyMem_Free -from cpython.ref cimport Py_INCREF, Py_DECREF from libc.string cimport memset from libc.string cimport memcpy import warnings @@ -44,17 +44,29 @@ cdef class Pool: addresses (dict): The currently allocated addresses and their sizes. Read-only. pymalloc (PyMalloc): The allocator to use (default uses PyMem_Malloc). pyfree (PyFree): The free to use (default uses PyMem_Free). + + Thread-safety: + All public methods in this class can be safely called from multiple threads. + Testing the thread-safety of this class can be done by checking out the repository + at https://github.com/lysnikolaou/test-cymem-threadsafety and following the + instructions on the README. """ def __cinit__(self, PyMalloc pymalloc=Default_Malloc, PyFree pyfree=Default_Free): + # size, addresses and refs are mutable. note that operations on + # dicts and lists are atomic. self.size = 0 self.addresses = {} self.refs = [] + + # pymalloc and pyfree are immutable. self.pymalloc = pymalloc self.pyfree = pyfree def __dealloc__(self): + # No need for locking here since the methods will be unreachable + # by the time __dealloc__ is called cdef size_t addr if self.addresses is not None: for addr in self.addresses: @@ -73,8 +85,17 @@ cdef class Pool: if p == NULL: raise MemoryError("Error assigning %d bytes" % (number * elem_size)) memset(p, 0, number * elem_size) - self.addresses[p] = number * elem_size - self.size += number * elem_size + + # We need a critical section here so that addresses and size get + # updated atomically. If we were to acquire a critical section on self, + # mutating the dictionary would try to acquire a critical section on + # the dictionary and therefore release the critical section on self. + # Acquiring a critical section on self.addresses works because that's + # the only C API that gets called inside the block and acquiring the + # critical section on the top-most held lock does not release it. + with cython.critical_section(self.addresses): + self.addresses[p] = number * elem_size + self.size += number * elem_size return p cdef void* realloc(self, void* p, size_t new_size) except NULL: @@ -82,19 +103,36 @@ cdef class Pool: a non-NULL pointer to the new block. new_size must be larger than the original. - If p is not in the Pool or new_size is 0, a MemoryError is raised. + If p is not in the Pool or new_size isn't larger than the previous size, + a ValueError is raised. """ - if p not in self.addresses: - raise ValueError("Pointer %d not found in Pool %s" % (p, self.addresses)) - if new_size == 0: - raise ValueError("Realloc requires new_size > 0") - assert new_size > self.addresses[p] - cdef void* new_ptr = self.alloc(1, new_size) + cdef size_t old_size + cdef void* new_ptr + + new_ptr = self.pymalloc.malloc(new_size) if new_ptr == NULL: raise MemoryError("Error reallocating to %d bytes" % new_size) - memcpy(new_ptr, p, self.addresses[p]) - self.free(p) - self.addresses[new_ptr] = new_size + + # See comment in alloc on why we're acquiring a critical section on + # self.addresses instead of self. + with cython.critical_section(self.addresses): + try: + old_size = self.addresses.pop(p) + except KeyError: + self.pyfree.free(new_ptr) + raise ValueError("Pointer %d not found in Pool %s" % (p, self.addresses)) + + if old_size >= new_size: + self.addresses[p] = old_size + self.pyfree.free(new_ptr) + raise ValueError("Realloc requires new_size > previous size") + + memcpy(new_ptr, p, old_size) + memset( new_ptr + old_size, 0, new_size - old_size) + self.size += new_size - old_size + self.addresses[new_ptr] = new_size + + self.pyfree.free(p) return new_ptr cdef void free(self, void* p) except *: @@ -105,10 +143,17 @@ cdef class Pool: If p is not in Pool.addresses, a KeyError is raised. """ - self.size -= self.addresses.pop(p) + cdef size_t size + + # See comment in alloc on why we're acquiring a critical section on + # self.addresses instead of self. + with cython.critical_section(self.addresses): + size = self.addresses.pop(p) + self.size -= size self.pyfree.free(p) def own_pyref(self, object py_ref): + # Calling append here is atomic, no critical section needed. self.refs.append(py_ref) @@ -142,9 +187,9 @@ cdef class Address: raise MemoryError("Error assigning %d bytes" % number * elem_size) memset(self.ptr, 0, number * elem_size) - property addr: - def __get__(self): - return self.ptr + @property + def addr(self): + return self.ptr def __dealloc__(self): if self.ptr != NULL: diff --git a/pyproject.toml b/pyproject.toml index ab36d78..3e85038 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [build-system] requires = [ "setuptools", - "cython>=0.25", + "cython>=3.1", ] build-backend = "setuptools.build_meta" @@ -9,7 +9,7 @@ build-backend = "setuptools.build_meta" build = "*" skip = "pp* cp36* cp37* cp38*" test-skip = "" -free-threaded-support = false +enable = ["cpython-freethreading"] archs = ["native"] diff --git a/requirements.txt b/requirements.txt index 94694dc..ddeb607 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,2 @@ -cython>=0.25 +cython>=3.1 pytest diff --git a/setup.py b/setup.py index df40294..4d6bad5 100755 --- a/setup.py +++ b/setup.py @@ -100,8 +100,8 @@ def setup_package(): version=about["__version__"], url=about["__uri__"], license=about["__license__"], - ext_modules=cythonize(ext_modules, language_level=2), - setup_requires=["cython>=0.25"], + ext_modules=cythonize(ext_modules), + setup_requires=["cython>=3.1"], classifiers=[ "Environment :: Console", "Intended Audience :: Developers", @@ -116,6 +116,8 @@ def setup_package(): "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Programming Language :: Python :: Free Threading :: 2 - Beta", "Topic :: Scientific/Engineering", ], cmdclass={"build_ext": build_ext_subclass},