Skip to content

Commit 1f18585

Browse files
committed
Move call-once storage out of internals
1 parent 1aed3ab commit 1f18585

File tree

2 files changed

+104
-77
lines changed

2 files changed

+104
-77
lines changed

include/pybind11/detail/internals.h

Lines changed: 1 addition & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -234,38 +234,6 @@ inline uint64_t round_up_to_next_pow2(uint64_t x) {
234234

235235
class loader_life_support;
236236

237-
struct call_once_storage_base {
238-
call_once_storage_base() = default;
239-
virtual ~call_once_storage_base() = default;
240-
call_once_storage_base(const call_once_storage_base &) = delete;
241-
call_once_storage_base(call_once_storage_base &&) = delete;
242-
call_once_storage_base &operator=(const call_once_storage_base &) = delete;
243-
call_once_storage_base &operator=(call_once_storage_base &&) = delete;
244-
};
245-
246-
template <typename T>
247-
struct call_once_storage : call_once_storage_base {
248-
alignas(T) char storage[sizeof(T)] = {};
249-
std::once_flag once_flag;
250-
void (*finalize)(T &) = nullptr;
251-
std::atomic_bool is_initialized{false};
252-
253-
call_once_storage() = default;
254-
~call_once_storage() override {
255-
if (is_initialized) {
256-
if (finalize != nullptr) {
257-
finalize(*reinterpret_cast<T *>(storage));
258-
} else {
259-
reinterpret_cast<T *>(storage)->~T();
260-
}
261-
}
262-
};
263-
call_once_storage(const call_once_storage &) = delete;
264-
call_once_storage(call_once_storage &&) = delete;
265-
call_once_storage &operator=(const call_once_storage &) = delete;
266-
call_once_storage &operator=(call_once_storage &&) = delete;
267-
};
268-
269237
/// Internal data structure used to track registered instances and types.
270238
/// Whenever binary incompatible changes are made to this structure,
271239
/// `PYBIND11_INTERNALS_VERSION` must be incremented.
@@ -310,8 +278,6 @@ struct internals {
310278

311279
type_map<PyObject *> native_enum_type_map;
312280

313-
std::unordered_map<const void *, call_once_storage_base *> call_once_storage_map;
314-
315281
internals()
316282
: static_property_type(make_static_property_type()),
317283
default_metaclass(make_default_metaclass()) {
@@ -337,12 +303,7 @@ struct internals {
337303
internals(internals &&other) = delete;
338304
internals &operator=(const internals &other) = delete;
339305
internals &operator=(internals &&other) = delete;
340-
~internals() {
341-
for (const auto &entry : call_once_storage_map) {
342-
delete entry.second;
343-
}
344-
call_once_storage_map.clear();
345-
}
306+
~internals() = default;
346307
};
347308

348309
// the internals struct (above) is shared between all the modules. local_internals are only

include/pybind11/gil_safe_call_once.h

Lines changed: 103 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,45 @@ class gil_safe_call_once_and_store {
115115
// In this case, we should store the result per-interpreter instead of globally, because each
116116
// subinterpreter has its own separate state. The cached result may not shareable across
117117
// interpreters (e.g., imported modules and their members).
118-
//
118+
119+
struct call_once_storage_base {
120+
call_once_storage_base() = default;
121+
virtual ~call_once_storage_base() = default;
122+
call_once_storage_base(const call_once_storage_base &) = delete;
123+
call_once_storage_base(call_once_storage_base &&) = delete;
124+
call_once_storage_base &operator=(const call_once_storage_base &) = delete;
125+
call_once_storage_base &operator=(call_once_storage_base &&) = delete;
126+
};
127+
128+
template <typename T>
129+
struct call_once_storage : call_once_storage_base {
130+
alignas(T) char storage[sizeof(T)] = {};
131+
std::once_flag once_flag;
132+
void (*finalize)(T &) = nullptr;
133+
std::atomic_bool is_initialized{false};
134+
135+
call_once_storage() = default;
136+
~call_once_storage() override {
137+
if (is_initialized) {
138+
if (finalize != nullptr) {
139+
finalize(*reinterpret_cast<T *>(storage));
140+
} else {
141+
reinterpret_cast<T *>(storage)->~T();
142+
}
143+
}
144+
};
145+
call_once_storage(const call_once_storage &) = delete;
146+
call_once_storage(call_once_storage &&) = delete;
147+
call_once_storage &operator=(const call_once_storage &) = delete;
148+
call_once_storage &operator=(call_once_storage &&) = delete;
149+
};
150+
151+
/// Storage map for `gil_safe_call_once_and_store`. Stored in a capsule in the interpreter's state
152+
/// dict with proper destructor to ensure cleanup when the interpreter is destroyed.
153+
using call_once_storage_map_type = std::unordered_map<const void *, call_once_storage_base *>;
154+
155+
# define PYBIND11_CALL_ONCE_STORAGE_MAP_ID PYBIND11_INTERNALS_ID "call_once_storage_map__"
156+
119157
// The life span of the stored result is the entire interpreter lifetime. An additional
120158
// `finalize_fn` can be provided to clean up the stored result when the interpreter is destroyed.
121159
template <typename T>
@@ -129,35 +167,33 @@ class gil_safe_call_once_and_store {
129167
// Multiple threads may enter here, because the GIL is released in the next line and
130168
// CPython API calls in the `fn()` call below may release and reacquire the GIL.
131169
gil_scoped_release gil_rel; // Needed to establish lock ordering.
132-
detail::with_internals([&](detail::internals &internals) {
133-
const void *const key = reinterpret_cast<const void *>(this);
134-
auto &storage_map = internals.call_once_storage_map;
135-
// There can be multiple threads going through here.
136-
detail::call_once_storage<T> *value = nullptr;
137-
{
138-
gil_scoped_acquire gil_acq;
139-
// Only one thread will enter here at a time.
140-
const auto it = storage_map.find(key);
141-
if (it != storage_map.end()) {
142-
value = static_cast<detail::call_once_storage<T> *>(it->second);
143-
} else {
144-
value = new detail::call_once_storage<T>{};
145-
storage_map.emplace(key, value);
146-
}
170+
const void *const key = reinterpret_cast<const void *>(this);
171+
// There can be multiple threads going through here.
172+
call_once_storage<T> *value = nullptr;
173+
{
174+
gil_scoped_acquire gil_acq;
175+
// Only one thread will enter here at a time.
176+
auto &storage_map = *get_or_create_call_once_storage_map();
177+
const auto it = storage_map.find(key);
178+
if (it != storage_map.end()) {
179+
value = static_cast<call_once_storage<T> *>(it->second);
180+
} else {
181+
value = new call_once_storage<T>{};
182+
storage_map.emplace(key, value);
147183
}
148-
assert(value != nullptr);
149-
std::call_once(value->once_flag, [&] {
150-
// Only one thread will ever enter here.
151-
gil_scoped_acquire gil_acq;
152-
// fn may release, but will reacquire, the GIL.
153-
::new (value->storage) T(fn());
154-
value->finalize = finalize_fn;
155-
value->is_initialized = true;
156-
last_storage_ptr_ = reinterpret_cast<T *>(value->storage);
157-
is_initialized_by_atleast_one_interpreter_ = true;
158-
});
184+
}
185+
assert(value != nullptr);
186+
std::call_once(value->once_flag, [&] {
187+
// Only one thread will ever enter here.
188+
gil_scoped_acquire gil_acq;
189+
// fn may release, but will reacquire, the GIL.
190+
::new (value->storage) T(fn());
191+
value->finalize = finalize_fn;
192+
value->is_initialized = true;
193+
last_storage_ptr_ = reinterpret_cast<T *>(value->storage);
194+
is_initialized_by_atleast_one_interpreter_ = true;
159195
});
160-
// All threads will observe `is_initialized_by_atleast_one_interp_` as true here.
196+
// All threads will observe `is_initialized_by_atleast_one_interpreter_` as true here.
161197
}
162198
// Intentionally not returning `T &` to ensure the calling code is self-documenting.
163199
return *this;
@@ -167,12 +203,11 @@ class gil_safe_call_once_and_store {
167203
T &get_stored() {
168204
T *result = last_storage_ptr_;
169205
if (!is_last_storage_valid()) {
170-
detail::with_internals([&](detail::internals &internals) {
171-
const void *const key = reinterpret_cast<const void *>(this);
172-
auto &storage_map = internals.call_once_storage_map;
173-
auto *value = static_cast<detail::call_once_storage<T> *>(storage_map.at(key));
174-
result = last_storage_ptr_ = reinterpret_cast<T *>(value->storage);
175-
});
206+
gil_scoped_acquire gil_acq;
207+
const void *const key = reinterpret_cast<const void *>(this);
208+
auto &storage_map = *get_or_create_call_once_storage_map();
209+
auto *value = static_cast<call_once_storage<T> *>(storage_map.at(key));
210+
result = last_storage_ptr_ = reinterpret_cast<T *>(value->storage);
176211
}
177212
assert(result != nullptr);
178213
return *result;
@@ -187,12 +222,43 @@ class gil_safe_call_once_and_store {
187222
private:
188223
bool is_last_storage_valid() const {
189224
return is_initialized_by_atleast_one_interpreter_
190-
&& detail::get_num_interpreters_seen() <= 1;
225+
&& detail::get_num_interpreters_seen() == 1;
226+
}
227+
228+
static inline call_once_storage_map_type *get_or_create_call_once_storage_map() {
229+
error_scope err_scope;
230+
dict state_dict = detail::get_python_state_dict();
231+
auto storage_map_obj = reinterpret_steal<object>(
232+
detail::dict_getitemstringref(state_dict.ptr(), PYBIND11_CALL_ONCE_STORAGE_MAP_ID));
233+
call_once_storage_map_type *storage_map = nullptr;
234+
if (storage_map_obj) {
235+
void *raw_ptr = PyCapsule_GetPointer(storage_map_obj.ptr(), /*name=*/nullptr);
236+
if (!raw_ptr) {
237+
raise_from(PyExc_SystemError,
238+
"pybind11::gil_safe_call_once_and_store::"
239+
"get_or_create_call_once_storage_map() FAILED");
240+
throw error_already_set();
241+
}
242+
storage_map = reinterpret_cast<call_once_storage_map_type *>(raw_ptr);
243+
} else {
244+
storage_map = new call_once_storage_map_type();
245+
// Create capsule with destructor to clean up the storage map when the interpreter
246+
// shuts down
247+
state_dict[PYBIND11_CALL_ONCE_STORAGE_MAP_ID]
248+
= capsule(storage_map, [](void *ptr) noexcept {
249+
auto *map = reinterpret_cast<call_once_storage_map_type *>(ptr);
250+
for (const auto &entry : *map) {
251+
delete entry.second;
252+
}
253+
delete map;
254+
});
255+
}
256+
return storage_map;
191257
}
192258

193259
// No storage needed when subinterpreter support is enabled.
194-
// The actual storage is stored in the per-interpreter state dict in
195-
// `internals.call_once_storage_map`.
260+
// The actual storage is stored in the per-interpreter state dict via
261+
// `get_or_create_call_once_storage_map()`.
196262

197263
// Fast local cache to avoid repeated lookups when there are no multiple interpreters.
198264
// This is only valid if there is a single interpreter. Otherwise, it is not used.

0 commit comments

Comments
 (0)