@@ -115,7 +115,45 @@ class gil_safe_call_once_and_store {
 // In this case, we should store the result per-interpreter instead of globally, because each
 // subinterpreter has its own separate state. The cached result may not be shareable across
 // interpreters (e.g., imported modules and their members).
-//
+
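+// Type-erased base class so that storages for different `T` can live in a single
+// map and be deleted through a base pointer when the interpreter shuts down.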
+struct call_once_storage_base {
+    call_once_storage_base() = default;
+    virtual ~call_once_storage_base() = default;
+    call_once_storage_base(const call_once_storage_base &) = delete;
+    call_once_storage_base(call_once_storage_base &&) = delete;
+    call_once_storage_base &operator=(const call_once_storage_base &) = delete;
+    call_once_storage_base &operator=(call_once_storage_base &&) = delete;
+};
+
+template <typename T>
+struct call_once_storage : call_once_storage_base {
+    alignas(T) char storage[sizeof(T)] = {};
+    std::once_flag once_flag;
+    void (*finalize)(T &) = nullptr;
+    std::atomic_bool is_initialized{false};
+
+    call_once_storage() = default;
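+    // Destroy the stored `T` (if it was ever constructed). A user-provided
+    // `finalize` callback, when set, takes the place of the plain destructor call.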
+    ~call_once_storage() override {
+        if (is_initialized) {
+            if (finalize != nullptr) {
+                finalize(*reinterpret_cast<T *>(storage));
+            } else {
+                reinterpret_cast<T *>(storage)->~T();
+            }
+        }
+    }
+    call_once_storage(const call_once_storage &) = delete;
+    call_once_storage(call_once_storage &&) = delete;
+    call_once_storage &operator=(const call_once_storage &) = delete;
+    call_once_storage &operator=(call_once_storage &&) = delete;
+};
+
+/// Storage map for `gil_safe_call_once_and_store`. Stored in a capsule in the interpreter's
+/// state dict with a destructor to ensure cleanup when the interpreter is destroyed.
+using call_once_storage_map_type = std::unordered_map<const void *, call_once_storage_base *>;
+
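+// State-dict key under which the storage-map capsule is stored; `PYBIND11_INTERNALS_ID`
+// ties the key to the pybind11 internals ABI version.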
+#define PYBIND11_CALL_ONCE_STORAGE_MAP_ID PYBIND11_INTERNALS_ID "call_once_storage_map__"
+
 // The life span of the stored result is the entire interpreter lifetime. An additional
 // `finalize_fn` can be provided to clean up the stored result when the interpreter is destroyed.
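+// Minimal usage sketch, mirroring the example in the header's documentation
+// (`"module_name"` is a placeholder):
+//
+//     PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store<py::object> storage;
+//     auto &imported_obj = storage // Do NOT make this `static`!
+//         .call_once_and_store_result([]() { return py::module_::import("module_name"); })
+//         .get_stored();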
 template <typename T>
@@ -129,35 +167,33 @@ class gil_safe_call_once_and_store {
             // Multiple threads may enter here, because the GIL is released in the next line and
             // CPython API calls in the `fn()` call below may release and reacquire the GIL.
             gil_scoped_release gil_rel; // Needed to establish lock ordering.
-            detail::with_internals([&](detail::internals &internals) {
-                const void *const key = reinterpret_cast<const void *>(this);
-                auto &storage_map = internals.call_once_storage_map;
-                // There can be multiple threads going through here.
-                detail::call_once_storage<T> *value = nullptr;
-                {
-                    gil_scoped_acquire gil_acq;
-                    // Only one thread will enter here at a time.
-                    const auto it = storage_map.find(key);
-                    if (it != storage_map.end()) {
-                        value = static_cast<detail::call_once_storage<T> *>(it->second);
-                    } else {
-                        value = new detail::call_once_storage<T>{};
-                        storage_map.emplace(key, value);
-                    }
+            const void *const key = reinterpret_cast<const void *>(this);
+            // There can be multiple threads going through here.
+            call_once_storage<T> *value = nullptr;
+            {
+                gil_scoped_acquire gil_acq;
+                // Only one thread will enter here at a time.
+                auto &storage_map = *get_or_create_call_once_storage_map();
+                const auto it = storage_map.find(key);
+                if (it != storage_map.end()) {
+                    value = static_cast<call_once_storage<T> *>(it->second);
+                } else {
+                    value = new call_once_storage<T>{};
+                    storage_map.emplace(key, value);
                 }
-                assert(value != nullptr);
-                std::call_once(value->once_flag, [&] {
-                    // Only one thread will ever enter here.
-                    gil_scoped_acquire gil_acq;
-                    // fn may release, but will reacquire, the GIL.
-                    ::new (value->storage) T(fn());
-                    value->finalize = finalize_fn;
-                    value->is_initialized = true;
-                    last_storage_ptr_ = reinterpret_cast<T *>(value->storage);
-                    is_initialized_by_atleast_one_interpreter_ = true;
-                });
+            }
+            assert(value != nullptr);
+            std::call_once(value->once_flag, [&] {
+                // Only one thread will ever enter here.
+                gil_scoped_acquire gil_acq;
+                // fn may release, but will reacquire, the GIL.
+                ::new (value->storage) T(fn());
+                value->finalize = finalize_fn;
+                value->is_initialized = true;
+                last_storage_ptr_ = reinterpret_cast<T *>(value->storage);
+                is_initialized_by_atleast_one_interpreter_ = true;
             });
-            // All threads will observe `is_initialized_by_atleast_one_interp_` as true here.
+            // All threads will observe `is_initialized_by_atleast_one_interpreter_` as true here.
         }
         // Intentionally not returning `T &` to ensure the calling code is self-documenting.
         return *this;
@@ -167,12 +203,11 @@ class gil_safe_call_once_and_store {
     T &get_stored() {
         T *result = last_storage_ptr_;
         if (!is_last_storage_valid()) {
-            detail::with_internals([&](detail::internals &internals) {
-                const void *const key = reinterpret_cast<const void *>(this);
-                auto &storage_map = internals.call_once_storage_map;
-                auto *value = static_cast<detail::call_once_storage<T> *>(storage_map.at(key));
-                result = last_storage_ptr_ = reinterpret_cast<T *>(value->storage);
-            });
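+            // Slow path: look this instance's storage up in the per-interpreter map.
+            // `at()` assumes `call_once_and_store_result()` already ran in this interpreter.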
+            gil_scoped_acquire gil_acq;
+            const void *const key = reinterpret_cast<const void *>(this);
+            auto &storage_map = *get_or_create_call_once_storage_map();
+            auto *value = static_cast<call_once_storage<T> *>(storage_map.at(key));
+            result = last_storage_ptr_ = reinterpret_cast<T *>(value->storage);
         }
         assert(result != nullptr);
         return *result;
@@ -187,12 +222,43 @@ class gil_safe_call_once_and_store {
 private:
     bool is_last_storage_valid() const {
         return is_initialized_by_atleast_one_interpreter_
-               && detail::get_num_interpreters_seen() <= 1;
+               && detail::get_num_interpreters_seen() == 1;
+    }
+
+    static inline call_once_storage_map_type *get_or_create_call_once_storage_map() {
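+        // Save and restore any pending Python error state around the state-dict access.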
+        error_scope err_scope;
+        dict state_dict = detail::get_python_state_dict();
+        auto storage_map_obj = reinterpret_steal<object>(
+            detail::dict_getitemstringref(state_dict.ptr(), PYBIND11_CALL_ONCE_STORAGE_MAP_ID));
+        call_once_storage_map_type *storage_map = nullptr;
+        if (storage_map_obj) {
+            void *raw_ptr = PyCapsule_GetPointer(storage_map_obj.ptr(), /*name=*/nullptr);
+            if (!raw_ptr) {
+                raise_from(PyExc_SystemError,
+                           "pybind11::gil_safe_call_once_and_store::"
+                           "get_or_create_call_once_storage_map() FAILED");
+                throw error_already_set();
+            }
+            storage_map = reinterpret_cast<call_once_storage_map_type *>(raw_ptr);
+        } else {
+            storage_map = new call_once_storage_map_type();
+            // Create a capsule with a destructor to clean up the storage map when the
+            // interpreter shuts down.
+            state_dict[PYBIND11_CALL_ONCE_STORAGE_MAP_ID]
+                = capsule(storage_map, [](void *ptr) noexcept {
+                      auto *map = reinterpret_cast<call_once_storage_map_type *>(ptr);
+                      for (const auto &entry : *map) {
+                          delete entry.second;
+                      }
+                      delete map;
+                  });
+        }
+        return storage_map;
     }
 
     // No storage needed when subinterpreter support is enabled.
-    // The actual storage is stored in the per-interpreter state dict in
-    // `internals.call_once_storage_map`.
+    // The actual storage lives in the per-interpreter state dict and is accessed via
+    // `get_or_create_call_once_storage_map()`.
 
     // Fast local cache to avoid repeated lookups when only a single interpreter is in use.
     // This is only valid if there is a single interpreter. Otherwise, it is not used.