@@ -596,39 +596,46 @@ class internals_pp_manager {
596596 // / acquire the GIL. Will never return nullptr.
597597 std::unique_ptr<InternalsType> *get_pp () {
598598#ifdef PYBIND11_HAS_SUBINTERPRETER_SUPPORT
599- // WARNING: We cannot use `get_num_interpreters_seen() > 1` here to create a fast path for
600- // the single-interpreter case.
599+ // FIXME: We cannot use `get_num_interpreters_seen() > 1` here to create a fast path for
600+ // the multi-interpreter case. The singleton may be initialized by a subinterpreter not the
601+ // main interpreter.
601602 //
602603 // For multi-interpreter support, the subinterpreters can be initialized concurrently, and
603604 // the first time this function may not be called in the main interpreter.
604605 // For example, a clean main interpreter that does not import any pybind11 module and then
605606 // spawns multiple subinterpreters using `InterpreterPoolExecutor` that each imports a
606607 // pybind11 module concurrently.
607- //
608- // Multiple subinterpreters may observe `get_num_interpreters_seen() <= 1` at the same
609- // time, while `get_num_interpreters_seen() += 1` in `PYBIND11_MODULE(...)` is called
610- // later.
611-
612- // Whenever the interpreter changes on the current thread we need to invalidate the
613- // internals_pp so that it can be pulled from the interpreter's state dict. That is
614- // slow, so we use the current PyThreadState to check if it is necessary.
615- auto *tstate = get_thread_state_unchecked ();
616- if (!tstate || tstate->interp != last_istate_tls ()) {
617- gil_scoped_acquire_simple gil;
618- if (!tstate) {
619- tstate = get_thread_state_unchecked ();
608+ if (get_num_interpreters_seen () > 1 ) {
609+ // Whenever the interpreter changes on the current thread we need to invalidate the
610+ // internals_pp so that it can be pulled from the interpreter's state dict. That is
611+ // slow, so we use the current PyThreadState to check if it is necessary.
612+ auto *tstate = get_thread_state_unchecked ();
613+ if (!tstate || tstate->interp != last_istate_tls ()) {
614+ gil_scoped_acquire_simple gil;
615+ if (!tstate) {
616+ tstate = get_thread_state_unchecked ();
617+ }
618+ last_istate_tls () = tstate->interp ;
619+ internals_p_tls () = get_or_create_pp_in_state_dict ();
620620 }
621- last_istate_tls () = tstate->interp ;
622- internals_p_tls () = get_or_create_pp_in_state_dict ();
621+ return internals_p_tls ();
623622 }
624- return internals_p_tls ();
625- #else
626- if (!internals_singleton_pp_) {
627- gil_scoped_acquire_simple gil;
628- internals_singleton_pp_ = get_or_create_pp_in_state_dict ();
623+ #endif
624+ return get_pp_for_main_interpreter ();
625+ }
626+
627+ // / Get the pointer-to-pointer for the main interpreter, allocating it if it does not already
628+ // / exist. May acquire the GIL. Will never return nullptr.
629+ std::unique_ptr<InternalsType> *get_pp_for_main_interpreter () {
630+ // This function **assumes** that the current thread is running in the main interpreter.
631+ if (!seen_main_interpreter_) {
632+ std::call_once (seen_main_interpreter_flag_, [&] {
633+ gil_scoped_acquire_simple gil;
634+ internals_singleton_pp_ = get_or_create_pp_in_state_dict ();
635+ seen_main_interpreter_ = true ;
636+ });
629637 }
630638 return internals_singleton_pp_;
631- #endif
632639 }
633640
634641 // / Drop all the references we're currently holding.
@@ -705,6 +712,9 @@ class internals_pp_manager {
705712 char const *holder_id_ = nullptr ;
706713 on_fetch_function *on_fetch_ = nullptr ;
707714 std::unique_ptr<InternalsType> *internals_singleton_pp_;
715+
716+ std::once_flag seen_main_interpreter_flag_;
717+ std::atomic_bool seen_main_interpreter_{false };
708718};
709719
710720// If We loaded the internals through `state_dict`, our `error_already_set`
0 commit comments