From e3d72ff2d3d8c8707cb4d24c76881d26b66a2c00 Mon Sep 17 00:00:00 2001 From: Shayne Fletcher Date: Fri, 19 Dec 2025 09:17:15 -0800 Subject: [PATCH 1/3] : global: reoroder precedences Env > ClientOverride, fix doc inconsistencies (#2182) Summary: key change here is precedence: Env wins over ClientOverride. everything else is cleanup to make the code and docs say the same thing and to reduce the chance we "remember" the wrong order later. tests are updated to encode the new behavior. Reviewed By: mariusae Differential Revision: D89551496 --- hyperactor_config/src/global.rs | 267 ++++++++++++++-------------- hyperactor_mesh/src/v1/host_mesh.rs | 9 + 2 files changed, 142 insertions(+), 134 deletions(-) diff --git a/hyperactor_config/src/global.rs b/hyperactor_config/src/global.rs index 2346c1955..224c2595b 100644 --- a/hyperactor_config/src/global.rs +++ b/hyperactor_config/src/global.rs @@ -10,7 +10,8 @@ //! //! This module provides the process-wide configuration store and APIs //! to access it. Configuration values are resolved via a **layered -//! model**: `TestOverride → Runtime → Env → File → Default`. +//! model**: `TestOverride → Env → Runtime → File → ClientOverride → +//! Default`. //! //! - Reads (`get`, `get_cloned`) consult layers in that order, falling //! back to defaults if no explicit value is set. @@ -21,14 +22,14 @@ //! that are removed automatically when the guard drops. //! - In normal operation, a parent process can capture its effective //! config via `attrs()` and pass that snapshot to a child during -//! bootstrap. The child installs it as a `Runtime` layer so the -//! parent's values take precedence over Env/File/Defaults. +//! bootstrap. The child installs it as a `ClientOverride` layer. +//! Note that Env and Runtime layers will take precedence over this +//! inherited configuration. //! //! This design provides flexibility (easy test overrides, runtime //! updates, YAML/Env baselines) while ensuring type safety and //! predictable resolution order. //! -//! //! # Testing //! //! Tests can override global configuration using [`lock`]. This @@ -62,7 +63,7 @@ use crate::from_yaml; /// Configuration source layers in priority order. /// -/// Resolution order is always: **TestOverride -> Runtime -> Env +/// Resolution order is always: **TestOverride -> Env -> Runtime /// -> File -> ClientOverride -> Default**. /// /// Smaller `priority()` number = higher precedence. @@ -74,11 +75,11 @@ pub enum Source { /// Values loaded from configuration files (e.g., YAML). File, /// Values read from environment variables at process startup. - /// Higher priority than File and ClientOverride, but lower than - /// Runtime/TestOverride. + /// Higher priority than Runtime, File, and ClientOverride, but + /// lower than TestOverride. Env, - /// Values set programmatically at runtime. Highest stable - /// priority layer; only overridden by TestOverride. + /// Values set programmatically at runtime. High-priority layer + /// but overridden by Env and TestOverride. Runtime, /// Ephemeral values inserted by tests via /// `ConfigLock::override_key`. Always wins over all other @@ -88,13 +89,14 @@ pub enum Source { /// Return the numeric priority for a source. /// -/// Smaller number = higher precedence. Matches the documented -/// order: TestOverride (0) -> Runtime (1) -> Env (2) -> File (3) -> ClientOverride (4). +/// Smaller number = higher precedence. Matches the documented order: +/// TestOverride (0) -> Env (1) -> Runtime (2) -> File (3) -> +/// ClientOverride (4). fn priority(s: Source) -> u8 { match s { Source::TestOverride => 0, - Source::Runtime => 1, - Source::Env => 2, + Source::Env => 1, + Source::Runtime => 2, Source::File => 3, Source::ClientOverride => 4, } @@ -105,9 +107,9 @@ fn priority(s: Source) -> u8 { /// `Layers` wraps a vector of [`Layer`]s, always kept sorted by /// [`priority`] (lowest number = highest precedence). /// -/// Resolution (`get`, `get_cloned`, `attrs`) consults `ordered` -/// from front to back, returning the first value found for each -/// key and falling back to defaults if none are set in any layer. +/// Resolution (`get`, `get_cloned`, `attrs`) consults `ordered` from +/// front to back, returning the first value found for each key and +/// falling back to defaults if none are set in any layer. struct Layers { /// Kept sorted by `priority` (lowest number first = highest /// priority). @@ -116,18 +118,17 @@ struct Layers { /// A single configuration layer in the global configuration model. /// -/// Layers are consulted in priority order (`TestOverride → Runtime → -/// Env → File → Default`) when resolving configuration values. Each -/// variant holds an [`Attrs`] map of key/value pairs. +/// Layers are consulted in priority order (`TestOverride → Env → +/// Runtime → File → ClientOverride → Default`) when resolving +/// configuration values. Each variant holds an [`Attrs`] map of +/// key/value pairs. /// /// The `TestOverride` variant additionally maintains per-key override -/// stacks to support nested and out-of-order test overrides. These -/// stacks are currently placeholders for a future refactor; for now, -/// only the `attrs` field is used. +/// stacks to support nested and out-of-order test overrides. /// /// Variants: -/// - [`Layer::ClientOverride`] - Values set by the config snapshot sent from the client -/// during proc bootstrap. +/// - [`Layer::ClientOverride`] - Values set by the config snapshot +/// sent from the client during proc bootstrap. /// - [`Layer::File`] — Values loaded from configuration files. /// - [`Layer::Env`] — Values sourced from process environment /// variables. @@ -136,7 +137,8 @@ struct Layers { /// under [`ConfigLock`]. /// /// Layers are stored in [`Layers::ordered`], kept sorted by their -/// effective [`Source`] priority (`TestOverride` first, `Default` last). +/// effective [`Source`] priority (`TestOverride` first, `Default` +/// last). enum Layer { /// Values set by the config snapshot sent from the client /// during proc bootstrap. @@ -149,15 +151,16 @@ enum Layer { /// installed at startup via [`init_from_env`]. Env(Attrs), - /// Values set programmatically at runtime. Stable high-priority - /// layer used by parent/child bootstrap and dynamic updates. + /// Values set programmatically at runtime. High-priority layer + /// used for dynamic updates (e.g., Python `configure()` API), but + /// overridden by Env and TestOverride layers. Runtime(Attrs), /// Ephemeral values inserted during tests via /// [`ConfigLock::override_key`]. Always takes precedence over all - /// other layers. Currently holds both the active `attrs` map and - /// a per-key `stacks` table (used to support nested or - /// out-of-order test overrides in future refactors). + /// other layers. Holds both the active `attrs` map and a per-key + /// `stacks` table to support nested and out-of-order test + /// overrides. TestOverride { attrs: Attrs, stacks: HashMap<&'static str, OverrideStack>, @@ -179,10 +182,6 @@ enum Layer { /// first override was applied (or `None` if it did not exist). /// - `frames`: The stack of active override frames, with the top /// being the last element in the vector. -/// -/// The full stack mechanism is not yet active; it is introduced -/// incrementally to prepare for robust out-of-order test override -/// restoration. struct OverrideStack { /// The name of the process environment variable associated with /// this configuration key, if any. @@ -244,9 +243,9 @@ struct OverrideFrame { /// Return the [`Source`] corresponding to a given [`Layer`]. /// /// This provides a uniform way to retrieve a layer's logical source -/// (File, Env, Runtime, or TestOverride) regardless of its internal -/// representation. Used for sorting layers by priority and for -/// source-based lookups or removals. +/// (File, Env, Runtime, TestOverride, or ClientOverride) regardless +/// of its internal representation. Used for sorting layers by +/// priority and for source-based lookups or removals. fn layer_source(l: &Layer) -> Source { match l { Layer::File(_) => Source::File, @@ -301,24 +300,24 @@ fn test_override_index(layers: &Layers) -> Option { /// Global layered configuration store. /// -/// This is the single authoritative store for configuration in -/// the process. It is always present, protected by an `RwLock`, -/// and holds a [`Layers`] struct containing all active sources. +/// This is the single authoritative store for configuration in the +/// process. It is always present, protected by an `RwLock`, and holds +/// a [`Layers`] struct containing all active sources. /// /// On startup it is seeded with a single [`Source::Env`] layer /// (values loaded from process environment variables). Additional /// layers can be installed later via [`set`] or cleared with -/// [`clear`]. Reads (`get`, `get_cloned`, `attrs`) consult the -/// layers in priority order. +/// [`clear`]. Reads (`get`, `get_cloned`, `attrs`) consult the layers +/// in priority order. /// -/// In tests, a [`Source::TestOverride`] layer is pushed on demand -/// by [`ConfigLock::override_key`]. This layer always takes -/// precedence and is automatically removed when the guard drops. +/// In tests, a [`Source::TestOverride`] layer is pushed on demand by +/// [`ConfigLock::override_key`]. This layer always takes precedence +/// and is automatically removed when the guard drops. /// -/// In normal operation, a parent process may capture its config -/// with [`attrs`] and pass it to a child during bootstrap. The -/// child installs this snapshot as its [`Source::Runtime`] layer, -/// ensuring the parent's values override Env/File/Defaults. +/// In normal operation, a parent process may capture its config with +/// [`attrs`] and pass it to a child during bootstrap. The child +/// installs this snapshot as its [`Source::ClientOverride`] layer, +/// which has the lowest precedence among explicit layers. static LAYERS: LazyLock>> = LazyLock::new(|| { let env = from_env(); let layers = Layers { @@ -339,17 +338,15 @@ static OVERRIDE_TOKEN_SEQ: AtomicU64 = AtomicU64::new(1); /// Acquire the global configuration lock. /// -/// This lock serializes all mutations of the global -/// configuration, ensuring they cannot clobber each other. It -/// returns a [`ConfigLock`] guard, which must be held for the -/// duration of any mutation (e.g. inserting or overriding -/// values). +/// This lock serializes all mutations of the global configuration, +/// ensuring they cannot clobber each other. It returns a +/// [`ConfigLock`] guard, which must be held for the duration of any +/// mutation (e.g. inserting or overriding values). /// -/// Most commonly used in tests, where it provides exclusive -/// access to push a [`Source::TestOverride`] layer via -/// [`ConfigLock::override_key`]. The override layer is -/// automatically removed when the guard drops, restoring the -/// original state. +/// Most commonly used in tests, where it provides exclusive access to +/// push a [`Source::TestOverride`] layer via +/// [`ConfigLock::override_key`]. The override layer is automatically +/// removed when the guard drops, restoring the original state. /// /// # Example /// ```rust,ignore @@ -371,7 +368,7 @@ pub fn lock() -> ConfigLock { /// `CONFIG.env_name` (from `@meta(CONFIG = ConfigAttr { … })`) to /// determine its mapping. The resulting values are installed as the /// [`Source::Env`] layer. Keys without a corresponding environment -/// variable fall back to defaults or higher-priority sources. +/// variable fall back to lower-priority sources or defaults. /// /// Typically invoked once at process startup to overlay config values /// from the environment. Repeated calls replace the existing Env @@ -382,15 +379,13 @@ pub fn init_from_env() { /// Initialize the global configuration from a YAML file. /// -/// Loads values from the specified YAML file and installs them as -/// the [`Source::File`] layer. This is the lowest-priority -/// explicit source: values from Env, Runtime, or TestOverride -/// layers always take precedence. Keys not present in the file -/// fall back to their defaults or higher-priority sources. +/// Loads values from the specified YAML file and installs them as the +/// [`Source::File`] layer. During resolution, File is consulted after +/// TestOverride, Env, and Runtime layers, and before ClientOverride +/// and defaults. /// -/// Typically invoked once at process startup to provide a -/// baseline configuration. Repeated calls replace the existing -/// File layer. +/// Typically invoked once at process startup to provide a baseline +/// configuration. Repeated calls replace the existing File layer. pub fn init_from_yaml>(path: P) -> Result<(), anyhow::Error> { let file = from_yaml(path)?; set(Source::File, file); @@ -399,9 +394,9 @@ pub fn init_from_yaml>(path: P) -> Result<(), anyhow::Error> { /// Get a key from the global configuration (Copy types). /// -/// Resolution order: TestOverride -> Runtime -> Env -> File -> -/// Default. Panics if the key has no default and is not set in -/// any layer. +/// Resolution order: TestOverride -> Env -> Runtime -> File -> +/// ClientOverride -> Default. Panics if the key has no default and is +/// not set in any layer. pub fn get(key: Key) -> T { let layers = LAYERS.read().unwrap(); for layer in &layers.ordered { @@ -432,9 +427,9 @@ pub fn override_or_global(overrides: &Attrs, key: Key) - /// Get a key by cloning the value. /// -/// Resolution order: TestOverride -> Runtime -> Env -> File -> -/// Default. Panics if the key has no default and is not set in -/// any layer. +/// Resolution order: TestOverride -> Env -> Runtime -> File -> +/// ClientOverride -> Default. Panics if the key has no default and +/// is not set in any layer. pub fn get_cloned(key: Key) -> T { try_get_cloned(key) .expect("key must have a default") @@ -443,9 +438,9 @@ pub fn get_cloned(key: Key) -> T { /// Try to get a key by cloning the value. /// -/// Resolution order: TestOverride -> Runtime -> Env -> File -> -/// Default. Returns None if the key has no default and is not set in -/// any layer. +/// Resolution order: TestOverride -> Env -> Runtime -> File -> +/// ClientOverride -> Default. Returns None if the key has no default +/// and is not set in any layer. pub fn try_get_cloned(key: Key) -> Option { let layers = LAYERS.read().unwrap(); for layer in &layers.ordered { @@ -477,16 +472,16 @@ fn make_layer(source: Source, attrs: Attrs) -> Layer { /// Insert or replace a configuration layer for the given source. /// -/// If a layer with the same [`Source`] already exists, its -/// contents are replaced with the provided `attrs`. Otherwise a -/// new layer is added. After insertion, layers are re-sorted so -/// that higher-priority sources (e.g. [`Source::TestOverride`], -/// [`Source::Runtime`]) appear before lower-priority ones -/// ([`Source::Env`], [`Source::File`]). +/// If a layer with the same [`Source`] already exists, its contents +/// are replaced with the provided `attrs`. Otherwise a new layer is +/// added. After insertion, layers are re-sorted so that +/// higher-priority sources (e.g. [`Source::TestOverride`], +/// [`Source::Env`]) appear before lower-priority ones +/// ([`Source::Runtime`], [`Source::File`]). /// /// This function is used by initialization routines (e.g. -/// `init_from_env`, `init_from_yaml`) and by tests when -/// overriding configuration values. +/// `init_from_env`, `init_from_yaml`) and by tests when overriding +/// configuration values. pub fn set(source: Source, attrs: Attrs) { let mut g = LAYERS.write().unwrap(); if let Some(l) = g.ordered.iter_mut().find(|l| layer_source(l) == source) { @@ -494,7 +489,7 @@ pub fn set(source: Source, attrs: Attrs) { } else { g.ordered.push(make_layer(source, attrs)); } - g.ordered.sort_by_key(|l| priority(layer_source(l))); // TestOverride < Runtime < Env < File < ClientOverride + g.ordered.sort_by_key(|l| priority(layer_source(l))); // TestOverride < Env < Runtime < File < ClientOverride } /// Insert or update a configuration layer for the given [`Source`]. @@ -521,16 +516,16 @@ pub fn create_or_merge(source: Source, attrs: Attrs) { } else { g.ordered.push(make_layer(source, attrs)); } - g.ordered.sort_by_key(|l| priority(layer_source(l))); // TestOverride < Runtime < Env < File < ClientOverride + g.ordered.sort_by_key(|l| priority(layer_source(l))); // TestOverride < Env < Runtime < File < ClientOverride } /// Remove the configuration layer for the given [`Source`], if /// present. /// -/// After this call, values from that source will no longer -/// contribute to resolution in [`get`], [`get_cloned`], or -/// [`attrs`]. Defaults and any remaining layers continue to apply -/// in their normal priority order. +/// After this call, values from that source will no longer contribute +/// to resolution in [`get`], [`get_cloned`], or [`attrs`]. Defaults +/// and any remaining layers continue to apply in their normal +/// priority order. pub fn clear(source: Source) { let mut g = LAYERS.write().unwrap(); g.ordered.retain(|l| layer_source(l) != source); @@ -540,8 +535,8 @@ pub fn clear(source: Source) { /// **(only keys marked with `@meta(CONFIG = ...)`)**. /// /// Resolution per key: -/// 1) First explicit value found in layers (TestOverride → -/// Runtime → Env → File). +/// 1) First explicit value found in layers (TestOverride → Env → +/// Runtime → File → ClientOverride). /// 2) Otherwise, the key's default (if any). /// /// Notes: @@ -552,8 +547,8 @@ pub fn attrs() -> Attrs { let layers = LAYERS.read().unwrap(); let mut merged = Attrs::new(); - // Iterate all declared keys (registered via `declare_attrs!` - // + inventory). + // Iterate all declared keys (registered via `declare_attrs!` + + // inventory). for info in inventory::iter::() { // Skip keys not marked as `CONFIG`. if info.meta.get(CONFIG).is_none() { @@ -571,8 +566,8 @@ pub fn attrs() -> Attrs { } } - // If no explicit value, materialize the default if there - // is one. + // If no explicit value, materialize the default if there is + // one. let boxed = match chosen { Some(b) => b, None => { @@ -622,12 +617,12 @@ pub fn runtime_attrs() -> Attrs { /// Reset the global configuration to only Defaults (for testing). /// -/// This clears all explicit layers (`File`, `Env`, `Runtime`, and -/// `TestOverride`). Subsequent lookups will resolve keys entirely -/// from their declared defaults. +/// This clears all explicit layers (`File`, `Env`, `Runtime`, +/// `ClientOverride`, and `TestOverride`). Subsequent lookups will +/// resolve keys entirely from their declared defaults. /// -/// Note: Should be called while holding [`global::lock`] in -/// tests, to ensure no concurrent modifications happen. +/// Note: Should be called while holding [`global::lock`] in tests, to +/// ensure no concurrent modifications happen. pub fn reset_to_defaults() { let mut g = LAYERS.write().unwrap(); g.ordered.clear(); @@ -729,16 +724,16 @@ impl ConfigLock { } /// When a [`ConfigLock`] is dropped, the special -/// [`Source::TestOverride`] layer (if present) is removed -/// entirely. This discards all temporary overrides created under -/// the lock, ensuring they cannot leak into subsequent tests or -/// callers. Other layers (`Runtime`, `Env`, `File`, and defaults) +/// [`Source::TestOverride`] layer (if present) is removed entirely. +/// This discards all temporary overrides created under the lock, +/// ensuring they cannot leak into subsequent tests or callers. Other +/// layers (`Runtime`, `Env`, `File`, `ClientOverride`, and defaults) /// are left untouched. /// -/// Note: individual values within the TestOverride layer may -/// already have been restored by [`ConfigValueGuard`]s as they -/// drop. This final removal guarantees no residual layer remains -/// once the lock itself is released. +/// Note: individual values within the TestOverride layer may already +/// have been restored by [`ConfigValueGuard`]s as they drop. This +/// final removal guarantees no residual layer remains once the lock +/// itself is released. impl Drop for ConfigLock { fn drop(&mut self) { let mut guard = LAYERS.write().unwrap(); @@ -748,7 +743,7 @@ impl Drop for ConfigLock { } } -/// A guard that restores a single configuration value when dropped +/// A guard that restores a single configuration value when dropped. pub struct ConfigValueGuard<'a, T: 'static> { key: crate::attrs::Key, token: u64, @@ -838,7 +833,7 @@ impl Drop for ConfigValueGuard<'_, T> { // No changes to attrs or env here. } } // else: token already handled; nothing to do - } // &must stack borrow ends here + } // &mut stack borrow ends here // If we emptied the stack for this key, restore env and drop // the stack entry. @@ -867,9 +862,9 @@ mod tests { use crate::ConfigAttr; use crate::attrs::declare_attrs; - // Test configuration keys used to exercise the layered config infrastructure. - // These mirror hyperactor's config keys but are declared locally to keep - // hyperactor_config independent. + // Test configuration keys used to exercise the layered config + // infrastructure. These mirror hyperactor's config keys but are + // declared locally to keep hyperactor_config independent. declare_attrs! { /// Maximum frame length for codec @@ -919,14 +914,16 @@ mod tests { fn test_global_config() { let config = lock(); - // Reset global config to defaults to avoid interference from other tests + // Reset global config to defaults to avoid interference from + // other tests reset_to_defaults(); assert_eq!(get(CODEC_MAX_FRAME_LENGTH), CODEC_MAX_FRAME_LENGTH_DEFAULT); { let _guard = config.override_key(CODEC_MAX_FRAME_LENGTH, 1024); assert_eq!(get(CODEC_MAX_FRAME_LENGTH), 1024); - // The configuration will be automatically restored when _guard goes out of scope + // The configuration will be automatically restored when + // _guard goes out of scope } assert_eq!(get(CODEC_MAX_FRAME_LENGTH), CODEC_MAX_FRAME_LENGTH_DEFAULT); @@ -936,7 +933,8 @@ mod tests { fn test_overrides() { let config = lock(); - // Reset global config to defaults to avoid interference from other tests + // Reset global config to defaults to avoid interference from + // other tests reset_to_defaults(); // Test the new lock/override API for individual config values @@ -1047,18 +1045,18 @@ mod tests { env[MESSAGE_DELIVERY_TIMEOUT] = Duration::from_secs(40); set(Source::Env, env); - // Runtime beats both. + // Runtime layer (but Env beats it). let mut rt = Attrs::new(); rt[MESSAGE_DELIVERY_TIMEOUT] = Duration::from_secs(50); set(Source::Runtime, rt); - assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(50)); + assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(40)); - // Clearing Runtime should reveal Env again. - clear(Source::Runtime); + // Clearing Env should reveal Runtime. + clear(Source::Env); - // With the Runtime layer gone, Env still wins over File. - assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(40)); + // With the Env layer gone, Runtime wins over File. + assert_eq!(get(MESSAGE_DELIVERY_TIMEOUT), Duration::from_secs(50)); } #[test] @@ -1083,7 +1081,7 @@ mod tests { } #[test] - fn test_parent_child_snapshot_as_runtime_layer() { + fn test_parent_child_snapshot_as_clientoverride_layer() { let _lock = lock(); reset_to_defaults(); @@ -1095,12 +1093,13 @@ mod tests { let parent_snap = attrs(); // "Child" process: start clean, install parent snapshot as - // Runtime. + // ClientOverride. reset_to_defaults(); - set(Source::Runtime, parent_snap); + set(Source::ClientOverride, parent_snap); - // Child should observe parent's effective value (as highest - // stable layer). + // Child should observe parent's effective value from the + // ClientOverride layer (since child has no Env/Runtime/File + // layers set). assert_eq!(get(MESSAGE_ACK_EVERY_N_MESSAGES), 12345); } @@ -1151,8 +1150,8 @@ mod tests { rt[SPLIT_MAX_BUFFER_SIZE] = 9; set(Source::Runtime, rt); - // Sanity: highest wins. - assert_eq!(get(SPLIT_MAX_BUFFER_SIZE), 9); + // Sanity: Env wins over Runtime and File. + assert_eq!(get(SPLIT_MAX_BUFFER_SIZE), 8); // Reset clears all explicit layers; defaults apply. reset_to_defaults(); @@ -1332,9 +1331,9 @@ mod tests { #[test] fn test_priority_order() { use Source::*; - assert!(priority(TestOverride) < priority(Runtime)); - assert!(priority(Runtime) < priority(Env)); - assert!(priority(Env) < priority(File)); + assert!(priority(TestOverride) < priority(Env)); + assert!(priority(Env) < priority(Runtime)); + assert!(priority(Runtime) < priority(File)); assert!(priority(File) < priority(ClientOverride)); } diff --git a/hyperactor_mesh/src/v1/host_mesh.rs b/hyperactor_mesh/src/v1/host_mesh.rs index 6e2cd7cbe..a70376094 100644 --- a/hyperactor_mesh/src/v1/host_mesh.rs +++ b/hyperactor_mesh/src/v1/host_mesh.rs @@ -1640,6 +1640,15 @@ mod tests { Duration::from_mins(1), ); + // Unset env vars that were mirrored by TestOverride, so child + // processes don't inherit them. This allows Runtime layer to + // override ClientOverride. SAFETY: Single-threaded test under + // global config lock. + unsafe { + std::env::remove_var("HYPERACTOR_HOST_SPAWN_READY_TIMEOUT"); + std::env::remove_var("HYPERACTOR_MESSAGE_DELIVERY_TIMEOUT"); + } + let instance = testing::instance().await; let proc_meshes = testing::proc_meshes(instance, extent!(replicas = 2)).await; From e6ae3e72948f8e599027df46f8aa40eb7e227ff2 Mon Sep 17 00:00:00 2001 From: Shayne Fletcher Date: Fri, 19 Dec 2025 09:17:15 -0800 Subject: [PATCH 2/3] : global: close a few test gaps (#2183) Summary: fill in a few missing tests Differential Revision: D89553139 --- hyperactor_config/src/global.rs | 205 ++++++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) diff --git a/hyperactor_config/src/global.rs b/hyperactor_config/src/global.rs index 224c2595b..f7e77255c 100644 --- a/hyperactor_config/src/global.rs +++ b/hyperactor_config/src/global.rs @@ -908,6 +908,13 @@ mod tests { py_name: None, }) pub attr MESSAGE_TTL_DEFAULT: u8 = 64; + + /// A test key with no environment variable mapping + @meta(CONFIG = ConfigAttr { + env_name: None, + py_name: None, + }) + pub attr CONFIG_KEY_NO_ENV: u32 = 100; } #[test] @@ -1369,4 +1376,202 @@ mod tests { assert_eq!(get(MESSAGE_TTL_DEFAULT), 42); } + + #[test] + fn test_clientoverride_precedence_loses_to_all_other_layers() { + let _lock = lock(); + reset_to_defaults(); + + // ClientOverride sets a baseline value. + let mut client = Attrs::new(); + client[MESSAGE_TTL_DEFAULT] = 10; + set(Source::ClientOverride, client); + assert_eq!(get(MESSAGE_TTL_DEFAULT), 10); + + // File should beat ClientOverride. + let mut file = Attrs::new(); + file[MESSAGE_TTL_DEFAULT] = 20; + set(Source::File, file); + assert_eq!(get(MESSAGE_TTL_DEFAULT), 20); + + // Runtime should beat both File and ClientOverride. + let mut runtime = Attrs::new(); + runtime[MESSAGE_TTL_DEFAULT] = 30; + set(Source::Runtime, runtime); + assert_eq!(get(MESSAGE_TTL_DEFAULT), 30); + + // Env should beat Runtime, File, and ClientOverride. + let mut env = Attrs::new(); + env[MESSAGE_TTL_DEFAULT] = 40; + set(Source::Env, env); + assert_eq!(get(MESSAGE_TTL_DEFAULT), 40); + + // Clear higher layers one by one to verify fallback. + clear(Source::Env); + assert_eq!(get(MESSAGE_TTL_DEFAULT), 30); // Runtime + + clear(Source::Runtime); + assert_eq!(get(MESSAGE_TTL_DEFAULT), 20); // File + + clear(Source::File); + assert_eq!(get(MESSAGE_TTL_DEFAULT), 10); // ClientOverride + } + + #[test] + fn test_create_or_merge_clientoverride() { + let _lock = lock(); + reset_to_defaults(); + + // Seed ClientOverride with one key. + let mut client = Attrs::new(); + client[MESSAGE_TTL_DEFAULT] = 10; + set(Source::ClientOverride, client); + + // Merge in a different key. + let mut update = Attrs::new(); + update[MESSAGE_ACK_EVERY_N_MESSAGES] = 123; + create_or_merge(Source::ClientOverride, update); + + // Both keys should now be visible. + assert_eq!(get(MESSAGE_TTL_DEFAULT), 10); + assert_eq!(get(MESSAGE_ACK_EVERY_N_MESSAGES), 123); + } + + #[test] + fn test_override_or_global_returns_override_when_present() { + let _lock = lock(); + reset_to_defaults(); + + // Set a global value via Env. + let mut env = Attrs::new(); + env[MESSAGE_TTL_DEFAULT] = 99; + set(Source::Env, env); + + // Create an override Attrs with a different value. + let mut overrides = Attrs::new(); + overrides[MESSAGE_TTL_DEFAULT] = 42; + + // Should return the override value, not global. + assert_eq!(override_or_global(&overrides, MESSAGE_TTL_DEFAULT), 42); + } + + #[test] + fn test_override_or_global_returns_global_when_not_present() { + let _lock = lock(); + reset_to_defaults(); + + // Set a global value via Env. + let mut env = Attrs::new(); + env[MESSAGE_TTL_DEFAULT] = 99; + set(Source::Env, env); + + // Empty overrides. + let overrides = Attrs::new(); + + // Should return the global value. + assert_eq!(override_or_global(&overrides, MESSAGE_TTL_DEFAULT), 99); + } + + #[test] + fn test_runtime_attrs_returns_only_runtime_layer() { + let _lock = lock(); + reset_to_defaults(); + + // Set values in multiple layers. + let mut file = Attrs::new(); + file[MESSAGE_TTL_DEFAULT] = 10; + set(Source::File, file); + + let mut env = Attrs::new(); + env[SPLIT_MAX_BUFFER_SIZE] = 20; + set(Source::Env, env); + + let mut runtime = Attrs::new(); + runtime[MESSAGE_ACK_EVERY_N_MESSAGES] = 123; + set(Source::Runtime, runtime); + + // runtime_attrs() should return only Runtime layer contents. + let rt = runtime_attrs(); + + // Should have the Runtime key. + assert_eq!(rt[MESSAGE_ACK_EVERY_N_MESSAGES], 123); + + // Should NOT have File or Env keys. + assert!(!rt.contains_key(MESSAGE_TTL_DEFAULT)); + assert!(!rt.contains_key(SPLIT_MAX_BUFFER_SIZE)); + } + + #[test] + fn test_override_key_without_env_name_does_not_mirror_to_env() { + let lock = lock(); + reset_to_defaults(); + + // Verify default value. + assert_eq!(get(CONFIG_KEY_NO_ENV), 100); + + // Override the key (which has no env_name). + let _guard = lock.override_key(CONFIG_KEY_NO_ENV, 999); + + // Should see the override value. + assert_eq!(get(CONFIG_KEY_NO_ENV), 999); + + // No env var should have been set (test doesn't crash, + // behavior is clean). This test mainly ensures no panic + // occurs during override/restore. + + drop(_guard); + + // Should restore to default. + assert_eq!(get(CONFIG_KEY_NO_ENV), 100); + } + + #[test] + fn test_multiple_different_keys_overridden_simultaneously() { + let lock = lock(); + reset_to_defaults(); + + // SAFETY: single-threaded test. + unsafe { + std::env::remove_var("HYPERACTOR_CODEC_MAX_FRAME_LENGTH"); + std::env::remove_var("HYPERACTOR_MESSAGE_TTL_DEFAULT"); + } + + // Override multiple different keys at once. + let guard1 = lock.override_key(CODEC_MAX_FRAME_LENGTH, 1111); + let guard2 = lock.override_key(MESSAGE_TTL_DEFAULT, 42); + let guard3 = lock.override_key(CHANNEL_MULTIPART, false); + + // All should reflect their override values. + assert_eq!(get(CODEC_MAX_FRAME_LENGTH), 1111); + assert_eq!(get(MESSAGE_TTL_DEFAULT), 42); + assert_eq!(get(CHANNEL_MULTIPART), false); + + // Env vars should be mirrored. + assert_eq!( + std::env::var("HYPERACTOR_CODEC_MAX_FRAME_LENGTH").unwrap(), + "1111" + ); + assert_eq!( + std::env::var("HYPERACTOR_MESSAGE_TTL_DEFAULT").unwrap(), + "42" + ); + + // Drop guards in arbitrary order. + drop(guard2); // Drop MESSAGE_TTL_DEFAULT first + + // MESSAGE_TTL_DEFAULT should restore, others should remain. + assert_eq!(get(MESSAGE_TTL_DEFAULT), MESSAGE_TTL_DEFAULT_DEFAULT); + assert_eq!(get(CODEC_MAX_FRAME_LENGTH), 1111); + assert_eq!(get(CHANNEL_MULTIPART), false); + + // Env for MESSAGE_TTL_DEFAULT should be cleared. + assert!(std::env::var("HYPERACTOR_MESSAGE_TTL_DEFAULT").is_err()); + + drop(guard1); + drop(guard3); + + // All should be restored. + assert_eq!(get(CODEC_MAX_FRAME_LENGTH), CODEC_MAX_FRAME_LENGTH_DEFAULT); + assert_eq!(get(CHANNEL_MULTIPART), CHANNEL_MULTIPART_DEFAULT); + } } From f7566dc7948947d252dfc3b90251214c86faa330 Mon Sep 17 00:00:00 2001 From: Shayne Fletcher Date: Fri, 19 Dec 2025 09:17:15 -0800 Subject: [PATCH 3/3] : config: add python support for configuration variable types (#2184) Summary: this adds python-side config support for a couple of additional rust types. specifically, it introduces `PyEncoding` to bridge string values onto `hyperactor::data::Encoding`, and `PyPortRange` to bridge python `(start, end)` tuples or `"start..end"` strings onto `Range`. both are wired into `declare_py_config_type!` so they work with `monarch.configure(...)`, and the new tests cover parsing, validation, and round-tripping for the supported formats. Differential Revision: D89555029 --- monarch_hyperactor/src/config.rs | 366 +++++++++++++++++++++++++++++-- 1 file changed, 353 insertions(+), 13 deletions(-) diff --git a/monarch_hyperactor/src/config.rs b/monarch_hyperactor/src/config.rs index 84daf8282..786225c85 100644 --- a/monarch_hyperactor/src/config.rs +++ b/monarch_hyperactor/src/config.rs @@ -38,6 +38,125 @@ use pyo3::prelude::*; use crate::channel::PyBindSpec; +/// Python wrapper for Encoding enum. +/// +/// This type bridges between Python strings (e.g., "bincode", +/// "serde_json", "serde_multipart") and Rust's +/// `hyperactor::data::Encoding` enum. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub struct PyEncoding(pub hyperactor::data::Encoding); + +impl From for hyperactor::data::Encoding { + fn from(e: PyEncoding) -> Self { + e.0 + } +} + +impl From for PyEncoding { + fn from(e: hyperactor::data::Encoding) -> Self { + PyEncoding(e) + } +} + +impl<'py> FromPyObject<'py> for PyEncoding { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { + let s: String = ob.extract()?; + let encoding = s.parse::().map_err(|e| { + PyValueError::new_err(format!( + "Invalid encoding '{}': {}. Valid values: bincode, serde_json, serde_multipart", + s, e + )) + })?; + Ok(PyEncoding(encoding)) + } +} + +impl<'py> IntoPyObject<'py> for PyEncoding { + type Target = PyAny; + type Output = Bound<'py, Self::Target>; + type Error = PyErr; + + fn into_pyobject(self, py: Python<'py>) -> Result { + let formatted = self.0.to_string(); + formatted.into_bound_py_any(py) + } +} + +/// Python wrapper for Range, using tuple or string format. +/// +/// This type bridges between Python and Rust's +/// `std::ops::Range`. +/// Accepts either: +/// - Tuple: `(8000, 9000)` +/// - String: `"8000..9000"` +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct PyPortRange(pub std::ops::Range); + +impl From for std::ops::Range { + fn from(r: PyPortRange) -> Self { + r.0 + } +} + +impl From> for PyPortRange { + fn from(r: std::ops::Range) -> Self { + PyPortRange(r) + } +} + +impl<'py> FromPyObject<'py> for PyPortRange { + fn extract_bound(ob: &Bound<'py, PyAny>) -> PyResult { + // Try tuple format first: (start, end) + if let Ok((start, end)) = ob.extract::<(u16, u16)>() { + if start >= end { + return Err(PyValueError::new_err(format!( + "Invalid port range ({}, {}): start must be less than end", + start, end + ))); + } + return Ok(PyPortRange(start..end)); + } + + // Fall back to string format: "start..end" + let s: String = ob.extract().map_err(|_| { + PyTypeError::new_err( + "Port range must be either a tuple (start, end) or string 'start..end'", + ) + })?; + let parts: Vec<&str> = s.split("..").collect(); + if parts.len() != 2 { + return Err(PyValueError::new_err(format!( + "Invalid port range format '{}': expected 'start..end'", + s + ))); + } + let start = parts[0].parse::().map_err(|e| { + PyValueError::new_err(format!("Invalid start port '{}': {}", parts[0], e)) + })?; + let end = parts[1].parse::().map_err(|e| { + PyValueError::new_err(format!("Invalid end port '{}': {}", parts[1], e)) + })?; + if start >= end { + return Err(PyValueError::new_err(format!( + "Invalid port range '{}': start must be less than end", + s + ))); + } + Ok(PyPortRange(start..end)) + } +} + +impl<'py> IntoPyObject<'py> for PyPortRange { + type Target = PyAny; + type Output = Bound<'py, Self::Target>; + type Error = PyErr; + + fn into_pyobject(self, py: Python<'py>) -> Result { + let formatted = format!("{}..{}", self.0.start, self.0.end); + formatted.into_bound_py_any(py) + } +} + /// Python wrapper for Duration, using humantime format strings. /// /// This type bridges between Python strings (e.g., "30s", "5m") and @@ -118,8 +237,9 @@ static KEY_BY_NAME: std::sync::LazyLock` and use it to get/set values in the global configl +/// Map from typehash to an info struct that can be used to downcast +/// an `ErasedKey` to a concrete `Key` and use it to get/set values +/// in the global configl static TYPEHASH_TO_INFO: std::sync::LazyLock> = std::sync::LazyLock::new(|| { inventory::iter::() @@ -163,8 +283,8 @@ where /// /// This mirrors [`get_global_config_py`] but restricts the lookup to /// the `Source::Runtime` layer (ignoring -/// TestOverride/Env/File/defaults). If the key has a runtime -/// override, it is cloned as `T`, converted to `P`, then to a +/// TestOverride/Env/File/ClientOverride/defaults). If the key has a +/// runtime override, it is cloned as `T`, converted to `P`, then to a /// `PyObject`; otherwise `Ok(None)` is returned. fn get_runtime_config_py<'py, P, T>( py: Python<'py>, @@ -190,7 +310,7 @@ where /// This is the write-path for the "Python configuration layer": it /// takes a typed key/value and merges it into `Source::Runtime` via /// `create_or_merge`. No other layers -/// (Env/File/TestOverride/Defaults) are affected. +/// (Env/File/TestOverride/ClientOverride/Defaults) are affected. fn set_runtime_config_py( key: &'static dyn ErasedKey, value: T, @@ -285,14 +405,13 @@ struct PythonConfigTypeInfo { // `TYPEHASH_TO_INFO` via `inventory::iter()`. inventory::collect!(PythonConfigTypeInfo); -/// Macro to declare that keys of this type can be configured -/// from python using `monarch.configure(...)`. For types -/// like `String` that are convertible directly to/from PyObjects, -/// you can just use `declare_py_config_type!(String)`. For types -/// that must first be converted to/from a rust python wrapper -/// (e.g., keys with type `BindSpec` must use `PyBindSpec` -/// as an intermediate step), the usage is -/// `declare_py_config_type!(PyBindSpec as BindSpec)`. +/// Macro to declare that keys of this type can be configured from +/// python using `monarch.configure(...)`. For types like `String` +/// that are convertible directly to/from PyObjects, you can just use +/// `declare_py_config_type!(String)`. For types that must first be +/// converted to/from a rust python wrapper (e.g., keys with type +/// `BindSpec` must use `PyBindSpec` as an intermediate step), the +/// usage is `declare_py_config_type!(PyBindSpec as BindSpec)`. macro_rules! declare_py_config_type { ($($ty:ty),+ $(,)?) => { hyperactor::paste! { @@ -344,6 +463,8 @@ macro_rules! declare_py_config_type { declare_py_config_type!(PyBindSpec as BindSpec); declare_py_config_type!(PyDuration as Duration); +declare_py_config_type!(PyEncoding as hyperactor::data::Encoding); +declare_py_config_type!(PyPortRange as std::ops::Range::); declare_py_config_type!( i8, i16, i32, i64, u8, u16, u32, u64, usize, f32, f64, bool, String ); @@ -514,3 +635,222 @@ pub fn register_python_bindings(module: &Bound<'_, PyModule>) -> PyResult<()> { Ok(()) } + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use pyo3::prelude::*; + use pyo3::types::PyDict; + use pyo3::types::PyString; + use pyo3::types::PyTuple; + + use super::*; + + #[test] + fn test_pyduration_parse_valid_formats() { + pyo3::prepare_freethreaded_python(); + Python::with_gil(|py| { + // Test various valid duration formats + let s = PyString::new(py, "30s"); + let d: PyDuration = s.extract().unwrap(); + assert_eq!(d.0, Duration::from_secs(30)); + + let s = PyString::new(py, "5m"); + let d: PyDuration = s.extract().unwrap(); + assert_eq!(d.0, Duration::from_mins(5)); + + let s = PyString::new(py, "1h"); + let d: PyDuration = s.extract().unwrap(); + assert_eq!(d.0, Duration::from_secs(3600)); + + let s = PyString::new(py, "500ms"); + let d: PyDuration = s.extract().unwrap(); + assert_eq!(d.0, Duration::from_millis(500)); + + let s = PyString::new(py, "1m 30s"); + let d: PyDuration = s.extract().unwrap(); + assert_eq!(d.0, Duration::from_secs(90)); + }); + } + + #[test] + fn test_pyduration_parse_invalid_format() { + pyo3::prepare_freethreaded_python(); + Python::with_gil(|py| { + let s = PyString::new(py, "invalid"); + let result: PyResult = s.extract(); + assert!(result.is_err()); + let err_msg = format!("{}", result.unwrap_err()); + assert!(err_msg.contains("Invalid duration format")); + }); + } + + #[test] + fn test_pyduration_roundtrip() { + pyo3::prepare_freethreaded_python(); + Python::with_gil(|py| { + let original = Duration::from_secs(42); + let py_duration = PyDuration(original); + let py_obj = py_duration.into_pyobject(py).unwrap(); + let back: PyDuration = py_obj.extract().unwrap(); + assert_eq!(back.0, original); + }); + } + + #[test] + fn test_pyencoding_parse_valid_values() { + pyo3::prepare_freethreaded_python(); + Python::with_gil(|py| { + let s = PyString::new(py, "bincode"); + let e: PyEncoding = s.extract().unwrap(); + assert_eq!(e.0, hyperactor::data::Encoding::Bincode); + + let s = PyString::new(py, "serde_json"); + let e: PyEncoding = s.extract().unwrap(); + assert_eq!(e.0, hyperactor::data::Encoding::Json); + + let s = PyString::new(py, "serde_multipart"); + let e: PyEncoding = s.extract().unwrap(); + assert_eq!(e.0, hyperactor::data::Encoding::Multipart); + }); + } + + #[test] + fn test_pyencoding_parse_invalid_value() { + pyo3::prepare_freethreaded_python(); + Python::with_gil(|py| { + let s = PyString::new(py, "invalid_encoding"); + let result: PyResult = s.extract(); + assert!(result.is_err()); + let err_msg = format!("{}", result.unwrap_err()); + assert!(err_msg.contains("Invalid encoding")); + assert!(err_msg.contains("Valid values")); + }); + } + + #[test] + fn test_pyencoding_roundtrip() { + pyo3::prepare_freethreaded_python(); + Python::with_gil(|py| { + let original = hyperactor::data::Encoding::Multipart; + let py_encoding = PyEncoding(original); + let py_obj = py_encoding.into_pyobject(py).unwrap(); + let back: PyEncoding = py_obj.extract().unwrap(); + assert_eq!(back.0, original); + }); + } + + #[test] + fn test_pyportrange_parse_tuple_format() { + pyo3::prepare_freethreaded_python(); + Python::with_gil(|py| { + let tuple = PyTuple::new(py, [8000u16, 9000u16]).unwrap(); + let r: PyPortRange = tuple.extract().unwrap(); + assert_eq!(r.0.start, 8000); + assert_eq!(r.0.end, 9000); + }); + } + + #[test] + fn test_pyportrange_parse_string_format() { + pyo3::prepare_freethreaded_python(); + Python::with_gil(|py| { + let s = PyString::new(py, "8000..9000"); + let r: PyPortRange = s.extract().unwrap(); + assert_eq!(r.0.start, 8000); + assert_eq!(r.0.end, 9000); + }); + } + + #[test] + fn test_pyportrange_reject_invalid_string_format() { + pyo3::prepare_freethreaded_python(); + Python::with_gil(|py| { + // Missing ".." + let s = PyString::new(py, "8000-9000"); + let result: PyResult = s.extract(); + assert!(result.is_err()); + + // Not numbers + let s = PyString::new(py, "abc..def"); + let result: PyResult = s.extract(); + assert!(result.is_err()); + + // Too many parts + let s = PyString::new(py, "8000..9000..10000"); + let result: PyResult = s.extract(); + assert!(result.is_err()); + }); + } + + #[test] + fn test_pyportrange_reject_invalid_ranges() { + pyo3::prepare_freethreaded_python(); + Python::with_gil(|py| { + // start >= end (tuple format) + let tuple = PyTuple::new(py, [9000u16, 8000u16]).unwrap(); + let result: PyResult = tuple.extract(); + assert!(result.is_err()); + let err_msg = format!("{}", result.unwrap_err()); + assert!(err_msg.contains("start must be less than end")); + + // start == end (tuple format) + let tuple = PyTuple::new(py, [8000u16, 8000u16]).unwrap(); + let result: PyResult = tuple.extract(); + assert!(result.is_err()); + + // start >= end (string format) + let s = PyString::new(py, "9000..8000"); + let result: PyResult = s.extract(); + assert!(result.is_err()); + let err_msg = format!("{}", result.unwrap_err()); + assert!(err_msg.contains("start must be less than end")); + }); + } + + #[test] + fn test_pyportrange_roundtrip() { + pyo3::prepare_freethreaded_python(); + Python::with_gil(|py| { + let original = 8000..9000; + let py_range = PyPortRange(original.clone()); + let py_obj = py_range.into_pyobject(py).unwrap(); + let s: String = py_obj.extract().unwrap(); + assert_eq!(s, "8000..9000"); + + // Parse back + let back: PyPortRange = py_obj.extract().unwrap(); + assert_eq!(back.0, original); + }); + } + + #[test] + fn test_pyportrange_accepts_both_formats() { + pyo3::prepare_freethreaded_python(); + Python::with_gil(|py| { + // Create a dict with both formats + let dict = PyDict::new(py); + dict.set_item("tuple_format", (8000u16, 9000u16)).unwrap(); + dict.set_item("string_format", "8000..9000").unwrap(); + + // Both should parse to the same range + let r1: PyPortRange = dict + .get_item("tuple_format") + .unwrap() + .unwrap() + .extract() + .unwrap(); + let r2: PyPortRange = dict + .get_item("string_format") + .unwrap() + .unwrap() + .extract() + .unwrap(); + + assert_eq!(r1.0, r2.0); + assert_eq!(r1.0.start, 8000); + assert_eq!(r1.0.end, 9000); + }); + } +}