Skip to content

Commit c06bfb0

Browse files
shayne-fletchermeta-codesync[bot]
authored andcommitted
bootstrap: fix missing config propagation to v0 proc mesh bootstrap processes (#2127)
Summary: Pull Request resolved: #2127 host mesh processes spawned by `ProcessAllocator` weren't getting client config. this has been harmless so far - child procs spawned by the hosts correctly get their config direct from the client. this fix ensures consistency - host processes now also run with the client's intended config, not just the worker procs. so, this closes a gap that existed but didn't cause problems in practice - good to fix even if we'll soon remove this code path entirely. Reviewed By: mariusae Differential Revision: D88983141 fbshipit-source-id: e1f3bc0df5ee0ad6d295571abe0226951306a8f7
1 parent 28f76e4 commit c06bfb0

File tree

2 files changed

+57
-7
lines changed

2 files changed

+57
-7
lines changed

hyperactor_mesh/src/alloc/process.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,13 @@ impl ProcessAlloc {
482482
self.created.push(ShortUuid::generate());
483483
let create_key = &self.created[index];
484484

485+
// Capture config and pass to child via Bootstrap::V0ProcMesh
486+
let client_config = hyperactor_config::global::attrs();
487+
let bootstrap = bootstrap::Bootstrap::V0ProcMesh {
488+
config: Some(client_config),
489+
};
490+
bootstrap.to_env(&mut cmd);
491+
485492
cmd.env(
486493
bootstrap::BOOTSTRAP_ADDR_ENV,
487494
self.bootstrap_addr.to_string(),

hyperactor_mesh/src/bootstrap.rs

Lines changed: 50 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -312,7 +312,7 @@ pub async fn host(
312312
/// available to the child. Interpretation and application of that
313313
/// snapshot is up to the child process; if omitted, the child falls
314314
/// back to environment/default values.
315-
#[derive(Clone, Default, Debug, Serialize, Deserialize)]
315+
#[derive(Clone, Debug, Serialize, Deserialize)]
316316
pub enum Bootstrap {
317317
/// Bootstrap as a "v1" proc
318318
Proc {
@@ -350,8 +350,19 @@ pub enum Bootstrap {
350350
},
351351

352352
/// Bootstrap as a legacy "v0" proc.
353-
#[default]
354-
V0ProcMesh, // pass through to the v0 allocator
353+
V0ProcMesh {
354+
/// Optional config snapshot (`hyperactor_config::Attrs`)
355+
/// captured by the parent. If present, the child installs it
356+
/// as the `ClientOverride` layer so the parent's effective config
357+
/// takes precedence over Env/Defaults.
358+
config: Option<Attrs>,
359+
},
360+
}
361+
362+
impl Default for Bootstrap {
363+
fn default() -> Self {
364+
Bootstrap::V0ProcMesh { config: None }
365+
}
355366
}
356367

357368
impl Bootstrap {
@@ -518,7 +529,7 @@ impl Bootstrap {
518529
ok!(host(addr, command, config).await);
519530
halt().await
520531
}
521-
Bootstrap::V0ProcMesh => bootstrap_v0_proc_mesh().await,
532+
Bootstrap::V0ProcMesh { config } => bootstrap_v0_proc_mesh(config).await,
522533
}
523534
}
524535

@@ -2142,7 +2153,19 @@ pub async fn bootstrap() -> anyhow::Error {
21422153
/// - `HYPERACTOR_MESH_BOOTSTRAP_ADDR`: the channel address to which Process2Allocator messages
21432154
/// should be sent.
21442155
/// - `HYPERACTOR_MESH_INDEX`: an index used to identify this process to the allocator.
2145-
async fn bootstrap_v0_proc_mesh() -> anyhow::Error {
2156+
async fn bootstrap_v0_proc_mesh(config: Option<Attrs>) -> anyhow::Error {
2157+
// Apply config before entering the nested scope
2158+
if let Some(attrs) = config {
2159+
hyperactor_config::global::set(hyperactor_config::global::Source::ClientOverride, attrs);
2160+
tracing::debug!("bootstrap: installed ClientOverride config snapshot (V0ProcMesh)");
2161+
} else {
2162+
tracing::debug!("bootstrap: no config snapshot provided (V0ProcMesh)");
2163+
}
2164+
tracing::info!(
2165+
"bootstrap_v0_proc_mesh config:\n{}",
2166+
hyperactor_config::global::attrs()
2167+
);
2168+
21462169
pub async fn go() -> Result<(), anyhow::Error> {
21472170
let procs = Arc::new(tokio::sync::Mutex::new(Vec::<Proc>::new()));
21482171
let procs_for_cleanup = procs.clone();
@@ -2435,7 +2458,10 @@ mod tests {
24352458
// Sanity: the decoded variant is what we expect.
24362459
match (&value, &round) {
24372460
(Bootstrap::Proc { config: None, .. }, Bootstrap::Proc { config: None, .. }) => {}
2438-
(Bootstrap::V0ProcMesh, Bootstrap::V0ProcMesh) => {}
2461+
(
2462+
Bootstrap::V0ProcMesh { config: None },
2463+
Bootstrap::V0ProcMesh { config: None },
2464+
) => {}
24392465
_ => panic!("decoded variant mismatch: got {:?}", round),
24402466
}
24412467
}
@@ -2470,7 +2496,7 @@ mod tests {
24702496
}
24712497

24722498
#[test]
2473-
fn test_bootstrap_env_roundtrip_with_config_proc_and_host() {
2499+
fn test_bootstrap_config_snapshot_roundtrip() {
24742500
// Build a small, distinctive Attrs snapshot.
24752501
let mut attrs = Attrs::new();
24762502
attrs[MESH_TAIL_LOG_LINES] = 123;
@@ -2517,6 +2543,23 @@ mod tests {
25172543
other => panic!("unexpected variant after roundtrip: {:?}", other),
25182544
}
25192545
}
2546+
2547+
// V0ProcMesh case
2548+
{
2549+
let original = Bootstrap::V0ProcMesh {
2550+
config: Some(attrs.clone()),
2551+
};
2552+
let env_str = original.to_env_safe_string().expect("encode bootstrap");
2553+
let decoded = Bootstrap::from_env_safe_string(&env_str).expect("decode bootstrap");
2554+
match &decoded {
2555+
Bootstrap::V0ProcMesh { config } => {
2556+
let cfg = config.as_ref().expect("expected Some(attrs)");
2557+
assert_eq!(cfg[MESH_TAIL_LOG_LINES], 123);
2558+
assert!(!cfg[MESH_BOOTSTRAP_ENABLE_PDEATHSIG]);
2559+
}
2560+
other => panic!("unexpected variant after roundtrip: {:?}", other),
2561+
}
2562+
}
25202563
}
25212564

25222565
#[tokio::test]

0 commit comments

Comments
 (0)