From 770acf01f85d21f3f58d0f8a3a8a17f8272e0722 Mon Sep 17 00:00:00 2001 From: David Chu Date: Sat, 20 Sep 2025 00:39:53 +0000 Subject: [PATCH 01/12] Bug fix part 2 --- hydro_optimize/src/parse_results.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hydro_optimize/src/parse_results.rs b/hydro_optimize/src/parse_results.rs index 2414104..2b0e557 100644 --- a/hydro_optimize/src/parse_results.rs +++ b/hydro_optimize/src/parse_results.rs @@ -316,7 +316,7 @@ pub fn get_or_append_run_metadata( multi_run_metadata: &mut MultiRunMetadata, iteration: usize, ) -> &mut RunMetadata { - if multi_run_metadata.len() <= iteration { + while multi_run_metadata.len() < iteration + 1 { multi_run_metadata.push(RunMetadata::default()); } multi_run_metadata.get_mut(iteration).unwrap() From e61aee6ade44bb1cc41564ba82a90f8f49fd41a9 Mon Sep 17 00:00:00 2001 From: David Chu Date: Sat, 20 Sep 2025 00:44:20 +0000 Subject: [PATCH 02/12] Migrate toy_examples from main repo --- .gitignore | 4 +- Cargo.lock | 5 + hydro_optimize_examples/Cargo.toml | 10 +- .../examples/simple_graphs.rs | 130 +++ hydro_optimize_examples/src/lib.rs | 7 + hydro_optimize_examples/src/lobsters.rs | 165 +++ hydro_optimize_examples/src/lock_server.rs | 80 ++ hydro_optimize_examples/src/simple_graphs.rs | 1021 +++++++++++++++++ .../src/simple_graphs_bench.rs | 33 + .../src/simple_kv_bench.rs | 140 +++ hydro_optimize_examples/src/web_submit.rs | 404 +++++++ 11 files changed, 1995 insertions(+), 4 deletions(-) create mode 100644 hydro_optimize_examples/examples/simple_graphs.rs create mode 100644 hydro_optimize_examples/src/lobsters.rs create mode 100644 hydro_optimize_examples/src/lock_server.rs create mode 100644 hydro_optimize_examples/src/simple_graphs.rs create mode 100644 hydro_optimize_examples/src/simple_graphs_bench.rs create mode 100644 hydro_optimize_examples/src/simple_kv_bench.rs create mode 100644 hydro_optimize_examples/src/web_submit.rs diff --git a/.gitignore b/.gitignore index 308a2f7..98d6e30 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ target/ -.data.folded -.perf.data \ No newline at end of file +*.data.folded +*.perf.data \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index aa58c21..45112e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1830,11 +1830,16 @@ version = "0.0.0" dependencies = [ "clap", "ctor 0.2.9", + "dfir_lang", + "hydro_build_utils", "hydro_deploy", "hydro_lang", "hydro_optimize", "hydro_std", "hydro_test", + "regex", + "serde", + "sha2", "stageleft", "stageleft_tool", "tokio", diff --git a/hydro_optimize_examples/Cargo.toml b/hydro_optimize_examples/Cargo.toml index c1da3ad..32792ed 100644 --- a/hydro_optimize_examples/Cargo.toml +++ b/hydro_optimize_examples/Cargo.toml @@ -11,15 +11,21 @@ all-features = true hydro_lang = { git = "https://github.com/hydro-project/hydro.git" } hydro_std = { git = "https://github.com/hydro-project/hydro.git" } hydro_test = { git = "https://github.com/hydro-project/hydro.git" } +serde = { version = "1.0.197", features = ["derive"] } +sha2 = "0.10.9" stageleft = "0.9.7" tokio = { version = "1.29.0", features = ["full"] } [dev-dependencies] ctor = "0.2" clap = { version = "4.4", features = ["derive"] } +dfir_lang = { git = "https://github.com/hydro-project/hydro.git" } +hydro_build_utils = { git = "https://github.com/hydro-project/hydro.git", version = "0.0.1" } hydro_deploy = { git = "https://github.com/hydro-project/hydro.git" } -hydro_lang = { git = "https://github.com/hydro-project/hydro.git", features = ["deploy"] } +hydro_lang = { 
git = "https://github.com/hydro-project/hydro.git", features = ["deploy", "viz"] } hydro_optimize = { path = "../hydro_optimize" } +regex = "1.11.1" [build-dependencies] -stageleft_tool = "0.9.7" \ No newline at end of file +stageleft_tool = "0.9.7" +hydro_build_utils = { git = "https://github.com/hydro-project/hydro.git", version = "0.0.1" } \ No newline at end of file diff --git a/hydro_optimize_examples/examples/simple_graphs.rs b/hydro_optimize_examples/examples/simple_graphs.rs new file mode 100644 index 0000000..eb62392 --- /dev/null +++ b/hydro_optimize_examples/examples/simple_graphs.rs @@ -0,0 +1,130 @@ +use std::cell::RefCell; +use std::collections::HashMap; +use std::sync::Arc; + +use clap::Parser; +use hydro_deploy::Deployment; +use hydro_deploy::gcp::GcpNetwork; +use hydro_lang::graph::config::GraphConfig; +use hydro_lang::location::Location; +use hydro_lang::prelude::FlowBuilder; +use hydro_optimize::decoupler; +use hydro_optimize::deploy::ReusableHosts; +use hydro_optimize::deploy_and_analyze::deploy_and_analyze; +use hydro_optimize_examples::simple_graphs::{Client, Server, get_graph_function}; +use hydro_optimize_examples::simple_graphs_bench::{Aggregator, simple_graphs_bench}; +use tokio::sync::RwLock; + +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Args { + #[command(flatten)] + graph: GraphConfig, + + /// Use GCP for deployment (provide project name) + #[arg(long)] + gcp: Option, + + #[arg(long)] + function: String, +} + +#[tokio::main] +async fn main() { + let args = Args::parse(); + + let mut deployment = Deployment::new(); + let (host_arg, project) = if let Some(project) = args.gcp { + ("gcp".to_string(), project) + } else { + ("localhost".to_string(), String::new()) + }; + let network = Arc::new(RwLock::new(GcpNetwork::new(&project, None))); + + let mut builder = FlowBuilder::new(); + let num_clients = 10; + let num_clients_per_node = 1000; + let graph_function = get_graph_function(&args.function); + let server = builder.cluster(); + let clients = builder.cluster(); + let client_aggregator = builder.process(); + + simple_graphs_bench( + num_clients_per_node, + &server, + &clients, + &client_aggregator, + graph_function, + ); + + let mut clusters = vec![ + ( + server.id().raw_id(), + std::any::type_name::().to_string(), + 1, + ), + ( + clients.id().raw_id(), + std::any::type_name::().to_string(), + num_clients, + ), + ]; + let processes = vec![( + client_aggregator.id().raw_id(), + std::any::type_name::().to_string(), + )]; + + // Deploy + let mut reusable_hosts = ReusableHosts { + hosts: HashMap::new(), + host_arg, + project: project.clone(), + network: network.clone(), + }; + + let num_times_to_optimize = 2; + let num_seconds_to_profile = Some(20); + let multi_run_metadata = RefCell::new(vec![]); + + for i in 0..num_times_to_optimize { + let (rewritten_ir_builder, mut ir, mut decoupler, bottleneck_name, bottleneck_num_nodes) = + deploy_and_analyze( + &mut reusable_hosts, + &mut deployment, + builder, + &clusters, + &processes, + vec![ + std::any::type_name::().to_string(), + std::any::type_name::().to_string(), + ], + num_seconds_to_profile, + &multi_run_metadata, + i, + ) + .await; + + // Apply decoupling + let mut decoupled_cluster = None; + builder = rewritten_ir_builder.build_with(|builder| { + let new_cluster = builder.cluster::<()>(); + decoupler.decoupled_location = new_cluster.id().clone(); + decoupler::decouple(&mut ir, &decoupler, &multi_run_metadata, i); + decoupled_cluster = Some(new_cluster); + + ir + }); + if 
let Some(new_cluster) = decoupled_cluster { + clusters.push(( + new_cluster.id().raw_id(), + format!("{}_decouple_{}", bottleneck_name, i), + bottleneck_num_nodes, + )); + } + } + + let built = builder.finalize(); + + // Generate graphs if requested + _ = built.generate_graph_with_config(&args.graph, None); +} \ No newline at end of file diff --git a/hydro_optimize_examples/src/lib.rs b/hydro_optimize_examples/src/lib.rs index 09bf34a..9617274 100644 --- a/hydro_optimize_examples/src/lib.rs +++ b/hydro_optimize_examples/src/lib.rs @@ -1,5 +1,12 @@ stageleft::stageleft_no_entry_crate!(); +pub mod simple_graphs; +pub mod simple_graphs_bench; +pub mod simple_kv_bench; +pub mod lock_server; +pub mod lobsters; +pub mod web_submit; + #[cfg(test)] mod test_init { #[ctor::ctor] diff --git a/hydro_optimize_examples/src/lobsters.rs b/hydro_optimize_examples/src/lobsters.rs new file mode 100644 index 0000000..7d1546e --- /dev/null +++ b/hydro_optimize_examples/src/lobsters.rs @@ -0,0 +1,165 @@ +use std::collections::HashSet; + +use hydro_lang::{ + live_collections::stream::NoOrder, + location::{Location, MemberId}, + nondet::nondet, + prelude::{Process, Stream, Unbounded}, +}; +use sha2::{Digest, Sha256}; +use stageleft::q; +use serde::{Deserialize, Serialize}; +use tokio::time::Instant; + +pub struct Server {} + +#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug)] +pub struct Story { + pub title: String, + pub epoch_time: u128, + pub id: u32, +} + +impl PartialOrd for Story { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.epoch_time.cmp(&other.epoch_time)) + } +} + +/// Implementation of Lobsters, roughly based on API calls exposed here: https://lobste.rs/s/cqnzl5/lobste_rs_access_pattern_statistics_for#c_2op8by +/// We expose the following APIs: +/// - add_user (takes username, returns api_key, should only approve if user is admin but it's tautological so just approve everyone) +/// - get_users (returns usernames) +/// - add_story (takes api_key, title, timestamp, returns story_id) +/// - add_comment (takes api_key, story_id, comment, timestamp, returns comment_id) +/// - upvote_story (takes api_key, story_id) +/// - upvote_comment (takes api_key, comment_id) +/// - get_stories (returns the 20 stories with the latest timestamps) +/// - get_comments (returns the 20 comments with the latest timestamps) +/// - get_story_comments (takes story_id, returns the comments for that story) +/// +/// Any call with an invalid API key (either it does not exist or does not have the privileges required) will not receive a response. 
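+///
+/// API keys are deterministic: `generate_api_key` (defined below) hex-encodes
+/// the SHA-256 digest of the supplied name concatenated with a fixed
+/// server-side secret, so registering the same username twice yields the same
+/// key. A minimal sketch of that derivation, assuming the `sha2` crate and a
+/// hypothetical username "alice":
+///
+/// ```ignore
+/// use sha2::{Digest, Sha256};
+///
+/// let mut hasher = Sha256::new();
+/// hasher.update("alice".as_bytes()); // username (hypothetical)
+/// hasher.update("There is no secret ingredient".as_bytes()); // server-side secret
+/// let api_key = format!("{:x}", hasher.finalize()); // hex-encoded digest
+/// ```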
+#[expect( + clippy::too_many_arguments, + clippy::type_complexity, + reason = "internal Lobsters code // TODO" +)] +pub fn lobsters<'a, Client>( + server: &Process<'a, Server>, + add_user: Stream<(MemberId, String), Process<'a, Server>, Unbounded, NoOrder>, + get_users: Stream, Process<'a, Server>, Unbounded, NoOrder>, + add_story: Stream< + (MemberId, (String, String, Instant)), + Process<'a, Server>, + Unbounded, + NoOrder, + >, + _add_comment: Stream< + (MemberId, (String, u32, String, Instant)), + Process<'a, Server>, + Unbounded, + NoOrder, + >, + _upvote_story: Stream< + (MemberId, (String, u32)), + Process<'a, Server>, + Unbounded, + NoOrder, + >, + _upvote_comment: Stream< + (MemberId, (String, u32)), + Process<'a, Server>, + Unbounded, + NoOrder, + >, + _get_stories: Stream, Process<'a, Server>, Unbounded, NoOrder>, + _get_comments: Stream, Process<'a, Server>, Unbounded, NoOrder>, + _get_story_comments: Stream<(MemberId, u32), Process<'a, Server>, Unbounded, NoOrder>, +) { + let user_auth_tick = server.tick(); + let stories_tick = server.tick(); + + // Add user + let add_user_with_api_key = add_user.map(q!(|(client_id, username)| { + let api_key = self::generate_api_key(username.clone()); + (client_id, (username, api_key)) + })); + let users_this_tick_with_api_key = add_user_with_api_key.batch( + &user_auth_tick, + nondet!(/** Snapshot current users to approve/deny access */), + ); + // Persisted users + let curr_users = users_this_tick_with_api_key + .clone() + .map(q!(|(_client_id, (username, api_key))| (api_key, username))) + .persist(); + let curr_users_hashset = curr_users.clone().fold_commutative_idempotent( + q!(|| HashSet::new()), + q!(|set, (_api_key, username)| { + set.insert(username); + }), + ); + // Send response back to client. Only done after the tick to ensure that once the client gets the response, the user has been added + let _add_user_response = + users_this_tick_with_api_key + .all_ticks() + .map(q!(|(client_id, (_api_key, _username))| (client_id, ()))); + + // Get users + let _get_users_response = get_users + .batch( + &user_auth_tick, + nondet!(/** Snapshot against current users */), + ) + .cross_singleton(curr_users_hashset) + .all_ticks(); + + // Add story + let add_story_pre_join = add_story.map(q!(|(client_id, (api_key, title, timestamp))| { + (api_key, (client_id, title, timestamp)) + })); + let stories = add_story_pre_join + .batch( + &user_auth_tick, + nondet!(/** Compare against current users to approve/deny access */), + ) + .join(curr_users.clone()) + .all_ticks(); + let curr_stories = stories.batch(&stories_tick, nondet!(/** Snapshot of current stories */)).assume_ordering(nondet!(/** In order to use enumerate to assign a unique ID, we need total ordering. 
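+ Any consistent batch order works: story IDs only need to be unique, not chronological.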
*/)); + // Assign each story a unique ID + let (story_id_complete_cycle, story_id) = + stories_tick.cycle_with_initial(stories_tick.singleton(q!(0))); + let _indexed_curr_stories = curr_stories + .clone() + .enumerate() + .cross_singleton(story_id.clone()) + .map(q!(|((index, story), story_id)| (index + story_id, story))); + let num_curr_stories = curr_stories.clone().count(); + let new_story_id = num_curr_stories + .zip(story_id) + .map(q!(|(num_stories, story_id)| num_stories + story_id)); + story_id_complete_cycle.complete_next_tick(new_story_id); + + let _top_stories = curr_stories.clone().persist().fold_commutative_idempotent( + q!(|| vec![]), + q!( + |vec, (_api_key, ((_client_id, title, timestamp), username))| { + let new_elem = (title, timestamp, username); + // TODO: Use a binary heap + // TODO: Create a struct that is ordered by timestamp + let pos = vec.binary_search(&new_elem).unwrap_or_else(|e| e); + vec.insert(pos, new_elem); + vec.truncate(20); + } + ), + ); +} + +fn generate_api_key(email: String) -> String { + let secret = "There is no secret ingredient"; + let mut hasher = Sha256::new(); + hasher.update(email.as_bytes()); + hasher.update(secret.as_bytes()); + let hash = hasher.finalize(); + format!("{:x}", hash) +} \ No newline at end of file diff --git a/hydro_optimize_examples/src/lock_server.rs b/hydro_optimize_examples/src/lock_server.rs new file mode 100644 index 0000000..90afb66 --- /dev/null +++ b/hydro_optimize_examples/src/lock_server.rs @@ -0,0 +1,80 @@ +use hydro_lang::{ + live_collections::stream::NoOrder, + location::{Location, MemberId}, + nondet::nondet, + prelude::{Process, Stream, Unbounded}, +}; +use stageleft::q; + +pub struct Server {} + +/// Lock server implementation as described in https://dl.acm.org/doi/pdf/10.1145/3341301.3359651, with the difference being that each server can hold multiple locks. +/// Clients send (virt_client_id, server_id, acquire) requesting a lock from the server. +/// +/// If acquire = true, then: +/// - If the server currently holds the lock, it returns (virt_client_id, server_id, true). +/// - Otherwise, it returns (virt_client_id, server_id, false). +/// +/// If acquire = false, then the client wants to release its lock. Return (virt_client_id, server_id, true). 
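+///
+/// A hypothetical trace for a single lock (the entry keyed by `server_id = 3`),
+/// starting with the lock held by the server:
+/// - Client A sends `(7, 3, true)` and receives `(7, 3, true)`: virtual client
+///   7 acquires server 3's lock.
+/// - Client B sends `(9, 3, true)` and receives `(9, 3, false)`: the lock is
+///   already held, so the acquire fails.
+/// - Client A sends `(7, 3, false)` and receives `(7, 3, true)`: the release
+///   succeeds and the lock returns to the server.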
+#[expect(clippy::type_complexity, reason = "internal Lock Server code // TODO")] +pub fn lock_server<'a, Client>( + server: &Process<'a, Server>, + payloads: Stream<(MemberId, (u32, u32, bool)), Process<'a, Server>, Unbounded, NoOrder>, +) -> Stream<(MemberId, (u32, u32, bool)), Process<'a, Server>, Unbounded, NoOrder> { + let server_tick = server.tick(); + let keyed_payloads = payloads.map(q!(|(client_id, (virt_client_id, server_id, acquire))| ( + server_id, + (client_id, virt_client_id, acquire) + ))); + + let batched_payloads = keyed_payloads + .batch( + &server_tick, + nondet!(/** Need to check who currently owns the lock */), + ) + .assume_ordering(nondet!(/** First to acquire the lock wins */)); + let lock_state = batched_payloads + .clone() + .persist() + .into_keyed() + .reduce(q!( + |(curr_client_id, curr_virt_client_id, is_held_by_client), + (client_id, virt_client_id, acquire)| { + if acquire { + // If the lock is currently held by the server, give the client the lock + if !*is_held_by_client { + *curr_client_id = client_id; + *curr_virt_client_id = virt_client_id; + *is_held_by_client = true; + } + } else { + // If the client is releasing the lock and it holds it, give the lock back to the server + if *is_held_by_client + && *curr_virt_client_id == virt_client_id + && *curr_client_id == client_id + { + *is_held_by_client = false; + } + } + } + )) + .entries(); + let results = batched_payloads.join(lock_state).all_ticks().map(q!(|( + server_id, + ( + (client_id, virt_client_id, acquire), + (curr_client_id, curr_virt_client_id, is_held_by_client), + ), + )| { + if acquire { + let acquired = is_held_by_client + && curr_client_id == client_id + && curr_virt_client_id == virt_client_id; + (client_id, (virt_client_id, server_id, acquired)) + } else { + // Releasing always succeeds + (client_id, (virt_client_id, server_id, true)) + } + })); + results +} diff --git a/hydro_optimize_examples/src/simple_graphs.rs b/hydro_optimize_examples/src/simple_graphs.rs new file mode 100644 index 0000000..b550336 --- /dev/null +++ b/hydro_optimize_examples/src/simple_graphs.rs @@ -0,0 +1,1021 @@ +use hydro_lang::{ + live_collections::stream::NoOrder, + location::{Location, MemberId}, + nondet::nondet, + prelude::{Cluster, KeyedStream, Unbounded}, +}; +use sha2::{Digest, Sha256}; +use stageleft::q; + +pub struct Client {} +pub struct Server {} + +pub trait GraphFunction<'a>: + Fn( + &Cluster<'a, Server>, + KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> +{ +} + +impl<'a, F> GraphFunction<'a> for F where + F: Fn( + &Cluster<'a, Server>, + KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, + ) + -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> +{ +} + +fn sha256(n: u32) -> u32 { + let mut sha_input = n; + + for _ in 0..n { + let mut sha = Sha256::new(); + sha.update(sha_input.to_be_bytes()); + let sha_output = sha.finalize(); + sha_input = sha_output[0].into(); + } + + sha_input +} + +// Note: H = high load, L = low load + +pub fn get_graph_function<'a>(name: &str) -> impl GraphFunction<'a> { + match name { + "map_h_map_h_map_h" => map_h_map_h_map_h, + "map_h_map_h_map_l" => map_h_map_h_map_l, + "map_h_map_l_map_h" => map_h_map_l_map_h, + "map_l_map_h_map_h" => map_l_map_h_map_h, + "map_h_map_l_map_l" => map_h_map_l_map_l, + "map_l_map_h_map_l" => map_l_map_h_map_l, + "map_l_map_l_map_h" => map_l_map_l_map_h, + "map_l_map_l_map_l" => map_l_map_l_map_l, + 
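+        // Two-branch variants below: clone the payloads, transform each branch, then recombine with a union or anti-join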
"map_l_first_map_l_second_union" => map_l_first_map_l_second_union, + "map_l_first_map_h_second_union" => map_l_first_map_h_second_union, + "map_h_first_map_l_second_union" => map_h_first_map_l_second_union, + "map_h_first_map_h_second_union" => map_h_first_map_h_second_union, + "map_l_map_l_first_payload_second_union" => map_l_map_l_first_payload_second_union, + "map_l_map_h_first_payload_second_union" => map_l_map_h_first_payload_second_union, + "map_h_map_l_first_payload_second_union" => map_h_map_l_first_payload_second_union, + "map_h_map_h_first_payload_second_union" => map_h_map_h_first_payload_second_union, + "map_l_first_payload_second_union_map_l" => map_l_first_payload_second_union_map_l, + "map_l_first_payload_second_union_map_h" => map_l_first_payload_second_union_map_h, + "map_h_first_payload_second_union_map_l" => map_h_first_payload_second_union_map_l, + "map_h_first_payload_second_union_map_h" => map_h_first_payload_second_union_map_h, + "map_l_first_map_l_second_anti_join" => map_l_first_map_l_second_anti_join, + "map_l_first_map_h_second_anti_join" => map_l_first_map_h_second_anti_join, + "map_h_first_map_l_second_anti_join" => map_h_first_map_l_second_anti_join, + "map_h_first_map_h_second_anti_join" => map_h_first_map_h_second_anti_join, + "map_l_map_l_first_payload_second_anti_join" => map_l_map_l_first_payload_second_anti_join, + "map_l_map_h_first_payload_second_anti_join" => map_l_map_h_first_payload_second_anti_join, + "map_h_map_l_first_payload_second_anti_join" => map_h_map_l_first_payload_second_anti_join, + "map_h_map_h_first_payload_second_anti_join" => map_h_map_h_first_payload_second_anti_join, + "map_l_first_payload_second_anti_join_map_l" => map_l_first_payload_second_anti_join_map_l, + "map_l_first_payload_second_anti_join_map_h" => map_l_first_payload_second_anti_join_map_h, + "map_h_first_payload_second_anti_join_map_l" => map_h_first_payload_second_anti_join_map_l, + "map_h_first_payload_second_anti_join_map_h" => map_h_first_payload_second_anti_join_map_h, + _ => unimplemented!(), + } +} + +pub fn map_h_map_h_map_h<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + payloads + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(100 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(100 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(100 + n % 2) + ))) +} + +pub fn map_h_map_h_map_l<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + payloads + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(100 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(100 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(n % 2 + 1) + ))) +} + +pub fn map_h_map_l_map_h<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + payloads + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(100 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(n % 2 + 1) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(100 + n % 2) + ))) +} + +pub fn 
map_l_map_h_map_h<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + payloads + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(n % 2 + 1) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(100 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(100 + n % 2) + ))) +} + +pub fn map_h_map_l_map_l<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + payloads + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(100 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(n % 2 + 1) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(n % 2 + 1) + ))) +} + +pub fn map_l_map_h_map_l<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + payloads + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(n % 2 + 1) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(100 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(n % 2 + 1) + ))) +} + +pub fn map_l_map_l_map_h<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + payloads + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(n % 2 + 1) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(n % 2 + 1) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(100 + n % 2) + ))) +} + +pub fn map_l_map_l_map_l<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + payloads + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(n % 2 + 1) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(n % 2 + 1) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(n % 2 + 1) + ))) +} + +pub fn map_l_first_map_l_second_union<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + let map_l1 = payloads + .clone() + .map(q!(|(_virt_client_id, n)| (None, self::sha256(n % 2 + 1)))); + let map_l2 = payloads.map(q!(|(virt_client_id, n)| ( + Some(virt_client_id), + self::sha256(n % 2 + 1) + ))); + map_l1 + .interleave(map_l2) + .filter_map(q!(|(virt_client_id_opt, n)| { + // Since we cloned payloads, delete half the payloads so 1 input = 1 output + if let Some(virt_client_id) = virt_client_id_opt { + Some((virt_client_id, n)) + } else { + None + } + })) +} + +pub fn map_l_first_map_h_second_union<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + let map_l1 = payloads + .clone() + .map(q!(|(_virt_client_id, n)| (None, self::sha256(n % 2 + 1)))); + let map_h2 = payloads.map(q!(|(virt_client_id, n)| ( + Some(virt_client_id), + 
self::sha256(100 + n % 2) + ))); + map_l1 + .interleave(map_h2) + .filter_map(q!(|(virt_client_id_opt, n)| { + // Since we cloned payloads, delete half the payloads so 1 input = 1 output + if let Some(virt_client_id) = virt_client_id_opt { + Some((virt_client_id, n)) + } else { + None + } + })) +} + +pub fn map_h_first_map_l_second_union<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + let map_h1 = payloads + .clone() + .map(q!(|(_virt_client_id, n)| (None, self::sha256(100 + n % 2)))); + let map_l2 = payloads.map(q!(|(virt_client_id, n)| ( + Some(virt_client_id), + self::sha256(n % 2 + 1) + ))); + map_h1 + .interleave(map_l2) + .filter_map(q!(|(virt_client_id_opt, n)| { + // Since we cloned payloads, delete half the payloads so 1 input = 1 output + if let Some(virt_client_id) = virt_client_id_opt { + Some((virt_client_id, n)) + } else { + None + } + })) +} + +pub fn map_h_first_map_h_second_union<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + let map_h1 = payloads + .clone() + .map(q!(|(_virt_client_id, n)| (None, self::sha256(100 + n % 2)))); + let map_h2 = payloads.map(q!(|(virt_client_id, n)| ( + Some(virt_client_id), + self::sha256(100 + n % 2) + ))); + map_h1 + .interleave(map_h2) + .filter_map(q!(|(virt_client_id_opt, n)| { + // Since we cloned payloads, delete half the payloads so 1 input = 1 output + if let Some(virt_client_id) = virt_client_id_opt { + Some((virt_client_id, n)) + } else { + None + } + })) +} + +pub fn map_l_map_l_first_payload_second_union<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + payloads + .clone() + .map(q!(|(_virt_client_id, n)| ( + None::, + self::sha256(n % 2 + 1) + ))) + .map(q!(|(_virt_client_id, n)| (None, self::sha256(n % 2 + 1)))) + .interleave(payloads.map(q!(|(virt_client_id, n)| (Some(virt_client_id), n)))) + .filter_map(q!(|(virt_client_id_opt, n)| { + // Since we cloned payloads, delete half the payloads so 1 input = 1 output + if let Some(virt_client_id) = virt_client_id_opt { + Some((virt_client_id, n)) + } else { + None + } + })) +} + +pub fn map_l_map_h_first_payload_second_union<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + payloads + .clone() + .map(q!(|(_virt_client_id, n)| ( + None::, + self::sha256(n % 2 + 1) + ))) + .map(q!(|(_virt_client_id, n)| (None, self::sha256(100 + n % 2)))) + .interleave(payloads.map(q!(|(virt_client_id, n)| (Some(virt_client_id), n)))) + .filter_map(q!(|(virt_client_id_opt, n)| { + // Since we cloned payloads, delete half the payloads so 1 input = 1 output + if let Some(virt_client_id) = virt_client_id_opt { + Some((virt_client_id, n)) + } else { + None + } + })) +} + +pub fn map_h_map_l_first_payload_second_union<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + payloads + .clone() + .map(q!(|(_virt_client_id, n)| ( + None::, + self::sha256(100 + n % 2) + ))) + .map(q!(|(_virt_client_id, n)| 
(None, self::sha256(n % 2 + 1)))) + .interleave(payloads.map(q!(|(virt_client_id, n)| (Some(virt_client_id), n)))) + .filter_map(q!(|(virt_client_id_opt, n)| { + // Since we cloned payloads, delete half the payloads so 1 input = 1 output + if let Some(virt_client_id) = virt_client_id_opt { + Some((virt_client_id, n)) + } else { + None + } + })) +} + +pub fn map_h_map_h_first_payload_second_union<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + payloads + .clone() + .map(q!(|(_virt_client_id, n)| ( + None::, + self::sha256(100 + n % 2) + ))) + .map(q!(|(_virt_client_id, n)| (None, self::sha256(100 + n % 2)))) + .interleave(payloads.map(q!(|(virt_client_id, n)| (Some(virt_client_id), n)))) + .filter_map(q!(|(virt_client_id_opt, n)| { + // Since we cloned payloads, delete half the payloads so 1 input = 1 output + if let Some(virt_client_id) = virt_client_id_opt { + Some((virt_client_id, n)) + } else { + None + } + })) +} + +pub fn map_l_first_payload_second_union_map_l<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + payloads + .clone() + .map(q!(|(_virt_client_id, n)| ( + None::, + self::sha256(n % 2 + 1) + ))) + .interleave(payloads.map(q!(|(virt_client_id, n)| (Some(virt_client_id), n)))) + .filter_map(q!(|(virt_client_id_opt, n)| { + let sha = self::sha256(n % 2 + 1); + // Since we cloned payloads, delete half the payloads so 1 input = 1 output + if let Some(virt_client_id) = virt_client_id_opt { + Some((virt_client_id, sha)) + } else { + None + } + })) +} + +pub fn map_l_first_payload_second_union_map_h<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + payloads + .clone() + .map(q!(|(_virt_client_id, n)| ( + None::, + self::sha256(n % 2 + 1) + ))) + .interleave(payloads.map(q!(|(virt_client_id, n)| (Some(virt_client_id), n)))) + .filter_map(q!(|(virt_client_id_opt, n)| { + let sha = self::sha256(100 + n % 2); + // Since we cloned payloads, delete half the payloads so 1 input = 1 output + if let Some(virt_client_id) = virt_client_id_opt { + Some((virt_client_id, sha)) + } else { + None + } + })) +} + +pub fn map_h_first_payload_second_union_map_l<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + payloads + .clone() + .map(q!(|(_virt_client_id, n)| ( + None::, + self::sha256(100 + n % 2) + ))) + .interleave(payloads.map(q!(|(virt_client_id, n)| (Some(virt_client_id), n)))) + .filter_map(q!(|(virt_client_id_opt, n)| { + let sha = self::sha256(n % 2 + 1); + // Since we cloned payloads, delete half the payloads so 1 input = 1 output + if let Some(virt_client_id) = virt_client_id_opt { + Some((virt_client_id, sha)) + } else { + None + } + })) +} + +pub fn map_h_first_payload_second_union_map_h<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + payloads + .clone() + .map(q!(|(_virt_client_id, n)| ( + None::, + self::sha256(100 + n % 2) + ))) + .interleave(payloads.map(q!(|(virt_client_id, n)| 
(Some(virt_client_id), n)))) + .filter_map(q!(|(virt_client_id_opt, n)| { + let sha = self::sha256(100 + n % 2); + // Since we cloned payloads, delete half the payloads so 1 input = 1 output + if let Some(virt_client_id) = virt_client_id_opt { + Some((virt_client_id, sha)) + } else { + None + } + })) +} + +pub fn map_l_first_map_l_second_anti_join<'a>( + server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + let tick = server.tick(); + let nondet = nondet!(/** Test */); + + let true_payloads = payloads + .clone() + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + true, + n + ))); + let map_l1 = payloads + .clone() + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + false, + self::sha256(n % 2 + 1) + ))) + .interleave(true_payloads) // The actual payloads that will pass the anti_join + .batch(&tick, nondet); + let map_l2 = payloads + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + false, + self::sha256(n % 2 + 1) + ))) + .batch(&tick, nondet); + map_l1 + .filter_not_in(map_l2) + .all_ticks() + .filter_map(q!(|(client_id, virt_client_id, keep, n)| { + if keep { + Some((client_id, (virt_client_id, n))) + } else { + None + } + })) + .into_keyed() +} + +pub fn map_l_first_map_h_second_anti_join<'a>( + server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + let tick = server.tick(); + let nondet = nondet!(/** Test */); + + let true_payloads = payloads + .clone() + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + true, + n + ))); + let map_l1 = payloads + .clone() + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + false, + self::sha256(n % 2 + 1) + ))) + .interleave(true_payloads) // The actual payloads that will pass the anti_join + .batch(&tick, nondet); + let map_h2 = payloads + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + false, + self::sha256(100 + n % 2) + ))) + .batch(&tick, nondet); + map_l1 + .filter_not_in(map_h2) + .all_ticks() + .filter_map(q!(|(client_id, virt_client_id, keep, n)| { + if keep { + Some((client_id, (virt_client_id, n))) + } else { + None + } + })) + .into_keyed() +} + +pub fn map_h_first_map_l_second_anti_join<'a>( + server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + let tick = server.tick(); + let nondet = nondet!(/** Test */); + + let true_payloads = payloads + .clone() + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + true, + n + ))); + let map_h1 = payloads + .clone() + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + false, + self::sha256(100 + n % 2) + ))) + .interleave(true_payloads) // The actual payloads that will pass the anti_join + .batch(&tick, nondet); + let map_l2 = payloads + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + false, + self::sha256(n % 2 + 1) + ))) + .batch(&tick, nondet); + map_h1 + .filter_not_in(map_l2) + .all_ticks() + .filter_map(q!(|(client_id, virt_client_id, keep, n)| { 
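+            // Since we cloned payloads, keep only the untouched (keep == true) copies so 1 input = 1 output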
+ if keep { + Some((client_id, (virt_client_id, n))) + } else { + None + } + })) + .into_keyed() +} + +pub fn map_h_first_map_h_second_anti_join<'a>( + server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + let tick = server.tick(); + let nondet = nondet!(/** Test */); + + let true_payloads = payloads + .clone() + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + true, + n + ))); + let map_h1 = payloads + .clone() + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + false, + self::sha256(100 + n % 2) + ))) + .interleave(true_payloads) // The actual payloads that will pass the anti_join + .batch(&tick, nondet); + let map_h2 = payloads + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + false, + self::sha256(100 + n % 2) + ))) + .batch(&tick, nondet); + map_h1 + .filter_not_in(map_h2) + .all_ticks() + .filter_map(q!(|(client_id, virt_client_id, keep, n)| { + if keep { + Some((client_id, (virt_client_id, n))) + } else { + None + } + })) + .into_keyed() +} + +pub fn map_l_map_l_first_payload_second_anti_join<'a>( + server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + let tick = server.tick(); + let nondet = nondet!(/** Test */); + + let false_payloads = payloads + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + n, + false + ))); + false_payloads + .clone() + .map(q!(|(client_id, virt_client_id, n, _keep)| ( + client_id, + virt_client_id, + self::sha256(n % 2 + 1), + true + ))) + .map(q!(|(client_id, virt_client_id, n, _keep)| ( + client_id, + virt_client_id, + self::sha256(n % 2 + 1), + true + ))) + .interleave(false_payloads.clone()) + .batch(&tick, nondet) + .filter_not_in(false_payloads.batch(&tick, nondet)) + .all_ticks() + .filter_map(q!(|(client_id, virt_client_id, n, keep)| { + if keep { + Some((client_id, (virt_client_id, n))) + } else { + None + } + })) + .into_keyed() +} + +pub fn map_l_map_h_first_payload_second_anti_join<'a>( + server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + let tick = server.tick(); + let nondet = nondet!(/** Test */); + + let false_payloads = payloads + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + n, + false + ))); + false_payloads + .clone() + .map(q!(|(client_id, virt_client_id, n, _keep)| ( + client_id, + virt_client_id, + self::sha256(n % 2 + 1), + true + ))) + .map(q!(|(client_id, virt_client_id, n, _keep)| ( + client_id, + virt_client_id, + self::sha256(100 + n % 2), + true + ))) + .interleave(false_payloads.clone()) + .batch(&tick, nondet) + .filter_not_in(false_payloads.batch(&tick, nondet)) + .all_ticks() + .filter_map(q!(|(client_id, virt_client_id, n, keep)| { + if keep { + Some((client_id, (virt_client_id, n))) + } else { + None + } + })) + .into_keyed() +} + +pub fn map_h_map_l_first_payload_second_anti_join<'a>( + server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + let tick = server.tick(); + let nondet = nondet!(/** 
Test */); + + let false_payloads = payloads + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + n, + false + ))); + false_payloads + .clone() + .map(q!(|(client_id, virt_client_id, n, _keep)| ( + client_id, + virt_client_id, + self::sha256(100 + n % 2), + true + ))) + .map(q!(|(client_id, virt_client_id, n, _keep)| ( + client_id, + virt_client_id, + self::sha256(n % 2 + 1), + true + ))) + .interleave(false_payloads.clone()) + .batch(&tick, nondet) + .filter_not_in(false_payloads.batch(&tick, nondet)) + .all_ticks() + .filter_map(q!(|(client_id, virt_client_id, n, keep)| { + if keep { + Some((client_id, (virt_client_id, n))) + } else { + None + } + })) + .into_keyed() +} + +pub fn map_h_map_h_first_payload_second_anti_join<'a>( + server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + let tick = server.tick(); + let nondet = nondet!(/** Test */); + + let false_payloads = payloads + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + n, + false + ))); + false_payloads + .clone() + .map(q!(|(client_id, virt_client_id, n, _keep)| ( + client_id, + virt_client_id, + self::sha256(100 + n % 2), + true + ))) + .map(q!(|(client_id, virt_client_id, n, _keep)| ( + client_id, + virt_client_id, + self::sha256(100 + n % 2), + true + ))) + .interleave(false_payloads.clone()) + .batch(&tick, nondet) + .filter_not_in(false_payloads.batch(&tick, nondet)) + .all_ticks() + .filter_map(q!(|(client_id, virt_client_id, n, keep)| { + if keep { + Some((client_id, (virt_client_id, n))) + } else { + None + } + })) + .into_keyed() +} + +pub fn map_l_first_payload_second_anti_join_map_l<'a>( + server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + let tick = server.tick(); + let nondet = nondet!(/** Test */); + + let false_payloads = payloads + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + n, + false + ))); + + false_payloads + .clone() + .map(q!(|(client, virt_client_id, n, _keep)| ( + client, + virt_client_id, + self::sha256(n % 2 + 1), + true + ))) + .interleave(false_payloads.clone()) + .batch(&tick, nondet) + .filter_not_in(false_payloads.clone().batch(&tick, nondet)) + .all_ticks() + .filter_map(q!(|(client, virt_client_id, n, keep)| { + if keep { + Some((client, (virt_client_id, self::sha256(n % 2 + 1)))) + } else { + None + } + })) + .into_keyed() +} + +pub fn map_l_first_payload_second_anti_join_map_h<'a>( + server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + let tick = server.tick(); + let nondet = nondet!(/** Test */); + + let false_payloads = payloads + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + n, + false + ))); + + false_payloads + .clone() + .map(q!(|(client, virt_client_id, n, _keep)| ( + client, + virt_client_id, + self::sha256(n % 2 + 1), + true + ))) + .interleave(false_payloads.clone()) + .batch(&tick, nondet) + .filter_not_in(false_payloads.clone().batch(&tick, nondet)) + .all_ticks() + .filter_map(q!(|(client, virt_client_id, n, keep)| { + if keep { + Some((client, (virt_client_id, self::sha256(100 + n % 2)))) + } else { + None + } + })) + 
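+        // Keyed by client MemberId so the bench harness can demux responses back to their senders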
.into_keyed() +} + +pub fn map_h_first_payload_second_anti_join_map_l<'a>( + server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + let tick = server.tick(); + let nondet = nondet!(/** Test */); + + let false_payloads = payloads + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + n, + false + ))); + + false_payloads + .clone() + .map(q!(|(client, virt_client_id, n, _keep)| ( + client, + virt_client_id, + self::sha256(100 + n % 2), + true + ))) + .interleave(false_payloads.clone()) + .batch(&tick, nondet) + .filter_not_in(false_payloads.clone().batch(&tick, nondet)) + .all_ticks() + .filter_map(q!(|(client, virt_client_id, n, keep)| { + if keep { + Some((client, (virt_client_id, self::sha256(n % 2 + 1)))) + } else { + None + } + })) + .into_keyed() +} + +pub fn map_h_first_payload_second_anti_join_map_h<'a>( + server: &Cluster<'a, Server>, + payloads: KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + let tick = server.tick(); + let nondet = nondet!(/** Test */); + + let false_payloads = payloads + .entries() + .map(q!(|(client_id, (virt_client_id, n))| ( + client_id, + virt_client_id, + n, + false + ))); + + false_payloads + .clone() + .map(q!(|(client, virt_client_id, n, _keep)| ( + client, + virt_client_id, + self::sha256(100 + n % 2), + true + ))) + .interleave(false_payloads.clone()) + .batch(&tick, nondet) + .filter_not_in(false_payloads.clone().batch(&tick, nondet)) + .all_ticks() + .filter_map(q!(|(client, virt_client_id, n, keep)| { + if keep { + Some((client, (virt_client_id, self::sha256(100 + n % 2)))) + } else { + None + } + })) + .into_keyed() +} diff --git a/hydro_optimize_examples/src/simple_graphs_bench.rs b/hydro_optimize_examples/src/simple_graphs_bench.rs new file mode 100644 index 0000000..84bb2e8 --- /dev/null +++ b/hydro_optimize_examples/src/simple_graphs_bench.rs @@ -0,0 +1,33 @@ +use hydro_lang::{prelude::{Cluster, Process}, nondet::nondet}; +use hydro_std::bench_client::{bench_client, print_bench_results}; + +use hydro_test::cluster::paxos_bench::inc_u32_workload_generator; +use crate::simple_graphs::{Client, GraphFunction, Server}; +pub struct Aggregator; + +pub fn simple_graphs_bench<'a>( + num_clients_per_node: usize, + server: &Cluster<'a, Server>, + clients: &Cluster<'a, Client>, + client_aggregator: &Process<'a, Aggregator>, + graph: impl GraphFunction<'a>, +) { + let bench_results = bench_client( + clients, + inc_u32_workload_generator, + |payloads| { + graph( + server, + payloads + .broadcast_bincode(server, nondet!(/** Test */)) + .into(), + ) + .demux_bincode(clients) + .values() + }, + num_clients_per_node, + nondet!(/** bench */), + ); + + print_bench_results(bench_results, client_aggregator, clients); +} \ No newline at end of file diff --git a/hydro_optimize_examples/src/simple_kv_bench.rs b/hydro_optimize_examples/src/simple_kv_bench.rs new file mode 100644 index 0000000..89aaa1b --- /dev/null +++ b/hydro_optimize_examples/src/simple_kv_bench.rs @@ -0,0 +1,140 @@ +use hydro_lang::{ + location::Location, + nondet::nondet, + prelude::{Cluster, Process}, +}; +use hydro_std::bench_client::{bench_client, print_bench_results}; + +use hydro_test::cluster::paxos_bench::inc_u32_workload_generator; +use stageleft::q; + +pub struct Kv; +pub struct Client; +pub struct Aggregator; + +pub fn 
simple_kv_bench<'a>( + num_clients_per_node: usize, + kv: &Process<'a, Kv>, + clients: &Cluster<'a, Client>, + client_aggregator: &Process<'a, Aggregator>, +) { + let bench_results = bench_client( + clients, + inc_u32_workload_generator, + |payloads| { + let k_tick = kv.tick(); + let k_payloads = payloads.send_bincode(kv).batch(&k_tick, nondet!(/** TODO: Actually can use atomic() here, but there's no way to exit atomic in KeyedStreams? */)); + + // Insert each payload into the KV store + k_payloads + .clone() + .values() + .assume_ordering(nondet!(/** Last writer wins. TODO: Technically, we only need to assume ordering over the keyed stream (ordering of values with different keys doesn't matter. But there's no .persist() for KeyedStreams) */)) + .persist() + .into_keyed() + .reduce(q!(|prev, new| { + *prev = new; + })) + .entries() + .all_ticks() + .for_each(q!(|_| {})); // Do nothing, just need to end on a HydroLeaf + + // Send committed requests back to the original client + k_payloads.all_ticks().demux_bincode(clients).into() + }, + num_clients_per_node, + nondet!(/** bench */), + ); + + print_bench_results(bench_results, client_aggregator, clients); +} + +#[cfg(test)] +mod tests { + use dfir_lang::graph::WriteConfig; + use hydro_build_utils::insta; + use hydro_deploy::Deployment; + use hydro_lang::{ + compile::ir::dbg_dedup_tee, + deploy::{DeployCrateWrapper, HydroDeploy, TrybuildHost}, + prelude::FlowBuilder, + }; + use std::str::FromStr; + + use regex::Regex; + + #[cfg(stageleft_runtime)] + use crate::simple_kv_bench::simple_kv_bench; + + #[test] + fn simple_kv_ir() { + let builder = FlowBuilder::new(); + let kv = builder.process(); + let clients = builder.cluster(); + let client_aggregator = builder.process(); + + simple_kv_bench(1, &kv, &clients, &client_aggregator); + let built = builder.with_default_optimize::(); + + dbg_dedup_tee(|| { + insta::assert_debug_snapshot!(built.ir()); + }); + + let preview = built.preview_compile(); + insta::with_settings!({snapshot_suffix => "kv_mermaid"}, { + insta::assert_snapshot!( + preview.dfir_for(&kv).to_mermaid(&WriteConfig { + no_subgraphs: true, + no_pull_push: true, + no_handoffs: true, + op_text_no_imports: true, + ..WriteConfig::default() + }) + ); + }); + } + + #[tokio::test] + async fn simple_kv_some_throughput() { + let builder = FlowBuilder::new(); + let kv = builder.process(); + let clients = builder.cluster(); + let client_aggregator = builder.process(); + + simple_kv_bench(1, &kv, &clients, &client_aggregator); + let mut deployment = Deployment::new(); + + let nodes = builder + .with_process(&kv, TrybuildHost::new(deployment.Localhost())) + .with_cluster(&clients, vec![TrybuildHost::new(deployment.Localhost())]) + .with_process( + &client_aggregator, + TrybuildHost::new(deployment.Localhost()), + ) + .deploy(&mut deployment); + + deployment.deploy().await.unwrap(); + + let client_node = &nodes.get_process(&client_aggregator); + let client_out = client_node.stdout_filter("Throughput:").await; + + deployment.start().await.unwrap(); + + let re = Regex::new(r"Throughput: ([^ ]+) - ([^ ]+) - ([^ ]+) requests/s").unwrap(); + let mut found = 0; + let mut client_out = client_out; + while let Some(line) = client_out.recv().await { + if let Some(caps) = re.captures(&line) { + if let Ok(lower) = f64::from_str(&caps[1]) { + if lower > 0.0 { + println!("Found throughput lower-bound: {}", lower); + found += 1; + if found == 2 { + break; + } + } + } + } + } + } +} diff --git a/hydro_optimize_examples/src/web_submit.rs 
b/hydro_optimize_examples/src/web_submit.rs new file mode 100644 index 0000000..61d216b --- /dev/null +++ b/hydro_optimize_examples/src/web_submit.rs @@ -0,0 +1,404 @@ +use std::collections::{HashMap, HashSet}; + +use hydro_lang::{ + live_collections::stream::NoOrder, + location::{Location, MemberId}, + nondet::nondet, + prelude::{Process, Stream, Unbounded}, +}; +use sha2::{Digest, Sha256}; +use stageleft::q; + +pub struct Server {} + +/// Implementation of WebSubmit https://github.com/ms705/websubmit-rs/tree/master. +/// We expose the following APIs: +/// - add_lecture (takes api_key, lecture_id, lecture, only approves if user is admin) +/// - add_question (takes api_key, question, question_id, lecture_id, only approves if user is admin) +/// - add_user (takes user_email, is_admin, hashes user's email + secret, stores API key in table, emails them the key, should only approve if user is admin but it's tautological so just approve everyone) +/// - get_users (takes api_key, only approves if caller is admin, returns user_id, user_email, user_is_admin) +/// - list_lectures (takes api_key, returns lecture_id, lecture) +/// - list_lecture_questions_all (takes api_key & lecture_id, returns question, question_id, optional answer joining on answer_id = question_id, only approves if user is admin) +/// - list_lecture_questions_user (takes api_key & lecture_id, returns question, question_id, optional answer joining on answer_id = question_id if this user wrote the answer) +/// - add_answer (takes api_key, question_id, answer) +/// +/// Any call with an invalid API key (either it does not exist or does not have the privileges required) will not receive a response. +#[expect( + clippy::too_many_arguments, + clippy::type_complexity, + reason = "internal Web Submit code // TODO" +)] +pub fn web_submit<'a, Client>( + server: &Process<'a, Server>, + add_lecture: Stream< + (MemberId, (String, u32, String)), + Process<'a, Server>, + Unbounded, + NoOrder, + >, + add_question: Stream< + (MemberId, (String, String, u32, u32)), + Process<'a, Server>, + Unbounded, + NoOrder, + >, + add_user: Stream<(MemberId, (String, bool)), Process<'a, Server>, Unbounded, NoOrder>, + get_users: Stream<(MemberId, String), Process<'a, Server>, Unbounded, NoOrder>, + list_lectures: Stream<(MemberId, String), Process<'a, Server>, Unbounded, NoOrder>, + list_lecture_questions_all: Stream< + (MemberId, (String, u32)), + Process<'a, Server>, + Unbounded, + NoOrder, + >, + list_lecture_questions_user: Stream< + (MemberId, (String, u32)), + Process<'a, Server>, + Unbounded, + NoOrder, + >, + add_answer: Stream< + (MemberId, (String, u32, String)), + Process<'a, Server>, + Unbounded, + NoOrder, + >, +) -> ( + Stream<(MemberId, ()), Process<'a, Server>, Unbounded, NoOrder>, + Stream<(MemberId, ()), Process<'a, Server>, Unbounded, NoOrder>, + Stream<(MemberId, ()), Process<'a, Server>, Unbounded, NoOrder>, + Stream<(MemberId, HashMap), Process<'a, Server>, Unbounded, NoOrder>, + Stream<(MemberId, HashMap), Process<'a, Server>, Unbounded, NoOrder>, + Stream< + (MemberId, HashMap)>), + Process<'a, Server>, + Unbounded, + NoOrder, + >, + Stream< + (MemberId, HashMap)>), + Process<'a, Server>, + Unbounded, + NoOrder, + >, + Stream<(MemberId, ()), Process<'a, Server>, Unbounded, NoOrder>, +) { + let user_auth_tick = server.tick(); + let lectures_tick = server.tick(); + let question_answer_tick = server.tick(); + + // Add user + let add_user_with_api_key = add_user.map(q!(|(client_id, (email, is_admin))| { + let api_key = 
self::generate_api_key(email.clone()); + (client_id, (email, is_admin, api_key)) + })); + let users_this_tick_with_api_key = add_user_with_api_key.batch( + &user_auth_tick, + nondet!(/** Snapshot current users to approve/deny access */), + ); + // Persisted users + let curr_users = users_this_tick_with_api_key + .clone() + .map(q!(|(_client_id, (email, is_admin, api_key))| ( + api_key, + (email, is_admin) + ))) + .persist(); + let curr_users_hashmap = curr_users.clone().fold_commutative_idempotent( + q!(|| HashMap::new()), + q!(|map, (_api_key, (email, is_admin))| { + map.insert(email, is_admin); + }), + ); + // Email the API key. Only done after the tick to ensure that once the client gets the email, the user has been added + users_this_tick_with_api_key + .clone() + .all_ticks() + .for_each(q!(|(_client_id, (email, _is_admin, api_key))| { + self::send_email(api_key, email) + })); + // Send response back to client. Only done after the tick to ensure that once the client gets the response, the user has been added + let add_user_response = + users_this_tick_with_api_key.all_ticks().map(q!(|( + client_id, + (_email, _is_admin, _api_key), + )| (client_id, ()))); + + // Add lecture + let add_lecture_pre_join = + add_lecture.map(q!(|(client_id, (api_key, lecture_id, lecture))| { + (api_key, (client_id, lecture_id, lecture)) + })); + let lectures = add_lecture_pre_join + .batch( + &user_auth_tick, + nondet!(/** Compare against current users to approve/deny access */), + ) + .join(curr_users.clone()) + .all_ticks() + .filter(q!(|( + _api_key, + ((_client_id, _lecture_id, _lecture), (_email, is_admin)), + )| *is_admin)); + let curr_lectures = + lectures.batch(&lectures_tick, nondet!(/** Snapshot of current lectures */)); + let curr_lectures_hashmap = curr_lectures.clone().persist().fold_commutative_idempotent( + q!(|| HashMap::new()), + q!( + |map, (_api_key, ((_client_id, lecture_id, lecture), (_email, _is_admin)))| { + map.insert(lecture_id, lecture); + } + ), + ); + // Only done after the lectures_tick to ensure that once the client gets the response, the lecture has been added + let add_lecture_response = curr_lectures.all_ticks().map(q!(|( + _api_key, + ((client_id, _lecture_id, _lecture), (_email, _is_admin)), + )| (client_id, ()))); + + // Add question + let add_question_pre_join = add_question.map(q!(|( + client_id, + (api_key, question, question_id, lecture_id), + )| { + (api_key, (client_id, question, question_id, lecture_id)) + })); + let add_question_auth = add_question_pre_join + .batch( + &user_auth_tick, + nondet!(/** Compare against current users to approve/deny access */), + ) + .join(curr_users.clone()) + .all_ticks() + .filter(q!(|( + _api_key, + ((_client_id, _question, _question_id, _lecture_id), (_email, is_admin)), + )| *is_admin)); + let add_question_this_tick = add_question_auth.batch( + &question_answer_tick, + nondet!(/** Snapshot of current questions */), + ); + let curr_questions = add_question_this_tick + .clone() + .map(q!(|( + _api_key, + ((_client_id, question, question_id, lecture_id), (_email, _is_admin)), + )| (lecture_id, (question_id, question)))) + .persist(); + // Only done after the question_answer_tick to ensure that once the client gets the response, the question has been added + let add_question_response = add_question_this_tick.all_ticks().map(q!(|( + _api_key, + ((client_id, _question, _question_id, _lecture_id), (_email, _is_admin)), + )| (client_id, ()))); + + // Get users + let get_users_pre_join = get_users.map(q!(|(client_id, api_key)| (api_key, 
client_id))); + let get_users_response = get_users_pre_join + .batch( + &user_auth_tick, + nondet!(/** Compare against current users to approve/deny access */), + ) + .join(curr_users.clone()) + .filter_map(q!(|(_api_key, (client_id, (_email, is_admin)))| { + if is_admin { Some(client_id) } else { None } + })) + .cross_singleton(curr_users_hashmap) + .all_ticks(); + + // List lectures + let list_lectures_pre_join = list_lectures.map(q!(|(client_id, api_key)| (api_key, client_id))); + let list_lectures_auth = list_lectures_pre_join + .batch( + &user_auth_tick, + nondet!(/** Compare against current users to approve/deny access */), + ) + .join(curr_users.clone()) + .all_ticks() + .map(q!(|(_api_key, (client_id, (_email, _is_admin)))| client_id)); + let list_lectures_response = list_lectures_auth + .batch( + &lectures_tick, + nondet!(/** Join with snapshot of current lectures */), + ) + .cross_singleton(curr_lectures_hashmap) + .all_ticks(); + + // Add answer + let add_answer_pre_join = add_answer.map(q!(|(client_id, (api_key, question_id, answer))| { + (api_key, (client_id, question_id, answer)) + })); + let add_answer_auth = add_answer_pre_join + .batch( + &user_auth_tick, + nondet!(/** Compare against current users to approve/deny access */), + ) + .join(curr_users.clone()) + .all_ticks(); + let add_answer_this_tick = add_answer_auth.batch( + &question_answer_tick, + nondet!(/** Snapshot of current answers */), + ); + let curr_answers = add_answer_this_tick + .clone() + .map(q!(|( + api_key, + ((_client_id, question_id, answer), (_email, _is_admin)), + )| ((question_id, api_key), answer))) + .persist(); + // Only done after the question_answer_tick to ensure that once the client gets the response, the answer has been added + let add_answer_response = add_answer_this_tick.all_ticks().map(q!(|( + _api_key, + ((client_id, _question_id, _answer), (_email, _is_admin)), + )| (client_id, ()))); + + // List lecture questions all + let list_lecture_questions_all_pre_join = + list_lecture_questions_all.map(q!(|(client_id, (api_key, lecture_id))| { + (api_key, (client_id, lecture_id)) + })); + let list_lecture_questions_all_auth = list_lecture_questions_all_pre_join + .batch( + &user_auth_tick, + nondet!(/** Compare against current users to approve/deny access */), + ) + .join(curr_users.clone()) + .all_ticks() + .filter_map(q!(|( + _api_key, + ((client_id, lecture_id), (_email, is_admin)), + )| { + if is_admin { + Some((lecture_id, client_id)) + } else { + None + } + })); + // Find all questions with that ID + let list_lecture_questions_all_question_only = list_lecture_questions_all_auth + .batch( + &question_answer_tick, + nondet!(/** Join with snapshot of current questions */), + ) + .join(curr_questions.clone()) + .map(q!(|(_lecture_id, (client_id, (question_id, question)))| ( + question_id, + (client_id, question) + ))); + // Don't need to join on api_key since we're getting all answers, regardless of who wrote them + let curr_answers_no_api_key = + curr_answers + .clone() + .map(q!(|((question_id, _api_key), answer)| ( + question_id, + answer + ))); + // Find all answers with the question ID + let list_lecture_questions_all_with_answer = list_lecture_questions_all_question_only + .clone() + .join(curr_answers_no_api_key.clone()) + .map(q!(|(question_id, ((client_id, question), answer))| { + (client_id, (question_id, question, Some(answer))) + })); + // Find all questions without answers + let list_lecture_questions_all_no_answer = list_lecture_questions_all_question_only + 
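+ // anti_join keeps only the question_ids that never matched an answer above; those questions are emitted with answer = None.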
.anti_join(curr_answers_no_api_key.map(q!(|(question_id, _answer)| question_id))) + .map(q!(|(question_id, (client_id, question))| ( + client_id, + (question_id, question, None) + ))); + let list_lecture_questions_all_response = list_lecture_questions_all_with_answer + .chain(list_lecture_questions_all_no_answer) + .into_keyed() + .fold_commutative_idempotent( + q!(|| HashMap::new()), + q!(|map, (question_id, question, answer)| { + let (_question, set_of_answers) = + map.entry(question_id).or_insert((question, HashSet::new())); + if let Some(answer) = answer { + set_of_answers.insert(answer); + } + }), + ) + .entries() + .all_ticks(); + + // List lecture questions user + let list_lecture_questions_user_pre_join = + list_lecture_questions_user.map(q!(|(client_id, (api_key, lecture_id))| { + (api_key, (client_id, lecture_id)) + })); + let list_lecture_questions_user_auth = list_lecture_questions_user_pre_join + .batch( + &user_auth_tick, + nondet!(/** Compare against current users to approve/deny access */), + ) + .join(curr_users.clone()) + .all_ticks() + .map(q!(|( + api_key, + ((client_id, lecture_id), (_email, _is_admin)), + )| (lecture_id, (client_id, api_key)))); + let list_lecture_questions_user_question_only = list_lecture_questions_user_auth + .batch( + &question_answer_tick, + nondet!(/** Join with snapshot of current questions */), + ) + .join(curr_questions) + .map(q!(|( + _lecture_id, + ((client_id, api_key), (question_id, question)), + )| ( + (question_id, api_key), + (client_id, question) + ))); + // Find all answers with the question ID + let list_lecture_questions_user_with_answer = list_lecture_questions_user_question_only + .clone() + .join(curr_answers.clone()) + .map(q!(|( + (question_id, _api_key), + ((client_id, question), answer), + )| { + (client_id, (question_id, question, Some(answer))) + })); + // Find all questions without answers + let list_lecture_questions_user_no_answer = list_lecture_questions_user_question_only + .anti_join(curr_answers.map(q!(|(k, _)| k))) + .map(q!(|((question_id, _api_key), (client_id, question))| ( + client_id, + (question_id, question, None) + ))); + let list_lecture_questions_user_response = list_lecture_questions_user_with_answer + .chain(list_lecture_questions_user_no_answer) + .into_keyed() + .fold_commutative_idempotent( + q!(|| HashMap::new()), + q!(|map, (question_id, question, answer)| { + map.insert(question_id, (question, answer)); + }), + ) + .entries() + .all_ticks(); + + ( + add_lecture_response, + add_question_response, + add_user_response, + get_users_response, + list_lectures_response, + list_lecture_questions_all_response, + list_lecture_questions_user_response, + add_answer_response, + ) +} + +fn generate_api_key(email: String) -> String { + let secret = "There is no secret ingredient"; + let mut hasher = Sha256::new(); + hasher.update(email.as_bytes()); + hasher.update(secret.as_bytes()); + let hash = hasher.finalize(); + format!("{:x}", hash) +} + +fn send_email(_api_key: String, _email: String) {} From 505b332bfad2d9c6f995509ffc62dc45bffe8dab Mon Sep 17 00:00:00 2001 From: David Chu Date: Tue, 23 Sep 2025 00:04:14 +0000 Subject: [PATCH 03/12] Change dependencies to be local, merge with hydro main (add support for ChainFirst, remove delta, duplicate private functions), change simple_graphs to be time-based --- Cargo.lock | 10 +- Cargo.toml | 18 ++- hydro_optimize/Cargo.toml | 30 ++-- hydro_optimize/src/decoupler.rs | 17 +- hydro_optimize/src/deploy_and_analyze.rs | 1 + hydro_optimize/src/partition_node_analysis.rs | 
153 +++++++++--------- hydro_optimize/src/partitioner.rs | 5 +- hydro_optimize/src/rewrites.rs | 56 +++++++ hydro_optimize_examples/Cargo.toml | 30 ++-- hydro_optimize_examples/src/simple_graphs.rs | 6 +- .../src/simple_kv_bench.rs | 2 + hydro_optimize_examples/src/web_submit.rs | 2 + 12 files changed, 206 insertions(+), 124 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 45112e3..972763a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1049,7 +1049,6 @@ dependencies = [ [[package]] name = "dfir_lang" version = "0.14.0" -source = "git+https://github.com/hydro-project/hydro.git#c01575c6f8d249cb0f96e0bc92761bae309e294b" dependencies = [ "auto_impl", "documented", @@ -1705,7 +1704,6 @@ checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" [[package]] name = "hydro_build_utils" version = "0.0.1" -source = "git+https://github.com/hydro-project/hydro.git#c01575c6f8d249cb0f96e0bc92761bae309e294b" dependencies = [ "insta", "rustc_version", @@ -1714,7 +1712,6 @@ dependencies = [ [[package]] name = "hydro_deploy" version = "0.14.0" -source = "git+https://github.com/hydro-project/hydro.git#c01575c6f8d249cb0f96e0bc92761bae309e294b" dependencies = [ "anyhow", "async-process", @@ -1750,7 +1747,6 @@ dependencies = [ [[package]] name = "hydro_deploy_integration" version = "0.14.0" -source = "git+https://github.com/hydro-project/hydro.git#c01575c6f8d249cb0f96e0bc92761bae309e294b" dependencies = [ "async-recursion", "async-trait", @@ -1767,7 +1763,6 @@ dependencies = [ [[package]] name = "hydro_lang" version = "0.14.0" -source = "git+https://github.com/hydro-project/hydro.git#c01575c6f8d249cb0f96e0bc92761bae309e294b" dependencies = [ "auto_impl", "backtrace", @@ -1816,7 +1811,9 @@ dependencies = [ "hydro_std", "hydro_test", "include_mdtests", + "proc-macro-crate", "proc-macro2", + "quote", "regex", "serde", "stageleft", @@ -1848,7 +1845,6 @@ dependencies = [ [[package]] name = "hydro_std" version = "0.14.0" -source = "git+https://github.com/hydro-project/hydro.git#c01575c6f8d249cb0f96e0bc92761bae309e294b" dependencies = [ "hdrhistogram", "hydro_lang", @@ -1860,7 +1856,6 @@ dependencies = [ [[package]] name = "hydro_test" version = "0.0.0" -source = "git+https://github.com/hydro-project/hydro.git#c01575c6f8d249cb0f96e0bc92761bae309e294b" dependencies = [ "bytes", "colored", @@ -2072,7 +2067,6 @@ dependencies = [ [[package]] name = "include_mdtests" version = "0.0.0" -source = "git+https://github.com/hydro-project/hydro.git#c01575c6f8d249cb0f96e0bc92761bae309e294b" dependencies = [ "glob", "proc-macro2", diff --git a/Cargo.toml b/Cargo.toml index 2553f9b..8b129b8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,4 +5,20 @@ members = [ "hydro_optimize_examples", ] -resolver = "2" \ No newline at end of file +resolver = "2" + +[workspace.dependencies] +hydro_lang = { path = "../hydroflow/hydro_lang" } +hydro_std = { path = "../hydroflow/hydro_std" } +hydro_test = { path = "../hydroflow/hydro_test" } +dfir_lang = { path = "../hydroflow/dfir_lang" } +hydro_build_utils = { path = "../hydroflow/hydro_build_utils" } +hydro_deploy = { path = "../hydroflow/hydro_deploy/core" } +include_mdtests = { path = "../hydroflow/include_mdtests" } +serde = { version = "1.0.197", features = ["derive"] } +stageleft = "0.9.7" +stageleft_tool = "0.9.7" +tokio = { version = "1.29.0", features = ["full"] } +ctor = "0.2" +clap = { version = "4.4", features = ["derive"] } +regex = "1.11.1" \ No newline at end of file diff --git a/hydro_optimize/Cargo.toml b/hydro_optimize/Cargo.toml index 2c664e4..ef16de4 100644 
--- a/hydro_optimize/Cargo.toml +++ b/hydro_optimize/Cargo.toml @@ -11,27 +11,29 @@ all-features = true [dependencies] good_lp = { version = "1.14.0", features = ["microlp"], default-features = false } -hydro_deploy = { git = "https://github.com/hydro-project/hydro.git" } -hydro_lang = { git = "https://github.com/hydro-project/hydro.git", features = ["deploy"] } +hydro_deploy.workspace = true +hydro_lang = { workspace = true, features = ["deploy"] } proc-macro2 = "1.0.95" -regex = "1.11.1" -serde = { version = "1.0.197", features = ["derive"] } -stageleft = "0.9.7" +regex.workspace = true +serde.workspace = true +stageleft.workspace = true syn = { version = "2.0.46", features = [ "parsing", "extra-traits", "visit-mut", ] } -tokio = { version = "1.29.0", features = ["full"] } +tokio.workspace = true +proc-macro-crate = "3.3" +quote = "1.0.35" [dev-dependencies] -ctor = "0.2" -hydro_build_utils = { git = "https://github.com/hydro-project/hydro.git", version = "0.0.1" } -hydro_lang = { git = "https://github.com/hydro-project/hydro.git", version = "^0.14.0", features = ["viz"] } -hydro_test = { git = "https://github.com/hydro-project/hydro.git", version = "^0.0.0" } -hydro_std = { git = "https://github.com/hydro-project/hydro.git", version = "^0.14.0" } -clap = { version = "4.4", features = ["derive"] } -include_mdtests = { git = "https://github.com/hydro-project/hydro.git", version = "^0.0.0" } +ctor.workspace = true +hydro_build_utils.workspace = true +hydro_lang = { workspace = true, features = ["viz"] } +hydro_test.workspace = true +hydro_std.workspace = true +clap.workspace = true +include_mdtests.workspace = true [build-dependencies] -hydro_build_utils = { git = "https://github.com/hydro-project/hydro.git", version = "0.0.1" } \ No newline at end of file +hydro_build_utils.workspace = true \ No newline at end of file diff --git a/hydro_optimize/src/decoupler.rs b/hydro_optimize/src/decoupler.rs index 6a4d512..e7d6cd4 100644 --- a/hydro_optimize/src/decoupler.rs +++ b/hydro_optimize/src/decoupler.rs @@ -6,9 +6,6 @@ use hydro_lang::compile::ir::{ DebugInstantiate, DebugType, HydroIrMetadata, HydroIrOpMetadata, HydroNode, HydroRoot, TeeNode, transform_bottom_up, traverse_dfir, }; -use hydro_lang::live_collections::stream::networking::{ - deserialize_bincode_with_type, serialize_bincode_with_type, -}; use hydro_lang::location::MemberId; use hydro_lang::location::dynamic::LocationId; use proc_macro2::Span; @@ -18,7 +15,7 @@ use syn::visit_mut::VisitMut; use crate::parse_results::{MultiRunMetadata, get_or_append_run_metadata}; use crate::repair::{cycle_source_to_sink_input, inject_id, inject_location}; -use crate::rewrites::ClusterSelfIdReplace; +use crate::rewrites::{deserialize_bincode_with_type, serialize_bincode_with_type, ClusterSelfIdReplace}; #[derive(Clone, Serialize, Deserialize)] pub struct Decoupler { @@ -287,10 +284,12 @@ mod tests { use hydro_build_utils::insta; use hydro_deploy::Deployment; use hydro_lang::compile::builder::FlowBuilder; + use hydro_lang::compile::built::BuiltFlow; use hydro_lang::compile::ir; use hydro_lang::compile::rewrites::persist_pullup::persist_pullup; use hydro_lang::location::Location; use hydro_lang::nondet::nondet; + use hydro_lang::prelude::Cluster; use stageleft::q; use crate::debug::name_to_id_map; @@ -302,10 +301,10 @@ mod tests { output_to_original_machine_after: Vec<(&str, i32)>, place_on_decoupled_machine: Vec<(&str, i32)>, ) -> ( - hydro_lang::location::Cluster<'a, ()>, - hydro_lang::location::Cluster<'a, ()>, - hydro_lang::location::Cluster<'a, 
()>, - hydro_lang::compile::built::BuiltFlow<'a>, + Cluster<'a, ()>, + Cluster<'a, ()>, + Cluster<'a, ()>, + BuiltFlow<'a>, ) { let builder = FlowBuilder::new(); let send_cluster = builder.cluster::<()>(); @@ -318,6 +317,8 @@ mod tests { .ir_node_named("map") .broadcast_bincode(&recv_cluster, nondet!(/** test */)) .values() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|a| println!("Got it: {}", a))); let multi_run_metadata = RefCell::new(vec![]); diff --git a/hydro_optimize/src/deploy_and_analyze.rs b/hydro_optimize/src/deploy_and_analyze.rs index 068c034..427f68d 100644 --- a/hydro_optimize/src/deploy_and_analyze.rs +++ b/hydro_optimize/src/deploy_and_analyze.rs @@ -36,6 +36,7 @@ fn insert_counter_node(node: &mut HydroNode, next_stmt_id: &mut usize, duration: | HydroNode::Persist { metadata, .. } | HydroNode::Delta { metadata, .. } | HydroNode::Chain { metadata, .. } // Can technically be derived by summing parent cardinalities + | HydroNode::ChainFirst { metadata, .. } // Can technically be derived by taking parent cardinality + 1 | HydroNode::CrossSingleton { metadata, .. } | HydroNode::CrossProduct { metadata, .. } // Can technically be derived by multiplying parent cardinalities | HydroNode::Join { metadata, .. } diff --git a/hydro_optimize/src/partition_node_analysis.rs b/hydro_optimize/src/partition_node_analysis.rs index a6a0e6c..1938bfa 100644 --- a/hydro_optimize/src/partition_node_analysis.rs +++ b/hydro_optimize/src/partition_node_analysis.rs @@ -190,7 +190,8 @@ fn input_dependency_analysis_node( } } // Alters parent in a predictable way - HydroNode::Chain { .. } => { + HydroNode::Chain { .. } + | HydroNode::ChainFirst { .. } => { assert_eq!(parent_ids.len(), 2, "Node {:?} has the wrong number of parents.", node); // [a,b] chain [c,d] = [a,b,c,d]. Take the intersection of dependencies of the two parents for each input. If only one parent is tainted, then just take that dependency for (input_id, parent_positions) in parent_taints { @@ -538,10 +539,32 @@ fn partitioning_constraint_analysis_node( } HydroNode::Reduce { .. } | HydroNode::Fold { .. } + | HydroNode::Scan { .. } | HydroNode::Enumerate { .. } | HydroNode::CrossProduct { .. } | HydroNode::CrossSingleton { .. } => {} // Partitioning is impossible - _ => { + HydroNode::Placeholder + | HydroNode::Source { .. } + | HydroNode::CycleSource { .. } + | HydroNode::Tee { .. } + | HydroNode::Persist { .. } + | HydroNode::Unpersist { .. } + | HydroNode::Delta { .. } + | HydroNode::Chain { .. } + | HydroNode::ChainFirst { .. } + | HydroNode::ResolveFutures { .. } + | HydroNode::ResolveFuturesOrdered { .. } + | HydroNode::Map { .. } + | HydroNode::FlatMap { .. } + | HydroNode::Filter { .. } + | HydroNode::FilterMap { .. } + | HydroNode::DeferTick { .. } + | HydroNode::Inspect { .. } + | HydroNode::Unique { .. } + | HydroNode::Sort { .. } + | HydroNode::Network { .. } + | HydroNode::ExternalInput { .. } + | HydroNode::Counter { .. 
} => { // Doesn't impede partitioning, return return; } @@ -900,6 +923,8 @@ mod tests { .ir_node_named("the map following network") .map(q!(|(a, b)| (b, a + 2))) .ir_node_named("the operator being tested") + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|(b, a2)| { println!("b: {}, a+2: {}", b, a2); })); @@ -947,6 +972,8 @@ mod tests { .broadcast_bincode(&cluster2, nondet!(/** test */)) .values() .map(q!(|(a, b)| (b, a + 2))) + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|(b, a2)| { println!("b: {}, a+2: {}", b, a2); })); @@ -970,6 +997,8 @@ mod tests { .ir_node_named("map 1") .map(q!(|(b1, _a, b0a)| (b0a, b1.0))) .ir_node_named("map 2") + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|(b0a, b10)| { println!("b.0 - a: {}, b.1.0: {}", b0a, b10); })); @@ -1039,6 +1068,8 @@ mod tests { .ir_node_named("map after network") .filter_map(q!(|(a, b)| { if a > 1 { Some((b, a + 2)) } else { None } })) .ir_node_named("operator") + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|(b, a2)| { println!("b: {}, a+2: {}", b, a2); })); @@ -1097,6 +1128,8 @@ mod tests { } })) .ir_node_named("operator") + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|(none, a2)| { println!("None: {:?}, a+2: {}", none, a2); })); @@ -1126,73 +1159,6 @@ mod tests { ); } - #[test] - fn test_delta() { - let builder = FlowBuilder::new(); - let cluster1 = builder.cluster::<()>(); - let cluster2 = builder.cluster::<()>(); - cluster1 - .source_iter(q!([(1, 2)])) - .broadcast_bincode(&cluster2, nondet!(/** test */)) - .ir_node_named("network") - .values() - .ir_node_named("map after network") - .batch(&cluster2.tick(), nondet!(/** test */)) - .delta() - .ir_node_named("operator") - .all_ticks() - .for_each(q!(|(a, b)| { - println!("a: {}, b: {}", a, b); - })); - - let expected_taint = BTreeMap::from([ - ("map after network", BTreeSet::from(["network"])), - ("operator", BTreeSet::from(["network"])), - ]); - - let mut implicit_map_dependencies = StructOrTuple::default(); - implicit_map_dependencies.add_dependency(&vec![], vec!["1".to_string()]); - - let expected_dependencies = BTreeMap::from([ - ("network", BTreeMap::new()), - ( - "map after network", - BTreeMap::from([("network", implicit_map_dependencies.clone())]), - ), - ( - "operator", - BTreeMap::from([("network", implicit_map_dependencies)]), - ), // No dependency changes from parent - ]); - - test_input( - builder, - cluster2.id(), - expected_taint, - expected_dependencies, - ); - } - - #[test] - fn test_delta_partitionable() { - let builder = FlowBuilder::new(); - let cluster1 = builder.cluster::<()>(); - let cluster2 = builder.cluster::<()>(); - cluster1 - .source_iter(q!([(1, 2)])) - .broadcast_bincode(&cluster2, nondet!(/** test */)) - .values() - .batch(&cluster2.tick(), nondet!(/** test */)) - .delta() - .all_ticks() - .for_each(q!(|(a, b)| { - println!("a: {}, b: {}", a, b); - })); - - let expected_partitionings = Some(Vec::new()); // No partitioning constraints - test_input_partitionable(builder, cluster2.id(), expected_partitionings); - } - #[test] fn test_chain() { let builder = FlowBuilder::new(); @@ -1219,6 +1185,8 @@ mod tests { .chain(stream1.batch(&tick, nondet!(/** test */))) .ir_node_named("chain") .all_ticks() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|((x, b1), y)| { println!("x: {}, 
b.1: {}, y: {}", x, b1, y); })); @@ -1303,6 +1271,8 @@ mod tests { .batch(&tick, nondet!(/** test */)) .chain(stream1.batch(&tick, nondet!(/** test */))) .all_ticks() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|((x, b1), y)| { println!("x: {}, b.1: {}, y: {}", x, b1, y); })); @@ -1337,6 +1307,8 @@ mod tests { .cross_product(stream1.batch(&tick, nondet!(/** test */))) .ir_node_named("cross product") .all_ticks() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|(((b1, b1_again), a3), (b, a2))| { println!("((({}, {}), {}), ({:?}, {}))", b1, b1_again, a3, b, a2); })); @@ -1432,6 +1404,8 @@ mod tests { .batch(&tick, nondet!(/** test */)) .cross_product(stream1.batch(&tick, nondet!(/** test */))) .all_ticks() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|(((b1, b1_again), a3), (b, a2))| { println!("((({}, {}), {}), ({:?}, {}))", b1, b1_again, a3, b, a2); })); @@ -1466,6 +1440,8 @@ mod tests { .join(stream1.batch(&tick, nondet!(/** test */))) .ir_node_named("join") .all_ticks() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|((b1, b1_again), (a3, a))| { println!("(({}, {}), {}, {})", b1, b1_again, a3, a); })); @@ -1571,6 +1547,8 @@ mod tests { .batch(&tick, nondet!(/** test */)) .join(stream1.batch(&tick, nondet!(/** test */))) .all_ticks() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|((b1, b1_again), (a3, a))| { println!("(({}, {}), {}, {})", b1, b1_again, a3, a); })); @@ -1677,6 +1655,8 @@ mod tests { .ir_node_named("reduce keyed") .entries() .all_ticks() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|(a, b_sum)| { println!("a: {}, b_sum: {}", a, b_sum); })); @@ -1729,6 +1709,8 @@ mod tests { .reduce_commutative(q!(|acc, b| *acc += b)) .entries() .all_ticks() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|(a, b_sum)| { println!("a: {}, b_sum: {}", a, b_sum); })); @@ -1839,6 +1821,8 @@ mod tests { cycle .ir_node_named("teed cycle 2") .all_ticks() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|(a, b)| { println!("a: {}, b: {}", a, b); })); @@ -1934,9 +1918,13 @@ mod tests { prev_tick_input.chain(input.batch(&cluster2_tick, nondet!(/** test */))), ); - cycle.all_ticks().for_each(q!(|(a, b)| { - println!("a: {}, b: {}", a, b); - })); + cycle + .all_ticks() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) + .for_each(q!(|(a, b)| { + println!("a: {}, b: {}", a, b); + })); let expected_partitionings = Some(Vec::new()); test_input_partitionable(builder, cluster2.id(), expected_partitionings); @@ -1979,6 +1967,8 @@ mod tests { cycle2_out .ir_node_named("teed map (a,b) to (b,b) 2") .all_ticks() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|(b, _)| { println!("b: {}", b); })); @@ -2128,7 +2118,8 @@ mod tests { complete_cycle1.complete_next_tick(chained.clone()); let cycle2_out = chained.map(q!(|(_a, b)| (b, b))); complete_cycle2.complete_next_tick(cycle2_out.clone()); - cycle2_out.all_ticks().for_each(q!(|(b, _)| { + cycle2_out.all_ticks().assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)).for_each(q!(|(b, _)| { println!("b: {}", b); })); @@ -2160,6 +2151,8 @@ mod tests { .chain(stream1.batch(&tick, nondet!(/** test 
*/))) .ir_node_named("chain") .all_ticks() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|_| { println!("No dependencies"); })); @@ -2221,6 +2214,8 @@ mod tests { .batch(&tick, nondet!(/** test */)) .chain(stream1.batch(&tick, nondet!(/** test */))) .all_ticks() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|_| { println!("No dependencies"); })); @@ -2259,6 +2254,8 @@ mod tests { .chain(stream1.clone().ir_node_named("teed map1 1")) .ir_node_named("chain") .all_ticks() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|_| { println!("Dependent on both input1.b and input2.b"); })); @@ -2267,6 +2264,8 @@ mod tests { .join(stream1.ir_node_named("teed map1 2")) .ir_node_named("join") .all_ticks() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|(_, (b1, b2))| { println!("b from input 1: {}, b from input 2: {}", b1, b2); })); @@ -2393,12 +2392,16 @@ mod tests { .clone() .chain(stream1.clone()) .all_ticks() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|_| { println!("Dependent on both input1.b and input2.b"); })); stream2 .join(stream1) .all_ticks() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|(_, (a1, a2))| { println!("a*2 from input 1: {}, -a from input 2: {}", a1, a2); })); @@ -2433,6 +2436,8 @@ mod tests { .filter_not_in(input2.batch(&tick, nondet!(/** test */))) .ir_node_named("difference") .all_ticks() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|(a, b)| { println!("a: {}, b: {}", a, b); })); @@ -2491,6 +2496,8 @@ mod tests { .batch(&tick, nondet!(/** test */)) .filter_not_in(input2.batch(&tick, nondet!(/** test */))) .all_ticks() + .assume_ordering(nondet!(/** test */)) + .assume_retries(nondet!(/** test */)) .for_each(q!(|(a, b)| { println!("a: {}, b: {}", a, b); })); diff --git a/hydro_optimize/src/partitioner.rs b/hydro_optimize/src/partitioner.rs index 3dc1cea..b11b91e 100644 --- a/hydro_optimize/src/partitioner.rs +++ b/hydro_optimize/src/partitioner.rs @@ -2,16 +2,13 @@ use core::panic; use std::collections::HashMap; use hydro_lang::compile::ir::{HydroNode, HydroRoot, traverse_dfir}; -use hydro_lang::live_collections::stream::networking::{ - deserialize_bincode_with_type, serialize_bincode_with_type, -}; use hydro_lang::location::dynamic::LocationId; use serde::{Deserialize, Serialize}; use syn::visit_mut::{self, VisitMut}; use crate::partition_syn_analysis::StructOrTupleIndex; use crate::repair::inject_id; -use crate::rewrites::{ClusterSelfIdReplace, NetworkType, get_network_type}; +use crate::rewrites::{deserialize_bincode_with_type, get_network_type, serialize_bincode_with_type, ClusterSelfIdReplace, NetworkType}; #[derive(Clone, Serialize, Deserialize)] pub struct Partitioner { diff --git a/hydro_optimize/src/rewrites.rs b/hydro_optimize/src/rewrites.rs index a3f6a3d..274b80e 100644 --- a/hydro_optimize/src/rewrites.rs +++ b/hydro_optimize/src/rewrites.rs @@ -6,7 +6,10 @@ use hydro_lang::compile::ir::{HydroIrMetadata, HydroNode, HydroRoot, deep_clone, use hydro_lang::location::dynamic::LocationId; use hydro_lang::location::{Cluster, Location}; use serde::{Deserialize, Serialize}; +use syn::parse_quote; use syn::visit_mut::{self, VisitMut}; +use proc_macro2::{Span, TokenStream}; +use quote::quote; use crate::decoupler::{self, Decoupler}; use 
crate::partitioner::Partitioner; @@ -200,3 +203,56 @@ pub fn get_network_type(node: &HydroNode, location: usize) -> Option<NetworkType> +fn get_this_crate() -> TokenStream { + let hydro_lang_crate = proc_macro_crate::crate_name("hydro_lang") + .expect("hydro_lang should be present in `Cargo.toml`"); + match hydro_lang_crate { + proc_macro_crate::FoundCrate::Itself => quote! { hydro_lang }, + proc_macro_crate::FoundCrate::Name(name) => { + let ident = syn::Ident::new(&name, Span::call_site()); + quote! { #ident } + } + } +} + +pub fn serialize_bincode_with_type(is_demux: bool, t_type: &syn::Type) -> syn::Expr { + let root = get_this_crate(); + + if is_demux { + parse_quote! { + ::#root::runtime_support::stageleft::runtime_support::fn1_type_hint::<(#root::location::MemberId<_>, #t_type), _>( + |(id, data)| { + (id.raw_id, #root::runtime_support::bincode::serialize(&data).unwrap().into()) + } + ) + } + } else { + parse_quote! { + ::#root::runtime_support::stageleft::runtime_support::fn1_type_hint::<#t_type, _>( + |data| { + #root::runtime_support::bincode::serialize(&data).unwrap().into() + } + ) + } + } +} + +pub fn deserialize_bincode_with_type(tagged: Option<&syn::Type>, t_type: &syn::Type) -> syn::Expr { + let root = get_this_crate(); + + if let Some(c_type) = tagged { + parse_quote! { + |res| { + let (id, b) = res.unwrap(); + (#root::location::MemberId::<#c_type>::from_raw(id), #root::runtime_support::bincode::deserialize::<#t_type>(&b).unwrap()) + } + } + } else { + parse_quote! { + |res| { + #root::runtime_support::bincode::deserialize::<#t_type>(&res.unwrap()).unwrap() + } + } + } +} \ No newline at end of file diff --git a/hydro_optimize_examples/Cargo.toml b/hydro_optimize_examples/Cargo.toml index 32792ed..342f76c 100644 --- a/hydro_optimize_examples/Cargo.toml +++ b/hydro_optimize_examples/Cargo.toml @@ -8,24 +8,24 @@ edition = "2024" all-features = true [dependencies] -hydro_lang = { git = "https://github.com/hydro-project/hydro.git" } -hydro_std = { git = "https://github.com/hydro-project/hydro.git" } -hydro_test = { git = "https://github.com/hydro-project/hydro.git" } -serde = { version = "1.0.197", features = ["derive"] } +hydro_lang.workspace = true +hydro_std.workspace = true +hydro_test.workspace = true +serde.workspace = true sha2 = "0.10.9" -stageleft = "0.9.7" -tokio = { version = "1.29.0", features = ["full"] } +stageleft.workspace = true +tokio.workspace = true [dev-dependencies] -ctor = "0.2" -clap = { version = "4.4", features = ["derive"] } -dfir_lang = { git = "https://github.com/hydro-project/hydro.git" } -hydro_build_utils = { git = "https://github.com/hydro-project/hydro.git", version = "0.0.1" } -hydro_deploy = { git = "https://github.com/hydro-project/hydro.git" } -hydro_lang = { git = "https://github.com/hydro-project/hydro.git", features = ["deploy", "viz"] } +ctor.workspace = true +clap.workspace = true +dfir_lang.workspace = true +hydro_build_utils.workspace = true +hydro_deploy.workspace = true +hydro_lang = { workspace = true, features = ["deploy", "viz"] } hydro_optimize = { path = "../hydro_optimize" } -regex = "1.11.1" +regex.workspace = true [build-dependencies] -stageleft_tool = "0.9.7" -hydro_build_utils = { git = "https://github.com/hydro-project/hydro.git", version = "0.0.1" } \ No newline at end of file +stageleft_tool.workspace = true +hydro_build_utils.workspace = true \ No newline at end of file diff --git a/hydro_optimize_examples/src/simple_graphs.rs b/hydro_optimize_examples/src/simple_graphs.rs index b550336..7a464ee 100644 --- a/hydro_optimize_examples/src/simple_graphs.rs 
+++ b/hydro_optimize_examples/src/simple_graphs.rs @@ -28,13 +28,17 @@ impl<'a, F> GraphFunction<'a> for F where } fn sha256(n: u32) -> u32 { + let start_time = std::time::Instant::now(); let mut sha_input = n; - for _ in 0..n { + loop { let mut sha = Sha256::new(); sha.update(sha_input.to_be_bytes()); let sha_output = sha.finalize(); sha_input = sha_output[0].into(); + if start_time.elapsed().as_micros() >= n.into() { + break; + } } sha_input diff --git a/hydro_optimize_examples/src/simple_kv_bench.rs b/hydro_optimize_examples/src/simple_kv_bench.rs index 89aaa1b..da8e1f9 100644 --- a/hydro_optimize_examples/src/simple_kv_bench.rs +++ b/hydro_optimize_examples/src/simple_kv_bench.rs @@ -37,6 +37,8 @@ pub fn simple_kv_bench<'a>( })) .entries() .all_ticks() + .assume_ordering(nondet!(/** for_each does nothing, just need to end on a HydroLeaf */)) + .assume_retries(nondet!(/** for_each does nothing, just need to end on a HydroLeaf */)) .for_each(q!(|_| {})); // Do nothing, just need to end on a HydroLeaf // Send committed requests back to the original client diff --git a/hydro_optimize_examples/src/web_submit.rs b/hydro_optimize_examples/src/web_submit.rs index 61d216b..2ba935b 100644 --- a/hydro_optimize_examples/src/web_submit.rs +++ b/hydro_optimize_examples/src/web_submit.rs @@ -114,6 +114,8 @@ pub fn web_submit<'a, Client>( users_this_tick_with_api_key .clone() .all_ticks() + .assume_ordering(nondet!(/** Email order doesn't matter */)) + .assume_retries(nondet!(/** At least once delivery is fine */)) .for_each(q!(|(_client_id, (email, _is_admin, api_key))| { self::send_email(api_key, email) })); From 9a6f6ff8c220ad9919f72357e9878ac0895a4fc5 Mon Sep 17 00:00:00 2001 From: David Chu Date: Tue, 23 Sep 2025 00:19:46 +0000 Subject: [PATCH 04/12] Fixes for cases where no send/recv overhead is recorded, added noop experiment --- hydro_optimize/src/deploy_and_analyze.rs | 4 ++-- hydro_optimize/src/parse_results.rs | 4 ++-- hydro_optimize_examples/src/simple_graphs.rs | 8 ++++++++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/hydro_optimize/src/deploy_and_analyze.rs b/hydro_optimize/src/deploy_and_analyze.rs index 427f68d..46e14d8 100644 --- a/hydro_optimize/src/deploy_and_analyze.rs +++ b/hydro_optimize/src/deploy_and_analyze.rs @@ -216,8 +216,8 @@ pub async fn deploy_and_analyze<'a>( // Create a mapping from each CycleSink to its corresponding CycleSource let cycle_source_to_sink_input = cycle_source_to_sink_input(&mut ir); analyze_send_recv_overheads(&mut ir, run_metadata); - let send_overhead = *run_metadata.send_overhead.get(&bottleneck).unwrap(); - let recv_overhead = *run_metadata.recv_overhead.get(&bottleneck).unwrap(); + let send_overhead = run_metadata.send_overhead.get(&bottleneck).cloned().unwrap_or_default(); + let recv_overhead = run_metadata.recv_overhead.get(&bottleneck).cloned().unwrap_or_default(); // Check the expected/actual CPU usages before/after rewrites std::mem::drop(mut_multi_run_metadata); // Release borrow diff --git a/hydro_optimize/src/parse_results.rs b/hydro_optimize/src/parse_results.rs index 2b0e557..741f3ce 100644 --- a/hydro_optimize/src/parse_results.rs +++ b/hydro_optimize/src/parse_results.rs @@ -589,7 +589,7 @@ pub fn compare_expected_performance( { compare_expected_values( cpu_usage, - prev_run_metadata.send_overhead.get(prev_location).unwrap() + prev_run_metadata.send_overhead.get(prev_location).cloned().unwrap_or_default() * *prev_cardinality as f64, location, prev_location, @@ -605,7 +605,7 @@ pub fn compare_expected_performance( { 
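+ // Note: mirrors the send_overhead case above; a location with no recorded recv overhead now contributes a default of 0.0 instead of panicking on unwrap().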
compare_expected_values( cpu_usage, - prev_run_metadata.recv_overhead.get(prev_location).unwrap() + prev_run_metadata.recv_overhead.get(prev_location).cloned().unwrap_or_default() * *prev_cardinality as f64, &location, prev_location, diff --git a/hydro_optimize_examples/src/simple_graphs.rs b/hydro_optimize_examples/src/simple_graphs.rs index 7a464ee..08fbb98 100644 --- a/hydro_optimize_examples/src/simple_graphs.rs +++ b/hydro_optimize_examples/src/simple_graphs.rs @@ -48,6 +48,7 @@ fn sha256(n: u32) -> u32 { pub fn get_graph_function<'a>(name: &str) -> impl GraphFunction<'a> { match name { + "noop" => noop, "map_h_map_h_map_h" => map_h_map_h_map_h, "map_h_map_h_map_l" => map_h_map_h_map_l, "map_h_map_l_map_h" => map_h_map_l_map_h, @@ -84,6 +85,13 @@ pub fn get_graph_function<'a>(name: &str) -> impl GraphFunction<'a> { } } +pub fn noop<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream<MemberId<Client>, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream<MemberId<Client>, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + payloads +} + pub fn map_h_map_h_map_h<'a>( _server: &Cluster<'a, Server>, payloads: KeyedStream<MemberId<Client>, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, From 4ea9bcf1918c053a4920d944068fd14ab70a6fc2 Mon Sep 17 00:00:00 2001 From: David Chu Date: Mon, 27 Oct 2025 23:13:26 +0000 Subject: [PATCH 05/12] Adopting changes to HydroNode, HydroRoot, and HydroIrMetadata. WIP --- hydro_optimize/src/partitioner.rs | 54 ++++++++++++++---------------- hydro_optimize/src/repair.rs | 54 ++++++++++-------------------- hydro_optimize/src/rewrites.rs | 19 ++++++++++- hydro_optimize/src/tests/mod.rs | 3 -- hydro_optimize/src/tests/two_pc.rs | 3 -- 5 files changed, 62 insertions(+), 71 deletions(-) diff --git a/hydro_optimize/src/partitioner.rs b/hydro_optimize/src/partitioner.rs index b11b91e..4c427ae 100644 --- a/hydro_optimize/src/partitioner.rs +++ b/hydro_optimize/src/partitioner.rs @@ -1,14 +1,15 @@ use core::panic; use std::collections::HashMap; -use hydro_lang::compile::ir::{HydroNode, HydroRoot, traverse_dfir}; +use hydro_lang::compile::ir::{BoundKind, CollectionKind, DebugType, HydroNode, HydroRoot, KeyedSingletonBoundKind, StreamOrder, StreamRetry, traverse_dfir}; +use hydro_lang::live_collections::keyed_singleton::KeyedSingletonBound; use hydro_lang::location::dynamic::LocationId; use serde::{Deserialize, Serialize}; use syn::visit_mut::{self, VisitMut}; use crate::partition_syn_analysis::StructOrTupleIndex; use crate::repair::inject_id; -use crate::rewrites::{deserialize_bincode_with_type, get_network_type, serialize_bincode_with_type, ClusterSelfIdReplace, NetworkType}; +use crate::rewrites::{ClusterSelfIdReplace, NetworkType, collection_kind_to_debug_type, deserialize_bincode_with_type, get_network_type, serialize_bincode_with_type}; #[derive(Clone, Serialize, Deserialize)] pub struct Partitioner { @@ -125,11 +126,29 @@ fn replace_sender_dest(node: &mut HydroNode, partitioner: &Partitioner, next_stm let f: syn::Expr = if new_cluster_id.is_some() { // Output type of Map now includes dest ID - let original_output_type = *metadata.output_type.clone().unwrap().0; - let new_output_type: syn::Type = syn::parse_quote! { - (::hydro_lang::location::MemberId<()>, #original_output_type) + let member_id_syn_type: syn::Type = syn::parse_quote! { ::hydro_lang::location::MemberId<()> }; + let member_id_debug_type = DebugType::from(member_id_syn_type); + metadata.collection_kind = match metadata.collection_kind { + CollectionKind::Singleton { element_type, .. 
} + | CollectionKind::Optional { element_type, .. } => { + CollectionKind::KeyedSingleton { + bound: KeyedSingletonBoundKind::Unbounded, + key_type: member_id_debug_type, + value_type: element_type, + } + } + CollectionKind::Stream { .. } + | CollectionKind::KeyedStream { .. } + | CollectionKind::KeyedSingleton { .. } => { + CollectionKind::KeyedStream { + bound: BoundKind::Unbounded, + value_order: StreamOrder::NoOrder, + value_retry: StreamRetry::ExactlyOnce, + key_type: member_id_debug_type, + value_type: collection_kind_to_debug_type(&metadata.collection_kind), + } + } }; - metadata.output_type = Some(new_output_type.into()); // Partitioning a process into a cluster syn::parse_quote!( @@ -220,7 +239,7 @@ fn replace_network_serialization(node: &mut HydroNode, partitioner: &Partitioner panic!("Expected a HydroNode::Network, but found {:?}", node); }; - let output_type = metadata.output_type.clone().unwrap().0; + let output_type = collection_kind_to_debug_type(&metadata.collection_kind); // The partitioned process (now cluster) is the sender // Its ID will now be in the recipient's output, so change the deserialize fn @@ -321,22 +340,6 @@ fn replace_process_node_location(node: &mut HydroNode, partitioner: &Partitioner } } -/// If we're partitioning a process into a cluster, we need to replace references to its location -fn replace_process_root_location(root: &mut HydroRoot, partitioner: &Partitioner) { - let Partitioner { - location_id, - new_cluster_id, - .. - } = partitioner; - - if let Some(new_id) = new_cluster_id { - // Modify the metadata - if let HydroRoot::CycleSink { out_location, .. } = root { - replace_process_location_id(out_location, *location_id, *new_id); - } - } -} - /// If we're partitioning a process into a cluster, we need to remove the default sender ID on outgoing networks fn remove_sender_id_from_receiver(node: &mut HydroNode, partitioner: &Partitioner, op_id: usize) { let Partitioner { new_cluster_id, .. } = partitioner; @@ -383,11 +386,6 @@ pub fn partition(ir: &mut [HydroRoot], partitioner: &Partitioner) { ); if partitioner.new_cluster_id.is_some() { - // Separately traverse roots since CycleSink isn't processed in traverse_dfir - for root in ir.iter_mut() { - replace_process_root_location(root, partitioner); - } - // DANGER: Do not depend on the ID here, since nodes would've been injected // Fix network only after all IDs have been replaced, since get_network_type relies on it traverse_dfir( diff --git a/hydro_optimize/src/repair.rs b/hydro_optimize/src/repair.rs index fb1f51a..776c1f6 100644 --- a/hydro_optimize/src/repair.rs +++ b/hydro_optimize/src/repair.rs @@ -84,24 +84,6 @@ pub fn cycle_source_to_sink_input(ir: &mut [HydroRoot]) -> HashMap<usize, usize> source_to_sink_input } -fn inject_location_root( - root: &mut HydroRoot, - id_to_location: &RefCell<HashMap<usize, LocationId>>, - missing_location: &RefCell<bool>, -) { - let inputs = root.input_metadata(); - let input_metadata = inputs.first().unwrap(); - - if let Some(location) = id_to_location.borrow().get(&input_metadata.op.id.unwrap()) { - if let HydroRoot::CycleSink { out_location, .. 
} = root { - out_location.swap_root(location.root().clone()); - } - } else { - println!("Missing location for root: {:?}", root.print_root()); - *missing_location.borrow_mut() = true; - } -} - fn inject_location_input_persist(input: &mut Box<HydroNode>, new_location: LocationId) { if let HydroNode::Persist { metadata: persist_metadata, @@ -112,22 +94,22 @@ fn inject_location_input_persist(input: &mut Box<HydroNode>, new_location: Locat } } +// Returns whether location was missing for any node and requires another round of calculation (to reach fixpoint) fn inject_location_node( node: &mut HydroNode, - id_to_location: &RefCell<HashMap<usize, LocationId>>, - missing_location: &RefCell<bool>, + id_to_location: &mut HashMap<usize, LocationId>, cycle_source_to_sink_input: &HashMap<usize, usize>, -) { +) -> bool { if let Some(op_id) = node.op_metadata().id { let inputs = match node { HydroNode::Source { metadata, .. } + | HydroNode::SingletonSource { metadata, .. } | HydroNode::ExternalInput { metadata, .. } | HydroNode::Network { metadata, .. } => { // Get locations from the nodes that must have them correct: Source and Network id_to_location - .borrow_mut() .insert(op_id, metadata.location_kind.clone()); - return; + return false; } HydroNode::Tee { inner, .. } => { vec![inner.0.borrow().op_metadata().id.unwrap()] @@ -145,11 +127,10 @@ fn inject_location_node( // Otherwise, get it from (either) input let metadata = node.metadata_mut(); for input in inputs { - let location = id_to_location.borrow().get(&input).cloned(); + let location = id_to_location.get(&input).cloned(); if let Some(location) = location { metadata.location_kind.swap_root(location.root().clone()); id_to_location - .borrow_mut() .insert(op_id, metadata.location_kind.clone()); match node { @@ -168,45 +149,46 @@ fn inject_location_node( | HydroNode::FoldKeyed { input, .. } | HydroNode::Reduce { input, .. } | HydroNode::ReduceKeyed { input, .. } + | HydroNode::ReduceKeyedWatermark { input, .. } | HydroNode::Scan { input, .. } => { inject_location_input_persist(input, location.root().clone()); } _ => {} } - return; + return false; } } // If the location was not set, let the recursive function know println!("Missing location for node: {:?}", node.print_root()); - *missing_location.borrow_mut() = true; + return true; } + + // No op_id, probably can ignore? 
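+ // (If inject_id ran first, every node should already carry an op_id; an id-less node has no location to record, so it does not block the fixpoint.)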
+ return false; } pub fn inject_location(ir: &mut [HydroRoot], cycle_source_to_sink_input: &HashMap<usize, usize>) { - let id_to_location = RefCell::new(HashMap::new()); + let mut id_to_location = HashMap::new(); loop { println!("Attempting to inject location, looping until fixpoint..."); - let missing_location = RefCell::new(false); + let mut missing_location = false; transform_bottom_up( ir, - &mut |leaf| { - inject_location_root(leaf, &id_to_location, &missing_location); - }, + &mut |_| {}, &mut |node| { - inject_location_node( + missing_location |= inject_location_node( node, - &id_to_location, - &missing_location, + &mut id_to_location, cycle_source_to_sink_input, ); }, false, ); - if !*missing_location.borrow() { + if !missing_location { println!("Locations injected!"); // Check well-formedness here diff --git a/hydro_optimize/src/rewrites.rs b/hydro_optimize/src/rewrites.rs index 274b80e..a53c86b 100644 --- a/hydro_optimize/src/rewrites.rs +++ b/hydro_optimize/src/rewrites.rs @@ -2,7 +2,7 @@ use std::cell::RefCell; use std::collections::HashMap; use hydro_lang::compile::builder::{FlowBuilder, RewriteIrFlowBuilder}; -use hydro_lang::compile::ir::{HydroIrMetadata, HydroNode, HydroRoot, deep_clone, traverse_dfir}; +use hydro_lang::compile::ir::{CollectionKind, DebugType, HydroIrMetadata, HydroNode, HydroRoot, deep_clone, traverse_dfir}; use hydro_lang::location::dynamic::LocationId; use hydro_lang::location::{Cluster, Location}; use serde::{Deserialize, Serialize}; @@ -255,4 +255,21 @@ pub fn deserialize_bincode_with_type(tagged: Option<&syn::Type>, t_type: &syn::T } } } +} + +pub fn collection_kind_to_debug_type(collection_kind: &CollectionKind) -> DebugType { + match collection_kind { + CollectionKind::Stream { element_type, .. } + | CollectionKind::Singleton { element_type, .. } + | CollectionKind::Optional { element_type, .. } => DebugType::from(*element_type.clone().0), + CollectionKind::KeyedStream { key_type, value_type, .. } + | CollectionKind::KeyedSingleton { key_type, value_type, .. } => { + let original_key_type = *key_type.clone().0; + let original_value_type = *value_type.clone().0; + let new_type: syn::Type = syn::parse_quote! 
{ + (#original_key_type, #original_value_type) + }; + DebugType::from(new_type) + } + } } \ No newline at end of file diff --git a/hydro_optimize/src/tests/mod.rs b/hydro_optimize/src/tests/mod.rs index 9d3d091..51563ce 100644 --- a/hydro_optimize/src/tests/mod.rs +++ b/hydro_optimize/src/tests/mod.rs @@ -2,7 +2,6 @@ use std::cell::RefCell; use std::collections::HashMap; use hydro_build_utils::insta; -use hydro_lang::compile::rewrites::persist_pullup; use hydro_lang::deploy::HydroDeploy; use hydro_lang::location::Location; use hydro_lang::prelude::*; @@ -28,7 +27,6 @@ fn decoupled_compute_pi_ir() { }; let multi_run_metadata = RefCell::new(vec![]); let built = builder - .optimize_with(persist_pullup::persist_pullup) .optimize_with(|roots| decoupler::decouple(roots, &decoupler, &multi_run_metadata, 0)) .into_deploy::<HydroDeploy>(); @@ -54,7 +52,6 @@ fn partitioned_simple_cluster_ir() { new_cluster_id: None, }; let built = builder - .optimize_with(persist_pullup::persist_pullup) .optimize_with(|roots| crate::partitioner::partition(roots, &partitioner)) .into_deploy::<HydroDeploy>(); diff --git a/hydro_optimize/src/tests/two_pc.rs b/hydro_optimize/src/tests/two_pc.rs index c6cab70..dc30513 100644 --- a/hydro_optimize/src/tests/two_pc.rs +++ b/hydro_optimize/src/tests/two_pc.rs @@ -2,7 +2,6 @@ use std::collections::{BTreeMap, HashMap}; use hydro_build_utils::insta; use hydro_lang::compile::ir::deep_clone; -use hydro_lang::compile::rewrites::persist_pullup::persist_pullup; use hydro_lang::deploy::HydroDeploy; use hydro_lang::location::Location; use hydro_lang::prelude::*; @@ -45,7 +44,6 @@ fn two_pc_partition_coordinator() { let mut cycle_data = HashMap::new(); let built = builder - .optimize_with(persist_pullup) .optimize_with(|ir| { inject_id(ir); cycle_data = cycle_source_to_sink_input(ir); @@ -101,7 +99,6 @@ fn two_pc_partition_participant() { let mut cycle_data = HashMap::new(); let built = builder - .optimize_with(persist_pullup) .optimize_with(|ir| { inject_id(ir); cycle_data = cycle_source_to_sink_input(ir); From 4d09fa8393362b903ead703fd7351a0b8a241809 Mon Sep 17 00:00:00 2001 From: David Chu Date: Mon, 27 Oct 2025 23:59:07 +0000 Subject: [PATCH 06/12] Fixed compile-time errors --- Cargo.lock | 50 ++++++++++++++++--- Cargo.toml | 4 +- hydro_optimize/src/decoupler.rs | 31 ++++++------ hydro_optimize/src/deploy_and_analyze.rs | 12 +++-- hydro_optimize/src/partition_node_analysis.rs | 27 ++++++---- hydro_optimize/src/partitioner.rs | 29 ++--------- hydro_optimize/src/rewrites.rs | 30 ++++++++++- .../examples/benchmark_paxos.rs | 2 +- .../examples/decouple_compute_pi.rs | 4 +- .../examples/partition_simple_cluster.rs | 4 +- .../examples/partition_two_pc.rs | 2 - .../examples/perf_compute_pi.rs | 2 +- .../examples/perf_paxos.rs | 2 +- .../examples/simple_graphs.rs | 2 +- 14 files changed, 123 insertions(+), 78 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 972763a..4b38acb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -91,6 +91,12 @@ dependencies = [ "alloc-no-stdlib", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -1589,6 +1595,10 @@ name = "hashbrown" version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" +dependencies = [ + "ahash", + "allocator-api2", +] [[package]] name = 
"hashbrown" @@ -1754,6 +1764,7 @@ dependencies = [ "futures", "pin-project-lite", "serde", + "sinktools", "tempfile", "tokio", "tokio-stream", @@ -1772,6 +1783,7 @@ dependencies = [ "ctor 0.2.9", "data-encoding", "dfir_lang", + "flate2", "futures", "hydro_build_utils", "hydro_deploy", @@ -1795,6 +1807,7 @@ dependencies = [ "tokio-util", "toml", "trybuild-internals-api", + "urlencoding", "webbrowser", ] @@ -3954,6 +3967,16 @@ version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa" +[[package]] +name = "sinktools" +version = "0.0.1" +dependencies = [ + "futures-util", + "pin-project-lite", + "sealed", + "variadics", +] + [[package]] name = "siphasher" version = "1.0.1" @@ -4067,9 +4090,9 @@ checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" [[package]] name = "stageleft" -version = "0.9.7" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54b1cbaa5d12efbc7445d148e28befd36da1417276fe7a786cf602af1e63ee89" +checksum = "b92cb4d28ec3c2b3aba8ee05487f10c3aa00d7a369a3fe9d4d89e8719f28ca4f" dependencies = [ "ctor 0.4.3", "proc-macro-crate", @@ -4081,9 +4104,9 @@ dependencies = [ [[package]] name = "stageleft_macro" -version = "0.9.7" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "693fb6c3ec9a968373bc7f62aff4327bd143ffdfe952f02212e4ea75f60ca1f3" +checksum = "e05624677c37d2abebe0c3e50fa7722f99936d26de2a8a23ac5d2a397be596c0" dependencies = [ "proc-macro-crate", "proc-macro2", @@ -4094,9 +4117,9 @@ dependencies = [ [[package]] name = "stageleft_tool" -version = "0.9.7" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "18b040a605df7aade9cc7a1ec11bd996adbebde50df78f7ae3e1bf021050288c" +checksum = "da14207006ed0031a24197e0a2d3bc84b2a7ecf3a2ca70b70f1886cf1a37b464" dependencies = [ "prettyplease", "proc-macro-crate", @@ -4622,6 +4645,12 @@ dependencies = [ "serde", ] +[[package]] +name = "urlencoding" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" + [[package]] name = "utf8_iter" version = "1.0.4" @@ -4644,6 +4673,15 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "variadics" +version = "0.0.9" +dependencies = [ + "hashbrown 0.14.5", + "hydro_build_utils", + "sealed", +] + [[package]] name = "version_check" version = "0.9.5" diff --git a/Cargo.toml b/Cargo.toml index 8b129b8..23fed39 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,8 +16,8 @@ hydro_build_utils = { path = "../hydroflow/hydro_build_utils" } hydro_deploy = { path = "../hydroflow/hydro_deploy/core" } include_mdtests = { path = "../hydroflow/include_mdtests" } serde = { version = "1.0.197", features = ["derive"] } -stageleft = "0.9.7" -stageleft_tool = "0.9.7" +stageleft = "0.10.0" +stageleft_tool = "0.10.0" tokio = { version = "1.29.0", features = ["full"] } ctor = "0.2" clap = { version = "4.4", features = ["derive"] } diff --git a/hydro_optimize/src/decoupler.rs b/hydro_optimize/src/decoupler.rs index e7d6cd4..7af2e3f 100644 --- a/hydro_optimize/src/decoupler.rs +++ b/hydro_optimize/src/decoupler.rs @@ -3,10 +3,9 @@ use std::collections::HashMap; use std::rc::Rc; use hydro_lang::compile::ir::{ - DebugInstantiate, DebugType, HydroIrMetadata, HydroIrOpMetadata, HydroNode, HydroRoot, TeeNode, + DebugInstantiate, HydroIrMetadata, HydroIrOpMetadata, 
HydroNode, HydroRoot, TeeNode, transform_bottom_up, traverse_dfir, }; -use hydro_lang::location::MemberId; use hydro_lang::location::dynamic::LocationId; use proc_macro2::Span; use serde::{Deserialize, Serialize}; @@ -15,7 +14,7 @@ use syn::visit_mut::VisitMut; use crate::parse_results::{MultiRunMetadata, get_or_append_run_metadata}; use crate::repair::{cycle_source_to_sink_input, inject_id, inject_location}; -use crate::rewrites::{deserialize_bincode_with_type, serialize_bincode_with_type, ClusterSelfIdReplace}; +use crate::rewrites::{ClusterSelfIdReplace, collection_kind_to_debug_type, deserialize_bincode_with_type, prepend_member_id_to_collection_kind, serialize_bincode_with_type}; #[derive(Clone, Serialize, Deserialize)] pub struct Decoupler { @@ -28,8 +27,6 @@ pub struct Decoupler { fn add_network(node: &mut HydroNode, new_location: &LocationId) { let metadata = node.metadata().clone(); - let output_debug_type = metadata.output_type.clone().unwrap(); - let parent_id = metadata.location_kind.root().raw_id(); let node_content = std::mem::replace(node, HydroNode::Placeholder); @@ -42,14 +39,17 @@ fn add_network(node: &mut HydroNode, new_location: &LocationId) { ::hydro_lang::location::MemberId::<()>::from_raw(#ident), b )); - let cluster_id_type = quote_type::<MemberId<()>>(); - let mapped_output_type: syn::Type = syn::parse_quote!((#cluster_id_type, #output_debug_type)); + + // Calculate the new CollectionKind + let original_collection_kind = metadata.collection_kind.clone(); + let new_collection_kind = prepend_member_id_to_collection_kind(&original_collection_kind); + let mapped_node = HydroNode::Map { f: f.into(), input: Box::new(node_content), metadata: HydroIrMetadata { location_kind: metadata.location_kind.root().clone(), // Remove any ticks - output_type: Some(DebugType(Box::new(mapped_output_type.clone()))), + collection_kind: new_collection_kind.clone(), cardinality: None, tag: None, op: HydroIrOpMetadata { @@ -62,19 +62,19 @@ fn add_network(node: &mut HydroNode, new_location: &LocationId) { }; // Set up the network node - let output_type = output_debug_type.clone().0; + let output_debug_type = collection_kind_to_debug_type(&original_collection_kind); let network_node = HydroNode::Network { - serialize_fn: Some(serialize_bincode_with_type(true, &output_type)).map(|e| e.into()), + serialize_fn: Some(serialize_bincode_with_type(true, &output_debug_type)).map(|e| e.into()), instantiate_fn: DebugInstantiate::Building, deserialize_fn: Some(deserialize_bincode_with_type( Some(&quote_type::<()>()), - &output_type, + &output_debug_type, )) .map(|e| e.into()), input: Box::new(mapped_node), metadata: HydroIrMetadata { location_kind: new_location.clone(), - output_type: Some(DebugType(Box::new(mapped_output_type))), + collection_kind: new_collection_kind, cardinality: None, tag: None, op: HydroIrOpMetadata { @@ -93,11 +93,11 @@ fn add_network(node: &mut HydroNode, new_location: &LocationId) { input: Box::new(network_node), metadata: HydroIrMetadata { location_kind: new_location.clone(), - output_type: Some(output_debug_type), + collection_kind: original_collection_kind, cardinality: None, tag: None, op: HydroIrOpMetadata { - backtrace: metadata.op.backtrace.clone(), + backtrace: metadata.op.backtrace, cpu_usage: None, network_recv_cpu_usage: None, id: None, @@ -286,7 +286,6 @@ mod tests { use hydro_build_utils::insta; use hydro_deploy::Deployment; use hydro_lang::compile::builder::FlowBuilder; use hydro_lang::compile::built::BuiltFlow; use hydro_lang::compile::ir; - use hydro_lang::compile::rewrites::persist_pullup::persist_pullup; use 
hydro_lang::location::Location; use hydro_lang::nondet::nondet; use hydro_lang::prelude::Cluster; @@ -323,7 +322,7 @@ mod tests { let multi_run_metadata = RefCell::new(vec![]); let iteration = 0; - let built = builder.optimize_with(persist_pullup).optimize_with(|ir| { + let built = builder.optimize_with(|ir| { inject_id(ir); // Convert named nodes to IDs, accounting for the offset let name_to_id = name_to_id_map(ir); diff --git a/hydro_optimize/src/deploy_and_analyze.rs b/hydro_optimize/src/deploy_and_analyze.rs index 46e14d8..5b20c19 100644 --- a/hydro_optimize/src/deploy_and_analyze.rs +++ b/hydro_optimize/src/deploy_and_analyze.rs @@ -6,7 +6,6 @@ use hydro_deploy::Deployment; use hydro_lang::compile::builder::{FlowBuilder, RewriteIrFlowBuilder}; use hydro_lang::compile::deploy::DeployResult; use hydro_lang::compile::ir::{HydroNode, HydroRoot, deep_clone, traverse_dfir}; -use hydro_lang::compile::rewrites::persist_pullup::persist_pullup; use hydro_lang::deploy::HydroDeploy; use hydro_lang::deploy::deploy_graph::DeployCrateWrapper; use hydro_lang::location::dynamic::LocationId; @@ -27,14 +26,12 @@ pub(crate) const CPU_USAGE_PREFIX: &str = "HYDRO_OPTIMIZE_CPU:"; fn insert_counter_node(node: &mut HydroNode, next_stmt_id: &mut usize, duration: syn::Expr) { match node { HydroNode::Placeholder - | HydroNode::Unpersist { .. } | HydroNode::Counter { .. } => { std::panic!("Unexpected {:?} found in insert_counter_node", node.print_root()); } HydroNode::Source { metadata, .. } | HydroNode::CycleSource { metadata, .. } | HydroNode::Persist { metadata, .. } - | HydroNode::Delta { metadata, .. } | HydroNode::Chain { metadata, .. } // Can technically be derived by summing parent cardinalities | HydroNode::ChainFirst { metadata, .. } // Can technically be derived by taking parent cardinality + 1 | HydroNode::CrossSingleton { metadata, .. } @@ -79,6 +76,13 @@ fn insert_counter_node(node: &mut HydroNode, next_stmt_id: &mut usize, duration: | HydroNode::Enumerate { .. } | HydroNode::Inspect { .. } | HydroNode::Sort { .. } + | HydroNode::Cast { .. } + | HydroNode::ObserveNonDet { .. } + | HydroNode::SingletonSource { .. } // Cardinality = 1 + | HydroNode::BeginAtomic { .. } + | HydroNode::EndAtomic { .. } + | HydroNode::Batch { .. } + | HydroNode::YieldConcat { .. } => {} } } @@ -151,7 +155,7 @@ pub async fn deploy_and_analyze<'a>( // Rewrite with counter tracking let rewritten_ir_builder = builder.rewritten_ir_builder(); - let optimized = builder.optimize_with(persist_pullup).optimize_with(|leaf| { + let optimized = builder.optimize_with(|leaf| { inject_id(leaf); insert_counter(leaf, counter_output_duration); }); diff --git a/hydro_optimize/src/partition_node_analysis.rs b/hydro_optimize/src/partition_node_analysis.rs index 1938bfa..3872a5c 100644 --- a/hydro_optimize/src/partition_node_analysis.rs +++ b/hydro_optimize/src/partition_node_analysis.rs @@ -164,8 +164,6 @@ fn input_dependency_analysis_node( HydroNode::CycleSource { .. } | HydroNode::Tee { .. } | HydroNode::Persist { .. } - | HydroNode::Unpersist { .. } - | HydroNode::Delta { .. } | HydroNode::ResolveFutures { .. } | HydroNode::ResolveFuturesOrdered { .. } | HydroNode::DeferTick { .. } @@ -176,7 +174,13 @@ fn input_dependency_analysis_node( | HydroNode::Filter { .. } // Although it contains a function f, the output is just a subset of the input, so just inherit from the parent | HydroNode::Inspect { .. } | HydroNode::Network { .. } - | HydroNode::ExternalInput { .. } => { + | HydroNode::ExternalInput { .. } + | HydroNode::Cast { .. 
} + | HydroNode::ObserveNonDet { .. } + | HydroNode::BeginAtomic { .. } + | HydroNode::EndAtomic { .. } + | HydroNode::Batch { .. } + | HydroNode::YieldConcat { .. } => { // For each input the first (and potentially only) parent depends on, take its dependency for input_id in input_taint_entry.iter() { if let Some(parent_dependencies_on_input) = parent_input_dependencies.get(input_id) && @@ -327,7 +331,8 @@ fn input_dependency_analysis_node( | HydroNode::Fold { .. } | HydroNode::Scan { .. } | HydroNode::FlatMap { .. } - | HydroNode::Source { .. } => { + | HydroNode::Source { .. } + | HydroNode::SingletonSource { .. } => { input_dependencies_entry.clear(); } HydroNode::Placeholder @@ -548,8 +553,6 @@ fn partitioning_constraint_analysis_node( | HydroNode::CycleSource { .. } | HydroNode::Tee { .. } | HydroNode::Persist { .. } - | HydroNode::Unpersist { .. } - | HydroNode::Delta { .. } | HydroNode::Chain { .. } | HydroNode::ChainFirst { .. } | HydroNode::ResolveFutures { .. } @@ -564,7 +567,14 @@ fn partitioning_constraint_analysis_node( | HydroNode::Sort { .. } | HydroNode::Network { .. } | HydroNode::ExternalInput { .. } - | HydroNode::Counter { .. } => { + | HydroNode::Counter { .. } + | HydroNode::Cast { .. } + | HydroNode::ObserveNonDet { .. } + | HydroNode::SingletonSource { .. } + | HydroNode::BeginAtomic { .. } + | HydroNode::EndAtomic { .. } + | HydroNode::Batch { .. } + | HydroNode::YieldConcat { .. } => { // Doesn't impede partitioning, return return; } @@ -754,7 +764,6 @@ mod tests { use std::collections::{BTreeMap, BTreeSet, HashMap}; use hydro_lang::compile::ir::deep_clone; - use hydro_lang::compile::rewrites::persist_pullup::persist_pullup; use hydro_lang::deploy::HydroDeploy; use hydro_lang::live_collections::stream::NoOrder; use hydro_lang::location::dynamic::LocationId; @@ -779,7 +788,6 @@ mod tests { ) { let mut cycle_data = HashMap::new(); let built = builder - .optimize_with(persist_pullup) .optimize_with(|ir| { inject_id(ir); cycle_data = cycle_source_to_sink_input(ir); @@ -880,7 +888,6 @@ mod tests { ) { let mut cycle_data = HashMap::new(); let built = builder - .optimize_with(persist_pullup) .optimize_with(|ir| { inject_id(ir); cycle_data = cycle_source_to_sink_input(ir); diff --git a/hydro_optimize/src/partitioner.rs b/hydro_optimize/src/partitioner.rs index 4c427ae..c22bd44 100644 --- a/hydro_optimize/src/partitioner.rs +++ b/hydro_optimize/src/partitioner.rs @@ -1,15 +1,14 @@ use core::panic; use std::collections::HashMap; -use hydro_lang::compile::ir::{BoundKind, CollectionKind, DebugType, HydroNode, HydroRoot, KeyedSingletonBoundKind, StreamOrder, StreamRetry, traverse_dfir}; -use hydro_lang::live_collections::keyed_singleton::KeyedSingletonBound; +use hydro_lang::compile::ir::{HydroNode, HydroRoot, traverse_dfir}; use hydro_lang::location::dynamic::LocationId; use serde::{Deserialize, Serialize}; use syn::visit_mut::{self, VisitMut}; use crate::partition_syn_analysis::StructOrTupleIndex; use crate::repair::inject_id; -use crate::rewrites::{ClusterSelfIdReplace, NetworkType, collection_kind_to_debug_type, deserialize_bincode_with_type, get_network_type, serialize_bincode_with_type}; +use crate::rewrites::{ClusterSelfIdReplace, NetworkType, collection_kind_to_debug_type, deserialize_bincode_with_type, get_network_type, prepend_member_id_to_collection_kind, serialize_bincode_with_type}; #[derive(Clone, Serialize, Deserialize)] pub struct Partitioner { @@ -126,29 +125,7 @@ fn replace_sender_dest(node: &mut HydroNode, partitioner: &Partitioner, next_stm let f: 
syn::Expr = if new_cluster_id.is_some() { // Output type of Map now includes dest ID - let member_id_syn_type: syn::Type = syn::parse_quote! { ::hydro_lang::location::MemberId<()> }; - let member_id_debug_type = DebugType::from(member_id_syn_type); - metadata.collection_kind = match metadata.collection_kind { - CollectionKind::Singleton { element_type, .. } - | CollectionKind::Optional { element_type, .. } => { - CollectionKind::KeyedSingleton { - bound: KeyedSingletonBoundKind::Unbounded, - key_type: member_id_debug_type, - value_type: element_type, - } - } - CollectionKind::Stream { .. } - | CollectionKind::KeyedStream { .. } - | CollectionKind::KeyedSingleton { .. } => { - CollectionKind::KeyedStream { - bound: BoundKind::Unbounded, - value_order: StreamOrder::NoOrder, - value_retry: StreamRetry::ExactlyOnce, - key_type: member_id_debug_type, - value_type: collection_kind_to_debug_type(&metadata.collection_kind), - } - } - }; + metadata.collection_kind = prepend_member_id_to_collection_kind(&metadata.collection_kind); // Partitioning a process into a cluster syn::parse_quote!( diff --git a/hydro_optimize/src/rewrites.rs b/hydro_optimize/src/rewrites.rs index a53c86b..1b9f118 100644 --- a/hydro_optimize/src/rewrites.rs +++ b/hydro_optimize/src/rewrites.rs @@ -2,7 +2,7 @@ use std::cell::RefCell; use std::collections::HashMap; use hydro_lang::compile::builder::{FlowBuilder, RewriteIrFlowBuilder}; -use hydro_lang::compile::ir::{CollectionKind, DebugType, HydroIrMetadata, HydroNode, HydroRoot, deep_clone, traverse_dfir}; +use hydro_lang::compile::ir::{BoundKind, CollectionKind, DebugType, HydroIrMetadata, HydroNode, HydroRoot, KeyedSingletonBoundKind, StreamOrder, StreamRetry, deep_clone, traverse_dfir}; use hydro_lang::location::dynamic::LocationId; use hydro_lang::location::{Cluster, Location}; use serde::{Deserialize, Serialize}; @@ -272,4 +272,30 @@ pub fn collection_kind_to_debug_type(collection_kind: &CollectionKind) -> DebugT DebugType::from(new_type) } } -} \ No newline at end of file +} + +pub fn prepend_member_id_to_collection_kind(collection_kind: &CollectionKind) -> CollectionKind { + let member_id_syn_type: syn::Type = syn::parse_quote! { ::hydro_lang::location::MemberId<()> }; + let member_id_debug_type = DebugType::from(member_id_syn_type); + match collection_kind { + CollectionKind::Singleton { element_type, .. } + | CollectionKind::Optional { element_type, .. } => { + CollectionKind::KeyedSingleton { + bound: KeyedSingletonBoundKind::Unbounded, + key_type: member_id_debug_type, + value_type: element_type.clone(), + } + } + CollectionKind::Stream { .. } + | CollectionKind::KeyedStream { .. } + | CollectionKind::KeyedSingleton { .. 
} => { + CollectionKind::KeyedStream { + bound: BoundKind::Unbounded, + value_order: StreamOrder::NoOrder, + value_retry: StreamRetry::ExactlyOnce, + key_type: member_id_debug_type, + value_type: collection_kind_to_debug_type(&collection_kind), + } + } + } +} diff --git a/hydro_optimize_examples/examples/benchmark_paxos.rs b/hydro_optimize_examples/examples/benchmark_paxos.rs index e56ec6c..7a20dd8 100644 --- a/hydro_optimize_examples/examples/benchmark_paxos.rs +++ b/hydro_optimize_examples/examples/benchmark_paxos.rs @@ -19,7 +19,7 @@ async fn main() { #[command(author, version, about, long_about = None)] struct BenchmarkArgs { #[command(flatten)] - graph: hydro_lang::graph::config::GraphConfig, + graph: hydro_lang::viz::config::GraphConfig, /// Use GCP for deployment (provide project name) #[arg(long)] diff --git a/hydro_optimize_examples/examples/decouple_compute_pi.rs b/hydro_optimize_examples/examples/decouple_compute_pi.rs index 8727243..b3ac6c9 100644 --- a/hydro_optimize_examples/examples/decouple_compute_pi.rs +++ b/hydro_optimize_examples/examples/decouple_compute_pi.rs @@ -15,9 +15,8 @@ struct Args { } use hydro_deploy::gcp::GcpNetwork; use hydro_deploy::{Deployment, Host}; -use hydro_lang::compile::rewrites::persist_pullup; use hydro_lang::deploy::TrybuildHost; -use hydro_lang::graph::config::GraphConfig; +use hydro_lang::viz::config::GraphConfig; use hydro_lang::location::Location; use hydro_optimize::debug; use hydro_optimize::decoupler::{self, Decoupler}; @@ -82,7 +81,6 @@ async fn main() { let multi_run_metadata = RefCell::new(vec![]); let _nodes = built - .optimize_with(persist_pullup::persist_pullup) .optimize_with(|roots| decoupler::decouple(roots, &decoupler, &multi_run_metadata, 0)) .optimize_with(debug::print_id) .with_process( diff --git a/hydro_optimize_examples/examples/partition_simple_cluster.rs b/hydro_optimize_examples/examples/partition_simple_cluster.rs index 6b2e790..51c43b5 100644 --- a/hydro_optimize_examples/examples/partition_simple_cluster.rs +++ b/hydro_optimize_examples/examples/partition_simple_cluster.rs @@ -15,9 +15,8 @@ struct Args { } use hydro_deploy::gcp::GcpNetwork; use hydro_deploy::{Deployment, Host}; -use hydro_lang::compile::rewrites::persist_pullup; use hydro_lang::deploy::TrybuildHost; -use hydro_lang::graph::config::GraphConfig; +use hydro_lang::viz::config::GraphConfig; use hydro_lang::location::Location; use hydro_optimize::partitioner::{self, Partitioner}; use tokio::sync::RwLock; @@ -73,7 +72,6 @@ async fn main() { let _ = built.generate_graph_with_config(&args.graph, None); let _nodes = built - .optimize_with(persist_pullup::persist_pullup) .optimize_with(|roots| partitioner::partition(roots, &partitioner)) .with_process( &process, diff --git a/hydro_optimize_examples/examples/partition_two_pc.rs b/hydro_optimize_examples/examples/partition_two_pc.rs index b3e9f05..35ee680 100644 --- a/hydro_optimize_examples/examples/partition_two_pc.rs +++ b/hydro_optimize_examples/examples/partition_two_pc.rs @@ -3,7 +3,6 @@ use std::sync::Arc; use hydro_deploy::gcp::GcpNetwork; use hydro_deploy::{Deployment, Host}; -use hydro_lang::compile::rewrites::persist_pullup::persist_pullup; use hydro_lang::deploy::TrybuildHost; use hydro_lang::location::Location; use hydro_optimize::partition_node_analysis::{nodes_to_partition, partitioning_analysis}; @@ -60,7 +59,6 @@ async fn main() { let mut cycle_data = HashMap::new(); let deployable = builder - .optimize_with(persist_pullup) .optimize_with(|ir| { inject_id(ir); cycle_data = 
cycle_source_to_sink_input(ir); diff --git a/hydro_optimize_examples/examples/perf_compute_pi.rs b/hydro_optimize_examples/examples/perf_compute_pi.rs index 2dcde56..4d027c4 100644 --- a/hydro_optimize_examples/examples/perf_compute_pi.rs +++ b/hydro_optimize_examples/examples/perf_compute_pi.rs @@ -17,7 +17,7 @@ async fn main() { use clap::Parser; use hydro_deploy::Deployment; use hydro_deploy::gcp::GcpNetwork; - use hydro_lang::graph::config::GraphConfig; + use hydro_lang::viz::config::GraphConfig; use hydro_lang::location::Location; use hydro_optimize::deploy::ReusableHosts; use hydro_optimize::deploy_and_analyze::deploy_and_analyze; diff --git a/hydro_optimize_examples/examples/perf_paxos.rs b/hydro_optimize_examples/examples/perf_paxos.rs index 991d05d..84bd94a 100644 --- a/hydro_optimize_examples/examples/perf_paxos.rs +++ b/hydro_optimize_examples/examples/perf_paxos.rs @@ -8,7 +8,7 @@ async fn main() { use clap::Parser; use hydro_deploy::Deployment; use hydro_deploy::gcp::GcpNetwork; - use hydro_lang::graph::config::GraphConfig; + use hydro_lang::viz::config::GraphConfig; use hydro_lang::location::Location; use hydro_optimize::decoupler; use hydro_optimize::deploy::ReusableHosts; diff --git a/hydro_optimize_examples/examples/simple_graphs.rs b/hydro_optimize_examples/examples/simple_graphs.rs index eb62392..6182d24 100644 --- a/hydro_optimize_examples/examples/simple_graphs.rs +++ b/hydro_optimize_examples/examples/simple_graphs.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use clap::Parser; use hydro_deploy::Deployment; use hydro_deploy::gcp::GcpNetwork; -use hydro_lang::graph::config::GraphConfig; +use hydro_lang::viz::config::GraphConfig; use hydro_lang::location::Location; use hydro_lang::prelude::FlowBuilder; use hydro_optimize::decoupler; From 9a87932c7d032ea7c60076df61573862dadf879d Mon Sep 17 00:00:00 2001 From: David Chu Date: Tue, 28 Oct 2025 21:22:24 +0000 Subject: [PATCH 07/12] HydroRoot input() and input_metadata() no longer return vecs --- hydro_optimize/src/debug.rs | 8 ++------ hydro_optimize/src/decouple_analysis.rs | 4 ++-- hydro_optimize/src/decoupler.rs | 2 +- hydro_optimize/src/parse_results.rs | 4 ++-- hydro_optimize/src/rewrites.rs | 2 +- hydro_optimize_examples/src/simple_graphs.rs | 16 ++++++++++++++++ 6 files changed, 24 insertions(+), 12 deletions(-) diff --git a/hydro_optimize/src/debug.rs b/hydro_optimize/src/debug.rs index faa11cf..b8e49c2 100644 --- a/hydro_optimize/src/debug.rs +++ b/hydro_optimize/src/debug.rs @@ -3,16 +3,12 @@ use std::collections::HashMap; use hydro_lang::compile::ir::{HydroNode, HydroRoot, traverse_dfir}; fn print_id_root(root: &mut HydroRoot, next_stmt_id: &mut usize) { - let inputs = root - .input_metadata() - .iter() - .map(|m| m.op.id) - .collect::<Vec<Option<usize>>>(); + let input = root.input_metadata().op.id; println!( "{} Root {}, Inputs: {:?}", next_stmt_id, root.print_root(), - inputs, + input, ); } diff --git a/hydro_optimize/src/decouple_analysis.rs b/hydro_optimize/src/decouple_analysis.rs index 258b30b..813ff9d 100644 --- a/hydro_optimize/src/decouple_analysis.rs +++ b/hydro_optimize/src/decouple_analysis.rs @@ -290,12 +290,12 @@ fn decouple_analysis_root( model_metadata: &RefCell<ModelMetadata>, ) { // Ignore nodes that are not in the cluster to decouple - if model_metadata.borrow().cluster_to_decouple != *root.input_metadata()[0].location_kind.root() + if model_metadata.borrow().cluster_to_decouple != *root.input_metadata().location_kind.root() { return; } - add_tick_constraint(root.input_metadata()[0], op_id_to_inputs, model_metadata); + 
add_tick_constraint(root.input_metadata(), op_id_to_inputs, model_metadata); } fn decouple_analysis_node( diff --git a/hydro_optimize/src/decoupler.rs b/hydro_optimize/src/decoupler.rs index 7af2e3f..3daa6cc 100644 --- a/hydro_optimize/src/decoupler.rs +++ b/hydro_optimize/src/decoupler.rs @@ -215,7 +215,7 @@ fn fix_cluster_self_id_root(root: &mut HydroRoot, mut locations: ClusterSelfIdRe decoupled_cluster_id, .. } = locations - && root.input_metadata()[0].location_kind.root().raw_id() == decoupled_cluster_id + && root.input_metadata().location_kind.root().raw_id() == decoupled_cluster_id { root.visit_debug_expr(|expr| { locations.visit_expr_mut(&mut expr.0); diff --git a/hydro_optimize/src/parse_results.rs b/hydro_optimize/src/parse_results.rs index 741f3ce..59a03d2 100644 --- a/hydro_optimize/src/parse_results.rs +++ b/hydro_optimize/src/parse_results.rs @@ -367,11 +367,11 @@ fn record_metadata( } fn record_metadata_root(root: &mut HydroRoot, run_metadata: &mut RunMetadata) { - record_metadata(root.op_metadata(), root.input_metadata(), run_metadata); + record_metadata(root.op_metadata(), vec![root.input_metadata()], run_metadata); // Location = input's location, cardinality = input's cardinality let id = root.op_metadata().id.unwrap(); - let input = root.input_metadata()[0]; + let input = root.input_metadata(); run_metadata .op_id_to_location .insert(id, input.location_kind.root().clone()); diff --git a/hydro_optimize/src/rewrites.rs b/hydro_optimize/src/rewrites.rs index 1b9f118..6ba7552 100644 --- a/hydro_optimize/src/rewrites.rs +++ b/hydro_optimize/src/rewrites.rs @@ -151,7 +151,7 @@ pub fn op_id_to_inputs( traverse_dfir( ir, |leaf, op_id| { - let relevant_input_ids = relevant_inputs(leaf.input_metadata(), location); + let relevant_input_ids = relevant_inputs(vec![leaf.input_metadata()], location); mapping.borrow_mut().insert(*op_id, relevant_input_ids); }, |node, op_id| { diff --git a/hydro_optimize_examples/src/simple_graphs.rs b/hydro_optimize_examples/src/simple_graphs.rs index 08fbb98..437e53d 100644 --- a/hydro_optimize_examples/src/simple_graphs.rs +++ b/hydro_optimize_examples/src/simple_graphs.rs @@ -49,6 +49,7 @@ fn sha256(n: u32) -> u32 { pub fn get_graph_function<'a>(name: &str) -> impl GraphFunction<'a> { match name { "noop" => noop, + "map_h_map_h" => map_h_map_h, "map_h_map_h_map_h" => map_h_map_h_map_h, "map_h_map_h_map_l" => map_h_map_h_map_l, "map_h_map_l_map_h" => map_h_map_l_map_h, @@ -92,6 +93,21 @@ pub fn noop<'a>( payloads } +pub fn map_h_map_h<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream<MemberId<Client>, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream<MemberId<Client>, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + payloads + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(100 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(100 + n % 2) + ))) +} From 8e068e72387948249385d7ec566789f0fa85e13e Mon Sep 17 00:00:00 2001 From: David Chu Date: Mon, 3 Nov 2025 22:37:25 +0000 Subject: [PATCH 08/12] Started network_calibrator, untested --- .gitignore | 6 +- hydro_optimize/src/deploy.rs | 7 +- .../examples/network_calibrator.rs | 112 ++++++++++++++++++ hydro_optimize_examples/src/lib.rs | 1 + .../src/network_calibrator.rs | 50 ++++++++ 5 files changed, 169 insertions(+), 7 deletions(-) create mode 100644 hydro_optimize_examples/examples/network_calibrator.rs create 
mode 100644 hydro_optimize_examples/src/network_calibrator.rs diff --git a/.gitignore b/.gitignore index 98d6e30..e13ee29 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ target/ *.data.folded -*.perf.data \ No newline at end of file +*.perf.data +scripts/*.png + +# Output from `cargo run` is redirected to these files +*.results.txt \ No newline at end of file diff --git a/hydro_optimize/src/deploy.rs b/hydro_optimize/src/deploy.rs index 4c84252..f7494ab 100644 --- a/hydro_optimize/src/deploy.rs +++ b/hydro_optimize/src/deploy.rs @@ -46,18 +46,13 @@ impl ReusableHosts { deployment: &mut Deployment, display_name: String, ) -> TrybuildHost { - let rustflags = if self.host_arg == "gcp" { - "-C opt-level=3 -C codegen-units=1 -C strip=none -C debuginfo=2 -C lto=off -C link-args=--no-rosegment" - } else { - "-C opt-level=3 -C codegen-units=1 -C strip=none -C debuginfo=2 -C lto=off" - }; TrybuildHost::new(self.lazy_create_host(deployment, display_name.clone())) .additional_hydro_features(vec!["runtime_measure".to_string()]) .build_env( "HYDRO_RUNTIME_MEASURE_CPU_PREFIX", super::deploy_and_analyze::CPU_USAGE_PREFIX, ) - .rustflags(rustflags) + .rustflags("-C opt-level=3 -C codegen-units=1 -C strip=none -C debuginfo=2 -C lto=off") .tracing( TracingOptions::builder() .perf_raw_outfile(format!("{}.perf.data", display_name.clone())) diff --git a/hydro_optimize_examples/examples/network_calibrator.rs b/hydro_optimize_examples/examples/network_calibrator.rs new file mode 100644 index 0000000..6658f8a --- /dev/null +++ b/hydro_optimize_examples/examples/network_calibrator.rs @@ -0,0 +1,112 @@ +use std::cell::RefCell; +use std::collections::HashMap; +use std::sync::Arc; + +use clap::Parser; +use hydro_deploy::Deployment; +use hydro_deploy::gcp::GcpNetwork; +use hydro_lang::viz::config::GraphConfig; +use hydro_lang::location::Location; +use hydro_lang::prelude::FlowBuilder; +use hydro_optimize::deploy::ReusableHosts; +use hydro_optimize::deploy_and_analyze::deploy_and_analyze; +use hydro_optimize_examples::network_calibrator::{Aggregator, Client, Server, network_calibrator}; +use tokio::sync::RwLock; + +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] +struct Args { + #[command(flatten)] + graph: GraphConfig, + + /// Use GCP for deployment (provide project name) + #[arg(long)] + gcp: Option<String>, + + #[arg(long)] + function: String, +} + +#[tokio::main] +async fn main() { + let args = Args::parse(); + + let mut deployment = Deployment::new(); + let (host_arg, project) = if let Some(project) = args.gcp { + ("gcp".to_string(), project) + } else { + ("localhost".to_string(), String::new()) + }; + let network = Arc::new(RwLock::new(GcpNetwork::new(&project, None))); + + let mut builder = FlowBuilder::new(); + let num_clients = 1; + let num_clients_per_node = 10000000; + let server = builder.cluster(); + let clients = builder.cluster(); + let client_aggregator = builder.process(); + + let clusters = vec![ + ( + server.id().raw_id(), + std::any::type_name::<Server>().to_string(), + 1, + ), + ( + clients.id().raw_id(), + std::any::type_name::<Client>().to_string(), + num_clients, + ), + ]; + let processes = vec![( + client_aggregator.id().raw_id(), + std::any::type_name::<Aggregator>().to_string(), + )]; + + // Deploy + let mut reusable_hosts = ReusableHosts { + hosts: HashMap::new(), + host_arg, + project: project.clone(), + network: network.clone(), + }; + + let message_sizes = vec![1, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]; + let num_seconds_to_profile = Some(20); + let 
multi_run_metadata = RefCell::new(vec![]); + + for (i, message_size) in message_sizes.iter().enumerate() { + + network_calibrator( + num_clients_per_node, + *message_size, + &server, + &clients, + &client_aggregator, + ); + + let (rewritten_ir_builder, ir, _, _, _) = + deploy_and_analyze( + &mut reusable_hosts, + &mut deployment, + builder, + &clusters, + &processes, + vec![ + std::any::type_name::<Client>().to_string(), + std::any::type_name::<Aggregator>().to_string(), + ], + num_seconds_to_profile, + &multi_run_metadata, + i, + ) + .await; + + builder = rewritten_ir_builder.build_with(|_| ir); + } + + let built = builder.finalize(); + + // Generate graphs if requested + _ = built.generate_graph_with_config(&args.graph, None); +} \ No newline at end of file diff --git a/hydro_optimize_examples/src/lib.rs b/hydro_optimize_examples/src/lib.rs index 9617274..aebaec4 100644 --- a/hydro_optimize_examples/src/lib.rs +++ b/hydro_optimize_examples/src/lib.rs @@ -1,5 +1,6 @@ stageleft::stageleft_no_entry_crate!(); +pub mod network_calibrator; pub mod simple_graphs; pub mod simple_graphs_bench; pub mod simple_kv_bench; diff --git a/hydro_optimize_examples/src/network_calibrator.rs b/hydro_optimize_examples/src/network_calibrator.rs new file mode 100644 index 0000000..8b2a3d1 --- /dev/null +++ b/hydro_optimize_examples/src/network_calibrator.rs @@ -0,0 +1,50 @@ +use hydro_lang::{live_collections::stream::NoOrder, nondet::nondet, prelude::{Cluster, Process, Stream, Unbounded}}; +use hydro_std::bench_client::{bench_client, print_bench_results}; + +use stageleft::q; + +pub struct Client; +pub struct Server; +pub struct Aggregator; + +pub fn network_calibrator<'a>( + num_clients_per_node: usize, + message_size: usize, + server: &Cluster<'a, Server>, + clients: &Cluster<'a, Client>, + client_aggregator: &Process<'a, Aggregator>, +) { + let bench_results = bench_client( + clients, + |_client, payload_request| { + size_based_workload_generator(message_size, payload_request) + }, + |payloads| { + // Server just echoes the payload + payloads + .broadcast_bincode(server, nondet!(/** Test */)) + .demux_bincode(clients) + .values() + }, + num_clients_per_node, + nondet!(/** bench */), + ); + + print_bench_results(bench_results, client_aggregator, clients); +} + +/// Generates a fixed-size payload of `message_size` bytes for each virtual client, incrementing the last byte on every subsequent request +pub fn size_based_workload_generator<'a, Client>( + message_size: usize, + payload_request: Stream<(u32, Option<Vec<u8>>), Cluster<'a, Client>, Unbounded, NoOrder>, +) -> Stream<(u32, Vec<u8>), Cluster<'a, Client>, Unbounded, NoOrder> { + payload_request.map(q!(move |(virtual_id, payload)| { + if let Some(mut payload) = payload { + if let Some(last) = payload.last_mut() { + *last += 1; + return (virtual_id, payload); + } + } + (virtual_id, vec![0; message_size]) + })) +} \ No newline at end of file From b5bcb65285b95db9f7da3513cade809bc3884e22 Mon Sep 17 00:00:00 2001 From: David Chu Date: Mon, 3 Nov 2025 23:19:48 +0000 Subject: [PATCH 09/12] Map tests without decouple penalty --- .../examples/simple_graphs.rs | 8 +- hydro_optimize_examples/src/simple_graphs.rs | 126 ++++++++++++++++++ .../src/simple_graphs_bench.rs | 39 +++++- 3 files changed, 171 insertions(+), 2 deletions(-) diff --git a/hydro_optimize_examples/examples/simple_graphs.rs b/hydro_optimize_examples/examples/simple_graphs.rs index 6182d24..48570ca 100644 --- a/hydro_optimize_examples/examples/simple_graphs.rs +++ b/hydro_optimize_examples/examples/simple_graphs.rs @@ -12,7 +12,7 @@ use hydro_optimize::decoupler; use 
hydro_optimize::deploy::ReusableHosts; use hydro_optimize::deploy_and_analyze::deploy_and_analyze; use hydro_optimize_examples::simple_graphs::{Client, Server, get_graph_function}; -use hydro_optimize_examples::simple_graphs_bench::{Aggregator, simple_graphs_bench}; +use hydro_optimize_examples::simple_graphs_bench::{Aggregator, simple_graphs_bench, simple_graphs_bench_no_union}; use tokio::sync::RwLock; #[derive(Parser, Debug)] @@ -56,6 +56,12 @@ async fn main() { &client_aggregator, graph_function, ); + // simple_graphs_bench_no_union( + // num_clients_per_node, + // &server, + // &clients, + // &client_aggregator, + // ); let mut clusters = vec![ ( diff --git a/hydro_optimize_examples/src/simple_graphs.rs b/hydro_optimize_examples/src/simple_graphs.rs index 437e53d..dba95be 100644 --- a/hydro_optimize_examples/src/simple_graphs.rs +++ b/hydro_optimize_examples/src/simple_graphs.rs @@ -50,6 +50,8 @@ pub fn get_graph_function<'a>(name: &str) -> impl GraphFunction<'a> { match name { "noop" => noop, "map_h_map_h" => map_h_map_h, + "map_h_map_h_split_up" => map_h_map_h_split_up, + "map_h_map_h_parallel" => map_h_map_h_parallel, "map_h_map_h_map_h" => map_h_map_h_map_h, "map_h_map_h_map_l" => map_h_map_h_map_l, "map_h_map_l_map_h" => map_h_map_l_map_h, @@ -108,6 +110,130 @@ pub fn map_h_map_h<'a>( ))) } +pub fn map_h_map_h_split_up<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream<MemberId<Client>, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream<MemberId<Client>, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + payloads + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(10 + n % 2) + ))) +} + +pub fn map_h_map_h_parallel<'a>( + _server: &Cluster<'a, Server>, + payloads: KeyedStream<MemberId<Client>, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> KeyedStream<MemberId<Client>, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder> { + let batch0 = payloads.clone().filter(q!(|(virt_client_id, _)| 
virt_client_id % 2 == 0)); + let batch1 = payloads.filter(q!(|(virt_client_id, _)| virt_client_id % 2 == 1)); + let batch0out = batch0 + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(100 + n % 2) + ))); + batch1 + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(100 + n % 2) + ))) + .interleave(batch0out) +} + +pub fn map_h_map_h_parallel_no_union<'a>( + _server: &Cluster<'a, Server>, + payloads1: KeyedStream<MemberId<Client>, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, + payloads2: KeyedStream<MemberId<Client>, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +) -> (KeyedStream<MemberId<Client>, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, +KeyedStream<MemberId<Client>, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>) { + (payloads1 + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(100 + n % 2) + ))), + payloads2 + .map(q!(|(virt_client_id, n)| ( + virt_client_id, + self::sha256(100 + n % 2) + )))) +} + pub fn map_h_map_h_map_h<'a>( _server: &Cluster<'a, Server>, payloads: KeyedStream<MemberId<Client>, (u32, u32), Cluster<'a, Server>, Unbounded, NoOrder>, diff --git a/hydro_optimize_examples/src/simple_graphs_bench.rs b/hydro_optimize_examples/src/simple_graphs_bench.rs index 84bb2e8..57ae990 100644 --- a/hydro_optimize_examples/src/simple_graphs_bench.rs +++ b/hydro_optimize_examples/src/simple_graphs_bench.rs @@ -2,7 +2,8 @@ use hydro_lang::{prelude::{Cluster, Process}, nondet::nondet}; use hydro_std::bench_client::{bench_client, print_bench_results}; use hydro_test::cluster::paxos_bench::inc_u32_workload_generator; -use crate::simple_graphs::{Client, GraphFunction, Server}; +use stageleft::q; +use crate::simple_graphs::{Client, GraphFunction, Server, map_h_map_h_parallel_no_union}; pub struct Aggregator; pub fn simple_graphs_bench<'a>( @@ -29,5 +30,41 @@ pub fn simple_graphs_bench<'a>( nondet!(/** bench */), ); + print_bench_results(bench_results, client_aggregator, clients); +} + +pub fn simple_graphs_bench_no_union<'a>( + num_clients_per_node: usize, + server: &Cluster<'a, Server>, + clients: &Cluster<'a, Client>, + client_aggregator: &Process<'a, Aggregator>, +) { + let bench_results = bench_client( + clients, + inc_u32_workload_generator, + |payloads| { + let payloads1 = payloads.clone().filter(q!(|(virt_client_id, _)| virt_client_id % 2 == 0)); + let payloads2 = payloads.filter(q!(|(virt_client_id, _)| virt_client_id % 2 == 1)); + let (batch0, batch1) = map_h_map_h_parallel_no_union( + server, + payloads1 + .broadcast_bincode(server, nondet!(/** Test */)) + .into(), + payloads2 + .broadcast_bincode(server, nondet!(/** Test */)) + .into(), + ); + let clients_batch0 = batch0 + .demux_bincode(clients) + .values(); + batch1 + .demux_bincode(clients) + .values() + .interleave(clients_batch0) + }, + num_clients_per_node, + nondet!(/** bench */), + ); + print_bench_results(bench_results, client_aggregator, clients); } \ No newline at end of file From 760b4666e34e2d9f545bbe4fa35f62e7f10be077 Mon Sep 17 00:00:00 2001 From: David Chu Date: Wed, 5 Nov 2025 18:03:20 +0000 Subject: [PATCH 10/12] Fix Cargo.lock --- Cargo.lock | 40 ++++------------------------------------ 1 file changed, 4 insertions(+), 36 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 56aa0a9..fee4098 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -106,12 +106,6 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" -[[package]] -name = "allocator-api2" -version = "0.2.21" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" - [[package]] name = "android_system_properties" version = "0.1.5" @@ -1669,10 +1663,6 @@ dependencies = [ "ahash", "allocator-api2", ] -dependencies = [ - "ahash", - "allocator-api2", -] [[package]] name = "hashbrown" @@ -1859,7 +1849,6 @@ dependencies = [ "pin-project-lite", "serde", "sinktools", - "sinktools", "tempfile", "tokio", "tokio-stream", @@ -1879,7 +1868,6 @@ dependencies = [ "data-encoding", "dfir_lang", "flate2", - "flate2", "futures", "hydro_build_utils", "hydro_deploy", @@ -1904,7 +1892,6 @@ dependencies = [ "toml", "trybuild-internals-api", "urlencoding", - "urlencoding", "webbrowser", ] @@ -1922,10 +1909,8 @@ dependencies = [ "hydro_test", "include_mdtests", "proc-macro-crate", - "proc-macro-crate", "proc-macro2", "quote", - "quote", "regex", "serde", "stageleft", @@ -1941,8 +1926,6 @@ dependencies = [ "ctor 0.2.9", "dfir_lang", "hydro_build_utils", - "dfir_lang", - "hydro_build_utils", "hydro_deploy", "hydro_lang", "hydro_optimize", @@ -1951,9 +1934,6 @@ dependencies = [ "regex", "serde", "sha2", - "regex", - "serde", - "sha2", "stageleft", "stageleft_tool", "tokio", @@ -2298,9 +2278,9 @@ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" [[package]] name = "iri-string" -version = "0.7.8" +version = "0.7.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2" +checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" dependencies = [ "memchr", "serde", @@ -3717,9 +3697,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.34" +version = "0.23.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a9586e9ee2b4f8fab52a0048ca7334d7024eef48e2cb9407e3497bb7cab7fa7" +checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" dependencies = [ "once_cell", "ring", @@ -4145,10 +4125,8 @@ checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" [[package]] name = "stageleft" version = "0.10.0" -version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b92cb4d28ec3c2b3aba8ee05487f10c3aa00d7a369a3fe9d4d89e8719f28ca4f" -checksum = "b92cb4d28ec3c2b3aba8ee05487f10c3aa00d7a369a3fe9d4d89e8719f28ca4f" dependencies = [ "ctor 0.4.3", "proc-macro-crate", @@ -4161,10 +4139,8 @@ dependencies = [ [[package]] name = "stageleft_macro" version = "0.10.0" -version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e05624677c37d2abebe0c3e50fa7722f99936d26de2a8a23ac5d2a397be596c0" -checksum = "e05624677c37d2abebe0c3e50fa7722f99936d26de2a8a23ac5d2a397be596c0" dependencies = [ "proc-macro-crate", "proc-macro2", @@ -4176,10 +4152,8 @@ dependencies = [ [[package]] name = "stageleft_tool" version = "0.10.0" -version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da14207006ed0031a24197e0a2d3bc84b2a7ecf3a2ca70b70f1886cf1a37b464" -checksum = "da14207006ed0031a24197e0a2d3bc84b2a7ecf3a2ca70b70f1886cf1a37b464" dependencies = [ "prettyplease", "proc-macro-crate", @@ -4738,12 +4712,6 @@ version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" -[[package]] -name = "urlencoding" -version = "2.1.3" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" - [[package]] name = "utf8_iter" version = "1.0.4" From c98987f6a1fd281019466474851832aed518cdf0 Mon Sep 17 00:00:00 2001 From: David Chu Date: Fri, 7 Nov 2025 00:49:09 +0000 Subject: [PATCH 11/12] Network calibrator running --- .../examples/network_calibrator.rs | 68 +++++++++---------- .../src/network_calibrator.rs | 5 +- 2 files changed, 36 insertions(+), 37 deletions(-) diff --git a/hydro_optimize_examples/examples/network_calibrator.rs b/hydro_optimize_examples/examples/network_calibrator.rs index 6658f8a..5688ad4 100644 --- a/hydro_optimize_examples/examples/network_calibrator.rs +++ b/hydro_optimize_examples/examples/network_calibrator.rs @@ -22,9 +22,6 @@ struct Args { /// Use GCP for deployment (provide project name) #[arg(long)] gcp: Option, - - #[arg(long)] - function: String, } #[tokio::main] @@ -39,29 +36,8 @@ async fn main() { }; let network = Arc::new(RwLock::new(GcpNetwork::new(&project, None))); - let mut builder = FlowBuilder::new(); - let num_clients = 1; - let num_clients_per_node = 10000000; - let server = builder.cluster(); - let clients = builder.cluster(); - let client_aggregator = builder.process(); - - let clusters = vec![ - ( - server.id().raw_id(), - std::any::type_name::().to_string(), - 1, - ), - ( - clients.id().raw_id(), - std::any::type_name::().to_string(), - num_clients, - ), - ]; - let processes = vec![( - client_aggregator.id().raw_id(), - std::any::type_name::().to_string(), - )]; + let num_clients = 5; // >1 clients so it doesn't become the bottleneck + let num_clients_per_node = 1; // Deploy let mut reusable_hosts = ReusableHosts { @@ -75,11 +51,33 @@ async fn main() { let num_seconds_to_profile = Some(20); let multi_run_metadata = RefCell::new(vec![]); - for (i, message_size) in message_sizes.iter().enumerate() { - + for message_size in message_sizes { + let builder = FlowBuilder::new(); + let server = builder.cluster(); + let clients = builder.cluster(); + let client_aggregator = builder.process(); + + let clusters = vec![ + ( + server.id().raw_id(), + std::any::type_name::().to_string(), + 1, + ), + ( + clients.id().raw_id(), + std::any::type_name::().to_string(), + num_clients, + ), + ]; + let processes = vec![( + client_aggregator.id().raw_id(), + std::any::type_name::().to_string(), + )]; + + println!("Running network calibrator with message size: {} bytes, num clients: {}", message_size, num_clients); network_calibrator( num_clients_per_node, - *message_size, + message_size, &server, &clients, &client_aggregator, @@ -98,15 +96,13 @@ async fn main() { ], num_seconds_to_profile, &multi_run_metadata, - i, + 0, // Set to 0 to turn off comparisons between iterations ) .await; - builder = rewritten_ir_builder.build_with(|_| ir); - } - - let built = builder.finalize(); + let built = rewritten_ir_builder.build_with(|_| ir).finalize(); - // Generate graphs if requested - _ = built.generate_graph_with_config(&args.graph, None); + // Generate graphs if requested + _ = built.generate_graph_with_config(&args.graph, None); + } } \ No newline at end of file diff --git a/hydro_optimize_examples/src/network_calibrator.rs b/hydro_optimize_examples/src/network_calibrator.rs index 8b2a3d1..fc2f058 100644 --- a/hydro_optimize_examples/src/network_calibrator.rs +++ b/hydro_optimize_examples/src/network_calibrator.rs @@ -45,6 +45,9 @@ pub fn size_based_workload_generator<'a, Client>( return (virtual_id, payload); } } - (virtual_id, 
vec![0; message_size]) + + // Temp fix: stageleft's q! macro doesn't seem to support capturing `message_size` directly, so rebind it to a local first + let msg_size = message_size; + (virtual_id, vec![0; msg_size]) })) } \ No newline at end of file From 02ba6e9b66a698b0a4b4e218015fea9e9b4fbe54 Mon Sep 17 00:00:00 2001 From: David Chu Date: Mon, 10 Nov 2025 18:54:29 +0000 Subject: [PATCH 12/12] Saturate network calibrator --- Cargo.lock | 21 +++++++++++++------ .../examples/network_calibrator.rs | 6 +++--- .../examples/simple_graphs.rs | 2 +- 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fee4098..aee1001 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -886,6 +886,14 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "copy_span" +version = "0.1.0" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "core-foundation" version = "0.10.1" @@ -1864,6 +1872,7 @@ dependencies = [ "bincode", "bytes", "clap", + "copy_span", "ctor 0.2.9", "data-encoding", "dfir_lang", @@ -4124,9 +4133,9 @@ checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" [[package]] name = "stageleft" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b92cb4d28ec3c2b3aba8ee05487f10c3aa00d7a369a3fe9d4d89e8719f28ca4f" +checksum = "101469d4cf8d54ac88b735ecd1dcc5e11da859e191a1dd0e28e71a298ffae1b9" dependencies = [ "ctor 0.4.3", "proc-macro-crate", @@ -4138,9 +4147,9 @@ dependencies = [ [[package]] name = "stageleft_macro" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e05624677c37d2abebe0c3e50fa7722f99936d26de2a8a23ac5d2a397be596c0" +checksum = "e1dc19da279ba29d00ae49363841037bd7c933130d0c4476899e1d7f8f04dab5" dependencies = [ "proc-macro-crate", "proc-macro2", @@ -4151,9 +4160,9 @@ dependencies = [ [[package]] name = "stageleft_tool" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da14207006ed0031a24197e0a2d3bc84b2a7ecf3a2ca70b70f1886cf1a37b464" +checksum = "977b4e22d5233ef274f43a02d9946dd4ee66c1957eac8a5f031450ab97bfa834" dependencies = [ "prettyplease", "proc-macro-crate", @@ -36,8 +36,8 @@ async fn main() { }; let network = Arc::new(RwLock::new(GcpNetwork::new(&project, None))); - let num_clients = 5; // >1 clients so it doesn't become the bottleneck - let num_clients_per_node = 1; + let num_clients = 10; // >1 clients so it doesn't become the bottleneck + let num_clients_per_node = 1000; // Deploy let mut reusable_hosts = ReusableHosts { @@ -48,7 +48,7 @@ async fn main() { }; let message_sizes = vec![1, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]; - let num_seconds_to_profile = Some(20); + let num_seconds_to_profile = Some(60); let multi_run_metadata = RefCell::new(vec![]); for message_size in message_sizes { diff --git a/hydro_optimize_examples/examples/simple_graphs.rs b/hydro_optimize_examples/examples/simple_graphs.rs index 48570ca..a2ad66a 100644 --- a/hydro_optimize_examples/examples/simple_graphs.rs +++ b/hydro_optimize_examples/examples/simple_graphs.rs @@ -89,7 +89,7 @@ async fn main() { }; let num_times_to_optimize = 2; - let num_seconds_to_profile = Some(20); + let 
num_seconds_to_profile = Some(60); let multi_run_metadata = RefCell::new(vec![]); for i in 0..num_times_to_optimize {
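
For reference, the per-request logic that size_based_workload_generator stages inside q! can be modeled in plain Rust. The sketch below is illustrative only (next_payload and the main harness are hypothetical names, not part of the crate): the first request for a virtual client allocates a zeroed buffer of message_size bytes, and each echoed response comes back with its last byte incremented, so every round trip carries a constant-size payload.

// Plain-Rust model of the closure staged by size_based_workload_generator.
fn next_payload(message_size: usize, previous: Option<Vec<u8>>) -> Vec<u8> {
    if let Some(mut payload) = previous {
        if let Some(last) = payload.last_mut() {
            // The staged code uses `*last += 1`, which can overflow after 255
            // round trips (a panic in debug builds); wrapping_add makes the
            // modeled behavior explicit here.
            *last = last.wrapping_add(1);
            return payload;
        }
    }
    // First request (or an empty payload): zeroed buffer of the calibrated size.
    vec![0; message_size]
}

fn main() {
    let mut payload: Option<Vec<u8>> = None;
    for round in 0..3 {
        let p = next_payload(4, payload.take());
        println!("round {round}: {p:?}"); // [0, 0, 0, 0], then [0, 0, 0, 1], [0, 0, 0, 2]
        payload = Some(p);
    }
}

Under that assumption, the sweep in examples/network_calibrator.rs holds the payload size constant within each run and varies it across runs (message_sizes from 1 to 8192 bytes), so per-run differences in the profiled CPU usage approximate the per-message-size network cost the calibrator is meant to extract.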