Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
31e4c9a
Updating ALCF endpoints to include the synaps-i allocation (to be set…
davramov Jan 23, 2026
bf712a4
Updating bl832 config.py to distinguish IRI and SYNAPS-I ALCF endpoints
davramov Jan 23, 2026
77d6bc8
Adding the config.yaml file for setting up the globus compute endpoin…
davramov Jan 23, 2026
f4f9efa
Adding the config.yaml file for setting up the globus compute endpoin…
davramov Jan 23, 2026
0f5d5c9
Adding segmentation Prefect task, and segmentation globus compute cod…
davramov Jan 23, 2026
d3ad219
ensuring self.config for scicat and ghcr images
davramov Jan 23, 2026
a96c4a8
linting
davramov Jan 23, 2026
5873d94
Making separate ALCF SYNAPS-I endpoint configs for raw, reconstructed…
davramov Jan 23, 2026
49e6e7f
Refactoring ALCF reconstruction flow to use the prune_controller class
davramov Jan 26, 2026
922b715
Removing old commented out prune code
davramov Jan 26, 2026
c903411
linting and docstrings
davramov Jan 26, 2026
5a5cff4
Docstrings, linting, and type hints
davramov Jan 26, 2026
b5e0ba9
Updating globus compute config for segmentation
davramov Jan 26, 2026
54dab5d
turning ALCF recon+segmentation into a separate flow from recon+zarr …
davramov Jan 28, 2026
6361a33
updating pytest for alcf reconstruction
davramov Jan 28, 2026
f96c5cd
Adjusting endpoint names for synaps
davramov Jan 28, 2026
8324b7d
adding the alcf_forge_recon_segment flow to prefect.yaml as a separat…
davramov Jan 28, 2026
7599f2e
updating bl832 dispatcher to include alcf_forge_recon_segment as a se…
davramov Jan 28, 2026
d78c98d
adding transfer client uuid for ALCF SYNAPS-I
davramov Feb 2, 2026
3c0f25e
this configuration worked for launching segmentation on 1 GPU
davramov Feb 2, 2026
e6ebd1f
Updating segmentation compute endpoint config
davramov Feb 2, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 23 additions & 4 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ globus:
uri: beegfs.als.lbl.gov
uuid: d33b5d6e-1603-414e-93cb-bcb732b7914a
name: bl733-beegfs-data

# 8.3.2 ENDPOINTS

spot832:
Expand All @@ -72,17 +73,35 @@ globus:
uuid: 75b478b2-37af-46df-bfbd-71ed692c6506
name: data832_scratch

alcf832_raw:
alcf832_synaps_raw:
root_path: /data/bl832/raw
uri: alcf.anl.gov
uuid: 728a8e30-32ef-4000-814c-f9ccbc00bf13
name: alcf832_synaps_raw

alcf832_synaps_recon:
root_path: /data/bl832/scratch/reconstruction/
uri: alcf.anl.gov
uuid: 728a8e30-32ef-4000-814c-f9ccbc00bf13
name: alcf832_synaps_recon

alcf832_synaps_segment:
root_path: /data/bl832/scratch/segmentation/
uri: alcf.anl.gov
uuid: 728a8e30-32ef-4000-814c-f9ccbc00bf13
name: alcf832_synaps_segment

alcf832_iri_raw:
root_path: /data/raw
uri: alcf.anl.gov
uuid: 55c3adf6-31f1-4647-9a38-52591642f7e7
name: alcf_raw
name: alcf_iri_raw

alcf832_scratch:
alcf832_iri_scratch:
root_path: /data/scratch
uri: alcf.anl.gov
uuid: 55c3adf6-31f1-4647-9a38-52591642f7e7
name: alcf_scratch
name: alcf_iri_scratch

alcf_eagle832:
root_path: /IRIBeta/als/example
Expand Down
39 changes: 22 additions & 17 deletions orchestration/_tests/test_globus_flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,8 @@ def __init__(self) -> None:
MockSecret.for_endpoint("nersc832_alsdev_raw")),
"nersc832_alsdev_scratch": MockEndpoint("mock_nersc832_alsdev_scratch_path",
MockSecret.for_endpoint("nersc832_alsdev_scratch")),
"alcf832_raw": MockEndpoint("mock_alcf832_raw_path", MockSecret.for_endpoint("alcf832_raw")),
"alcf832_scratch": MockEndpoint("mock_alcf832_scratch_path", MockSecret.for_endpoint("alcf832_scratch")),
"alcf832_iri_raw": MockEndpoint("mock_alcf832_raw_path", MockSecret.for_endpoint("alcf832_iri_raw")),
"alcf832_iri_scratch": MockEndpoint("mock_alcf832_scratch_path", MockSecret.for_endpoint("alcf832_iri_scratch")),
}

# Mock apps
Expand All @@ -163,8 +163,8 @@ def __init__(self) -> None:
self.spot832 = self.endpoints["spot832"]
self.data832 = self.endpoints["data832"]
self.nersc832 = self.endpoints["nersc832"]
self.alcf832_raw = self.endpoints["alcf832_raw"]
self.alcf832_scratch = self.endpoints["alcf832_scratch"]
self.alcf832_iri_raw = self.endpoints["alcf832_iri_raw"]
self.alcf832_iri_scratch = self.endpoints["alcf832_iri_scratch"]
self.data832_raw = self.endpoints["data832_raw"]
self.data832_scratch = self.endpoints["data832_scratch"]
self.nersc832_alsdev_scratch = self.endpoints["nersc832_alsdev_scratch"]
Expand Down Expand Up @@ -247,8 +247,11 @@ def test_alcf_recon_flow(mocker: MockFixture):
"nersc832_alsdev_pscratch_raw": mocker.MagicMock(),
"nersc832_alsdev_pscratch_scratch": mocker.MagicMock(),
"nersc832_alsdev_recon_scripts": mocker.MagicMock(),
"alcf832_raw": mocker.MagicMock(),
"alcf832_scratch": mocker.MagicMock(),
"alcf832_iri_raw": mocker.MagicMock(),
"alcf832_iri_scratch": mocker.MagicMock(),
"alcf832_synaps_raw": mocker.MagicMock(),
"alcf832_synaps_recon": mocker.MagicMock(),
"alcf832_synaps_segment": mocker.MagicMock(),
}
)
mocker.patch(
Expand Down Expand Up @@ -298,10 +301,12 @@ def test_alcf_recon_flow(mocker: MockFixture):
return_value=mock_transfer_controller
)

# 7) Patch schedule_pruning => skip real scheduling
mock_schedule_pruning = mocker.patch(
"orchestration.flows.bl832.alcf.schedule_pruning",
return_value=True
# 7) Patch get_prune_controller(...) => skip real scheduling
mock_prune_controller = mocker.MagicMock()
mock_prune_controller.prune.return_value = True
mocker.patch(
"orchestration.flows.bl832.alcf.get_prune_controller",
return_value=mock_prune_controller
)

file_path = "/global/raw/transfer_tests/test.h5"
Expand All @@ -316,13 +321,13 @@ def test_alcf_recon_flow(mocker: MockFixture):
assert mock_transfer_controller.copy.call_count == 3, "Should do 3 transfers in success path"
mock_hpc_reconstruct.assert_called_once()
mock_hpc_multires.assert_called_once()
mock_schedule_pruning.assert_called_once()
assert mock_prune_controller.prune.call_count == 5, "Should schedule 5 prune operations in success path"

# Reset for next scenario
mock_transfer_controller.copy.reset_mock()
mock_hpc_reconstruct.reset_mock()
mock_hpc_multires.reset_mock()
mock_schedule_pruning.reset_mock()
mock_prune_controller.prune.reset_mock()

#
# ---------- CASE 2: HPC reconstruction fails ----------
Expand All @@ -339,13 +344,13 @@ def test_alcf_recon_flow(mocker: MockFixture):
assert mock_transfer_controller.copy.call_count == 1, (
"Should only do the first data832->alcf copy before HPC fails"
)
mock_schedule_pruning.assert_not_called()
mock_prune_controller.prune.assert_not_called()

# Reset
mock_transfer_controller.copy.reset_mock()
mock_hpc_reconstruct.reset_mock()
mock_hpc_multires.reset_mock()
mock_schedule_pruning.reset_mock()
mock_prune_controller.prune.reset_mock()

# ---------- CASE 3: Tiff->Zarr fails ----------
mock_transfer_controller.copy.return_value = True
Expand All @@ -360,13 +365,13 @@ def test_alcf_recon_flow(mocker: MockFixture):
# HPC is done, so there are 2 successful transfers (data832->alcf).
# We have not transferred tiff or zarr => total 2 copies
assert mock_transfer_controller.copy.call_count == 2
mock_schedule_pruning.assert_not_called()
mock_prune_controller.prune.assert_not_called()

# Reset
mock_transfer_controller.copy.reset_mock()
mock_hpc_reconstruct.reset_mock()
mock_hpc_multires.reset_mock()
mock_schedule_pruning.reset_mock()
mock_prune_controller.prune.reset_mock()

# ---------- CASE 4: data832->ALCF fails immediately ----------
mock_transfer_controller.copy.return_value = False
Expand All @@ -380,4 +385,4 @@ def test_alcf_recon_flow(mocker: MockFixture):
mock_hpc_multires.assert_not_called()
# The only call is the failing copy
mock_transfer_controller.copy.assert_called_once()
mock_schedule_pruning.assert_not_called()
mock_prune_controller.prune.assert_not_called()
Loading