Skip to content

Commit 36f585e

Browse files
committed
Migrate to uv and pyproject.toml (#2148)
Summary: Pull Request resolved: #2148 Migrates Monarch to use `uv` and rely more on `pyproject.toml`. - Migrate static package metadata from setup.py to pyproject.toml - Add uv-specific configuration for PyTorch stable builds with multiple CUDA versions (cu128, cu124, cu121, cpu) - Note, for some reason `uv` does not play nicely with PyTorch nightly so we pin it to stable. Logs: P2082930246. Haven't looked into this yet. - Users can install with PyTorch nightly via: `$ uv sync --extra tensor --extra-index-url https://download.pytorch.org/whl/nightly/cu128` - Make torch optional with auto-detection and graceful degradation when not available - We need this because `uv` will first run `setup.py` to identify any dependencies etc., but fails if torch isn't installed. `uv` will then install torch in a secondary pass, then finally run the setup.py script. - This also allows us to avoid needing to build with `--no-build-isolation`. - Use PyO3's extension-module feature liberally to avoid libpython linking issues in Python extensions - Create separate `monarch_hyperactor_bin` crate to isolate binaries from `extension-module` feature. - Without this, `monarch_hyperactor` would always build as "`python-embed`" mode, when it itself is technically a `python-extension`. - add a uv.lock file which ... - Adds a .python-version so `uv` would default to Python 3.12. Without it it defaults to Python 3.10 because we support >=3.10 but this modernizes a bit more. Differential Revision: D89229505
1 parent 4fbae16 commit 36f585e

File tree

26 files changed

+1846
-153
lines changed

26 files changed

+1846
-153
lines changed

.github/workflows/build-cpu.yml

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,14 @@ jobs:
2626
# Source common setup functions
2727
source scripts/common-setup.sh
2828
29-
# Setup build environment (conda + system deps + rust + build deps)
29+
# Setup build environment (conda + system deps + rust + uv)
3030
setup_build_environment
3131
32-
# Install torch nightly (CPU version)
33-
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
34-
pip install -r build-requirements.txt
32+
# Install all dependencies (including torch and build tools) without building the project
33+
# This syncs from pyproject.toml and installs everything we need for the build
34+
uv pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
35+
uv sync --no-install-project
3536
3637
# Build monarch (No tensor engine, CPU version)
37-
USE_TENSOR_ENGINE=0 python setup.py bdist_wheel
38+
# --no-build-isolation uses the dependencies we just installed
39+
USE_TENSOR_ENGINE=0 uv build --wheel --no-build-isolation

.github/workflows/build-cuda.yml

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,15 +31,17 @@ jobs:
3131
# Source common setup functions
3232
source scripts/common-setup.sh
3333
34-
# Setup build environment (conda + system deps + rust + build deps)
34+
# Setup build environment (conda + system deps + rust + uv)
3535
setup_build_environment
3636
37-
# Install torch nightly
38-
pip install ${{ matrix.torch-spec }}
39-
pip install -r build-requirements.txt
37+
# Install all dependencies (including torch and build tools) without building the project
38+
# This syncs from pyproject.toml and installs everything we need for the build
39+
uv pip install ${{ matrix.torch-spec }}
40+
uv sync --no-install-project
4041
4142
# Setup Tensor Engine
4243
setup_tensor_engine
4344
4445
# Build monarch (CUDA version)
45-
python setup.py bdist_wheel
46+
# --no-build-isolation uses the dependencies we just installed
47+
uv build --wheel --no-build-isolation

.github/workflows/publish_release.yml

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,28 +36,30 @@ jobs:
3636
source scripts/common-setup.sh
3737
setup_build_environment ${{ matrix.python-version }}
3838
39-
# Install torch nightly before installing the wheel,
40-
# so that we can test the wheel against the latest nightly
41-
pip install ${{ matrix.torch-spec }}
42-
pip install -r build-requirements.txt
39+
# Install all dependencies (including torch and build tools) without building the project
40+
# This syncs from pyproject.toml and installs everything we need for the build
41+
uv pip install ${{ matrix.torch-spec }}
42+
uv sync --no-install-project
4343
4444
# Setup Tensor Engine dependencies
4545
setup_tensor_engine
4646
47-
cargo install --path monarch_hyperactor
47+
# Install process_allocator binary for backwards compatibility
48+
cargo install --path monarch_hyperactor_bin --bin process_allocator
4849
4950
# Build wheel
51+
# --no-build-isolation uses the dependencies we just installed
5052
export MONARCH_PACKAGE_NAME="torchmonarch"
5153
export MONARCH_VERSION="${{ github.event.inputs.version }}"
52-
python setup.py bdist_wheel
54+
uv build --wheel --no-build-isolation
5355
5456
# hacky until the right distribution wheel can be made...
5557
find dist -name "*linux_x86_64.whl" -type f -exec bash -c 'mv "$1" "${1/linux_x86_64.whl/manylinux2014_x86_64.whl}"' _ {} \;
5658
ls -la dist/
5759
5860
# Run tests
5961
install_python_test_dependencies
60-
pip install dist/*.whl
62+
uv pip install dist/*.whl
6163
python -c "import monarch"
6264
publish:
6365
name: Publish to PyPI

.github/workflows/wheels.yml

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -36,29 +36,31 @@ jobs:
3636
source scripts/common-setup.sh
3737
setup_build_environment ${{ matrix.python-version }}
3838
39-
# Install torch nightly before installing the wheel,
40-
# so that we can test the wheel against the latest nightly
41-
pip install ${{ matrix.torch-spec }}
42-
pip install -r build-requirements.txt
39+
# Install all dependencies (including torch and build tools) without building the project
40+
# This syncs from pyproject.toml and installs everything we need for the build
41+
uv pip install ${{ matrix.torch-spec }}
42+
uv sync --no-install-project
4343
4444
# Setup Tensor Engine dependencies
4545
setup_tensor_engine
4646
47-
cargo install --path monarch_hyperactor
47+
# Install process_allocator binary for backwards compatibility
48+
cargo install --path monarch_hyperactor_bin --bin process_allocator
4849
49-
# Build wheel
50+
# Build wheel with uv
51+
# --no-build-isolation uses the dependencies we just installed
5052
export MONARCH_PACKAGE_NAME="torchmonarch-nightly"
5153
export MONARCH_VERSION=$(date +'%Y.%m.%d')
5254
53-
python setup.py bdist_wheel
55+
uv build --wheel --no-build-isolation
5456
5557
# hacky until the right distribution wheel can be made...
5658
find dist -name "*linux_x86_64.whl" -type f -exec bash -c 'mv "$1" "${1/linux_x86_64.whl/manylinux2014_x86_64.whl}"' _ {} \;
5759
ls -la dist/
5860
5961
# Run tests
62+
uv pip install dist/*.whl
6063
install_python_test_dependencies
61-
pip install dist/*.whl
6264
python -c "import monarch"
6365
publish:
6466
name: Publish to PyPI

.python-version

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
3.12

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ members = [
1313
"monarch_conda",
1414
"monarch_extension",
1515
"monarch_hyperactor",
16+
"monarch_hyperactor_bin",
1617
"monarch_messages",
1718
"monarch_perfetto_trace",
1819
"monarch_rdma",

README.md

Lines changed: 37 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,18 @@
33
**Monarch** is a distributed programming framework for PyTorch based on scalable
44
actor messaging. It provides:
55

6-
1. Remote actors with scalable messaging: Actors are grouped into collections called meshes and messages can be broadcast to all members.
7-
2. Fault tolerance through supervision trees: Actors and processes form a tree and failures propagate up the tree, providing good default error behavior and enabling fine-grained fault recovery.
8-
3. Point-to-point RDMA transfers: cheap registration of any GPU or CPU memory in a process, with the one-sided transfers based on libibverbs
9-
4. Distributed tensors: actors can work with tensor objects sharded across processes
10-
11-
Monarch code imperatively describes how to create processes and actors using a simple python API:
6+
1. Remote actors with scalable messaging: Actors are grouped into collections
7+
called meshes and messages can be broadcast to all members.
8+
2. Fault tolerance through supervision trees: Actors and processes form a tree
9+
and failures propagate up the tree, providing good default error behavior and
10+
enabling fine-grained fault recovery.
11+
3. Point-to-point RDMA transfers: cheap registration of any GPU or CPU memory in
12+
a process, with one-sided transfers based on libibverbs.
13+
4. Distributed tensors: actors can work with tensor objects sharded across
14+
processes
15+
16+
Monarch code imperatively describes how to create processes and actors using a
17+
simple Python API:
1218

1319
```python
1420
from monarch.actor import Actor, endpoint, this_host
@@ -33,8 +39,9 @@ fut = trainers.train.call(step=0)
3339
fut.get()
3440
```
3541

36-
37-
The [introduction to monarch concepts](https://meta-pytorch.org/monarch/generated/examples/getting_started.html) provides an introduction to using these features.
42+
The
43+
[introduction to monarch concepts](https://meta-pytorch.org/monarch/generated/examples/getting_started.html)
44+
provides an introduction to using these features.
3845

3946
> ⚠️ **Early Development Warning** Monarch is currently in an experimental
4047
> stage. You should expect bugs, incomplete features, and APIs that may change
@@ -45,16 +52,21 @@ The [introduction to monarch concepts](https://meta-pytorch.org/monarch/generate
4552
4653
## 📖 Documentation
4754

48-
View Monarch's hosted documentation [at this link](https://meta-pytorch.org/monarch/).
55+
View Monarch's hosted documentation
56+
[at this link](https://meta-pytorch.org/monarch/).
4957

5058
## Installation
51-
Note for running distributed tensors and RDMA, the local torch version must match the version that monarch was built with.
52-
Stable and nightly distributions require libmxl and libibverbs (runtime).
59+
60+
Note for running distributed tensors and RDMA, the local torch version must
61+
match the version that monarch was built with. Stable and nightly distributions
62+
require libmlx5 and libibverbs (runtime).
5363

5464
## Fedora
65+
5566
`sudo dnf install -y libibverbs rdma-core libmlx5 libibverbs-devel rdma-core-devel`
5667

5768
## Ubuntu
69+
5870
`sudo apt install -y rdma-core libibverbs1 libmlx5-1 libibverbs-dev`
5971

6072
### Stable
@@ -64,14 +76,15 @@ Stable and nightly distributions require libmxl and libibverbs (runtime).
6476
torchmonarch stable is built with the latest stable torch.
6577

6678
### Nightly
79+
6780
`pip install torchmonarch-nightly`
6881

6982
torchmonarch-nightly is built with torch nightly.
7083

7184
### Build and Install from Source
7285

73-
If you're building Monarch from source, you should be building it with the nightly PyTorch as well for ABI compatibility.
74-
86+
If you're building Monarch from source, you should be building it with the
87+
nightly PyTorch as well for ABI compatibility.
7588

7689
#### On Fedora distributions
7790

@@ -161,10 +174,11 @@ pip list | grep monarch
161174

162175
#### On non-CUDA machines
163176

164-
You can also build Monarch to run on non-CUDA machines, e.g. locally on a MacOS system.
165-
166-
Note that this does not support tensor engine, which is tied to CUDA and RDMA (via ibverbs).
177+
You can also build Monarch to run on non-CUDA machines, e.g. locally on a macOS
178+
system.
167179

180+
Note that this does not support the tensor engine, which is tied to CUDA and RDMA
181+
(via ibverbs).
168182

169183
```sh
170184

@@ -180,8 +194,6 @@ rustup default nightly
180194
# Install build dependencies
181195
pip install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
182196
pip install -r build-requirements.txt
183-
# Install test dependencies
184-
pip install -r python/tests/requirements.txt
185197

186198
# Build and install Monarch
187199
USE_TENSOR_ENGINE=0 pip install --no-build-isolation .
@@ -192,10 +204,10 @@ USE_TENSOR_ENGINE=0 pip install --no-build-isolation -e .
192204
pip list | grep monarch
193205
```
194206

195-
196207
## Running examples
197208

198-
Check out the `examples/` directory for demonstrations of how to use Monarch's APIs.
209+
Check out the `examples/` directory for demonstrations of how to use Monarch's
210+
APIs.
199211

200212
We'll be adding more examples as we stabilize and polish functionality!
201213

@@ -205,6 +217,7 @@ We have both Rust and Python unit tests. Rust tests are run with `cargo-nextest`
205217
and Python tests are run with `pytest`.
206218

207219
Rust tests:
220+
208221
```sh
209222
# We use cargo-nextest to run our tests, as it can provide strong process isolation
210223
# between every test.
@@ -213,12 +226,14 @@ Rust tests:
213226
cargo install cargo-nextest --locked
214227
cargo nextest run
215228
```
229+
216230
cargo-nextest supports all of the filtering flags of "cargo test".
217231

218232
Python tests:
233+
219234
```sh
220-
# Make sure to install test dependencies first
221-
pip install -r python/tests/requirements.txt
235+
# Install test dependencies if not already installed
236+
pip install -e '.[test]'
222237
# Run unit tests. Consider -s for more verbose output
223238
pytest python/tests/ -v -m "not oss_skip"
224239
```

build-requirements.txt

Lines changed: 0 additions & 4 deletions
This file was deleted.

monarch_extension/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ monarch_rdma_extension = { version = "0.0.0", path = "../monarch_rdma/extension"
3030
monarch_tensor_worker = { version = "0.0.0", path = "../monarch_tensor_worker", optional = true }
3131
nccl-sys = { path = "../nccl-sys", optional = true }
3232
ndslice = { version = "0.0.0", path = "../ndslice" }
33-
pyo3 = { version = "0.24", features = ["anyhow", "multiple-pymethods", "py-clone"] }
33+
pyo3 = { version = "0.24", features = ["anyhow", "extension-module", "multiple-pymethods", "py-clone"] }
3434
rdmaxcel-sys = { path = "../rdmaxcel-sys", optional = true }
3535
serde = { version = "1.0.219", features = ["derive", "rc"] }
3636
tokio = { version = "1.47.1", features = ["full", "test-util", "tracing"] }

monarch_hyperactor/Cargo.toml

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# @generated by autocargo from //monarch/monarch_hyperactor:[monarch_hyperactor,monarch_hyperactor_test_bootstrap,process_allocator-oss,test_monarch_hyperactor]
1+
# @generated by autocargo from //monarch/monarch_hyperactor:[monarch_hyperactor,test_monarch_hyperactor]
22

33
[package]
44
name = "monarch_hyperactor"
@@ -7,15 +7,6 @@ authors = ["Meta"]
77
edition = "2021"
88
license = "BSD-3-Clause"
99

10-
[[bin]]
11-
name = "monarch_hyperactor_test_bootstrap"
12-
path = "test/bootstrap.rs"
13-
edition = "2024"
14-
15-
[[bin]]
16-
name = "process_allocator"
17-
edition = "2024"
18-
1910
[[test]]
2011
name = "test_monarch_hyperactor"
2112
path = "tests/lib.rs"
@@ -26,7 +17,6 @@ async-once-cell = "0.4.2"
2617
async-trait = "0.1.86"
2718
bincode = "1.3.3"
2819
bytes = { version = "1.10", features = ["serde"] }
29-
clap = { version = "4.5.42", features = ["derive", "env", "string", "unicode", "wrap_help"] }
3020
erased-serde = "0.4.9"
3121
fastrand = "2.1.1"
3222
fbinit = { version = "0.2.0", git = "https://github.com/facebookexperimental/rust-shed.git", branch = "main" }
@@ -39,7 +29,6 @@ hyperactor_telemetry = { version = "0.0.0", path = "../hyperactor_telemetry" }
3929
inventory = "0.3.21"
4030
lazy_errors = "0.10.1"
4131
lazy_static = "1.5"
42-
libc = "0.2.139"
4332
monarch_conda = { version = "0.0.0", path = "../monarch_conda" }
4433
monarch_types = { version = "0.0.0", path = "../monarch_types" }
4534
ndslice = { version = "0.0.0", path = "../ndslice" }
@@ -61,7 +50,7 @@ buck-resources = "1"
6150
dir-diff = "0.3"
6251

6352
[features]
64-
default = []
53+
default = ["pyo3/extension-module"]
6554
packaged_rsync = []
6655

6756
[lints]

0 commit comments

Comments
 (0)