Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions conf/experimental/ai_dynamo/test/vllm.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -29,7 +29,14 @@ docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.0"
decode-cmd = 'python3 -m dynamo.vllm'

[cmd_args.dynamo.decode_worker]
pipeline-parallel-size = 1
num-nodes = 1

[cmd_args.dynamo.decode_worker.args]
model = "Qwen/Qwen3-0.6B"
gpu-memory-utilization = 0.95
tensor-parallel-size = 8
pipeline-parallel-size = 1
data-parallel-size = 1

[cmd_args.genai_perf]
model = "Qwen/Qwen3-0.6B"
Expand All @@ -46,6 +53,10 @@ docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:0.7.0"
concurrency = 2
extra-args = "--streaming -- -v --async"

[cmd_args.lmcache]

[cmd_args.lmbench]

[extra_env_vars]
UCX_LOG_LEVEL = "warn"
UCX_TLS = "cuda_copy,rc_x"
Expand Down
9 changes: 6 additions & 3 deletions conf/experimental/ai_dynamo/test_scenario/vllm_k8s.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -24,7 +24,10 @@ test_name = "vLLM-Qwen3-0.6B"
[Tests.cmd_args.dynamo]
[Tests.cmd_args.dynamo.prefill_worker]
num-nodes = 1
tensor-parallel-size = 8
[Tests.cmd_args.dynamo.prefill_worker.args]
tensor-parallel-size = 8

[Tests.cmd_args.dynamo.decode_worker]
num-nodes = 1
tensor-parallel-size = 8
[Tests.cmd_args.dynamo.decode_worker.args]
tensor-parallel-size = 8
22 changes: 13 additions & 9 deletions conf/experimental/ai_dynamo/test_scenario/vllm_slurm.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -25,13 +25,15 @@ time_limit = "00:10:00"

[Tests.cmd_args.dynamo.prefill_worker]
num-nodes = 1
tensor-parallel-size = 4
pipeline-parallel-size = 1
[Tests.cmd_args.dynamo.prefill_worker.args]
tensor-parallel-size = 4
pipeline-parallel-size = 1

[Tests.cmd_args.dynamo.decode_worker]
num-nodes = 1
tensor-parallel-size = 4
pipeline-parallel-size = 1
[Tests.cmd_args.dynamo.decode_worker.args]
tensor-parallel-size = 4
pipeline-parallel-size = 1

[[Tests]]
id = "test.disagg.multinode"
Expand All @@ -41,10 +43,12 @@ time_limit = "00:10:00"

[Tests.cmd_args.dynamo.prefill_worker]
num-nodes = 2
tensor-parallel-size = 4
pipeline-parallel-size = 1
[Tests.cmd_args.dynamo.prefill_worker.args]
tensor-parallel-size = 4
pipeline-parallel-size = 1

[Tests.cmd_args.dynamo.decode_worker]
num-nodes = 2
tensor-parallel-size = 4
pipeline-parallel-size = 1
[Tests.cmd_args.dynamo.decode_worker.args]
tensor-parallel-size = 4
pipeline-parallel-size = 1
74 changes: 62 additions & 12 deletions src/cloudai/systems/kubernetes/kubernetes_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,25 +298,75 @@ def _run_genai_perf(self, job: KubernetesJob) -> None:
raise TypeError("Test definition must be an instance of AIDynamoTestDefinition")

genai_perf_results_path = "/tmp/cloudai/genai-perf"
frontend_pod = self._get_dynamo_pod_by_role(role="frontend")

genai_perf_cmd = ["genai-perf", "profile", f"--artifact-dir={genai_perf_results_path}"]
for k, v in tdef.cmd_args.genai_perf.model_dump(
exclude={"extra_args", "extra-args"}, exclude_none=True
).items():
genai_perf_cmd.append(f"--{k}={v}")
if extra_args := tdef.cmd_args.genai_perf.extra_args:
genai_perf_cmd.extend(extra_args.split())
logging.debug(f"GenAI perf arguments: {genai_perf_cmd=}")
# Copy wrapper script and calc_percentile_csv script to the pod
wrapper_script_path = tdef.genai_perf_script.installed_path
calc_csv_script_path = tdef.calc_percentile_csv.installed_path

frontend_pod = self._get_dynamo_pod_by_role(role="frontend")
pod_wrapper_path = "/tmp/genai_perf.sh"
pod_calc_csv_path = "/tmp/calc_percentile_csv.py"

logging.debug(f"Copying wrapper script {wrapper_script_path} to pod {frontend_pod}")
cp_wrapper_cmd = f"kubectl cp {wrapper_script_path} {self.default_namespace}/{frontend_pod}:{pod_wrapper_path}"
subprocess.run(cp_wrapper_cmd, shell=True, capture_output=True, text=True, check=True)

logging.debug(f"Copying calc_percentile_csv script {calc_csv_script_path} to pod {frontend_pod}")
cp_calc_cmd = f"kubectl cp {calc_csv_script_path} {self.default_namespace}/{frontend_pod}:{pod_calc_csv_path}"
subprocess.run(cp_calc_cmd, shell=True, capture_output=True, text=True, check=True)

logging.debug(f"Executing genai-perf in pod={frontend_pod} cmd={genai_perf_cmd}")
# Make wrapper script executable
chmod_cmd = ["chmod", "+x", pod_wrapper_path]
logging.debug(f"Making wrapper script executable in pod {frontend_pod}")
try:
lazy.k8s.stream.stream(
self.core_v1.connect_get_namespaced_pod_exec,
name=frontend_pod,
namespace=self.default_namespace,
command=chmod_cmd,
stderr=True,
stdin=False,
stdout=True,
tty=False,
)
except lazy.k8s.client.ApiException as e:
logging.error(f"Error making wrapper script executable in pod '{frontend_pod}': {e}")

# Build genai-perf command arguments
genai_perf_cmd_parts = ["genai-perf", "profile", f"--artifact-dir={genai_perf_results_path}"]
if tdef.cmd_args.genai_perf.args:
for k, v in tdef.cmd_args.genai_perf.args.model_dump(exclude_none=True).items():
genai_perf_cmd_parts.append(f"--{k}={v}")
if extra_args := tdef.cmd_args.genai_perf.extra_args:
if isinstance(extra_args, str):
genai_perf_cmd_parts.extend(extra_args.split())
else:
genai_perf_cmd_parts.extend(extra_args)

# Build wrapper command with proper parameters
report_file = "genai_perf_report.csv"
wrapper_cmd = [
"/bin/bash",
Comment on lines +345 to +349
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Check that `tdef.cmd_args.genai_perf.args` exists before accessing it.

pod_wrapper_path,
"--result_dir",
genai_perf_results_path,
"--report_file",
report_file,
"--calc_percentile_csv_script",
pod_calc_csv_path,
Comment on lines +347 to +356
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Wrapper flag mismatch: `--report_file` isn't parsed by `genai_perf.sh`.
The script accepts `--report_name`, so this flag is ignored and the script silently falls back to its default. Use `--report_name` (or support both flags in the script) to avoid the silent default.

🔧 Proposed fix
-            "--report_file",
+            "--report_name",
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
- report_file = "genai_perf_report.csv"
- wrapper_cmd = [
- "/bin/bash",
- pod_wrapper_path,
- "--result_dir",
- genai_perf_results_path,
- "--report_file",
- report_file,
- "--calc_percentile_csv_script",
- pod_calc_csv_path,
+ report_file = "genai_perf_report.csv"
+ wrapper_cmd = [
+ "/bin/bash",
+ pod_wrapper_path,
+ "--result_dir",
+ genai_perf_results_path,
+ "--report_name",
+ report_file,
+ "--calc_percentile_csv_script",
+ pod_calc_csv_path,
🤖 Prompt for AI Agents
In `@src/cloudai/systems/kubernetes/kubernetes_system.py`, around lines 347-356:
the wrapper flag used in the `wrapper_cmd` list is incorrect. `genai_perf.sh`
expects `--report_name`, but the code passes `--report_file` (see the variables
`wrapper_cmd`, `report_file`, and `pod_wrapper_path`). Update the argument list
in `wrapper_cmd` to use "--report_name" (or add both "--report_name" and
"--report_file" if you want backward compatibility) so that the script receives
the intended report name. Keep the value as `report_file`, and leave
`pod_calc_csv_path` and `genai_perf_results_path` unchanged.

"--gpus_per_node",
str(self.gpus_per_node),
"--",
*genai_perf_cmd_parts,
]

logging.debug(f"Executing genai-perf wrapper in pod={frontend_pod} cmd={wrapper_cmd}")
try:
genai_results = lazy.k8s.stream.stream(
self.core_v1.connect_get_namespaced_pod_exec,
name=frontend_pod,
namespace=self.default_namespace,
command=genai_perf_cmd,
command=wrapper_cmd,
stderr=True,
stdin=False,
stdout=True,
Expand All @@ -326,7 +376,7 @@ def _run_genai_perf(self, job: KubernetesJob) -> None:
with (job.test_run.output_path / "genai_perf.log").open("w") as f:
f.write(genai_results)
except lazy.k8s.client.ApiException as e:
logging.error(f"Error executing genai-perf command in pod '{frontend_pod}': {e}")
logging.error(f"Error executing genai-perf wrapper command in pod '{frontend_pod}': {e}")

cp_logs_cmd = " ".join(
[
Expand Down
8 changes: 5 additions & 3 deletions src/cloudai/systems/slurm/slurm_command_gen_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ def __init__(self, system: System, test_run: TestRun) -> None:
super().__init__(system, test_run)
self.system = cast(SlurmSystem, system)
self.test_run = test_run
self.container_install_path = "/cloudai_install"
self.container_results_path = "/cloudai_run_results"

self._node_spec_cache: dict[str, tuple[int, list[str]]] = {}

Expand Down Expand Up @@ -79,8 +81,8 @@ def container_mounts(self) -> list[str]:
repo_mounts.append(f"{path}:{repo.container_mount}")

mounts = [
f"{self.test_run.output_path.absolute()}:/cloudai_run_results",
f"{self.system.install_path.absolute()}:/cloudai_install",
f"{self.test_run.output_path.absolute()}:{self.container_results_path}",
f"{self.system.install_path.absolute()}:{self.container_install_path}",
f"{self.test_run.output_path.absolute()}",
*tdef.extra_container_mounts,
*repo_mounts,
Expand Down Expand Up @@ -302,7 +304,7 @@ def _ranks_mapping_cmd(self) -> str:
def _metadata_cmd(self) -> str:
(self.test_run.output_path.absolute() / "metadata").mkdir(parents=True, exist_ok=True)
num_nodes, _ = self.get_cached_nodes_spec()
metadata_script_path = "/cloudai_install"
metadata_script_path = self.container_install_path
if not self.image_path():
metadata_script_path = str(self.system.install_path.absolute())
return " ".join(
Expand Down
16 changes: 13 additions & 3 deletions src/cloudai/workloads/ai_dynamo/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
# Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -19,8 +19,13 @@
AIDynamoCmdArgs,
AIDynamoTestDefinition,
DecodeWorkerArgs,
GenAIPerfArgs,
GenAIPerf,
LMBench,
LMCache,
LMCacheArgs,
PrefillWorkerArgs,
WorkerBaseArgs,
WorkerConfig,
)
from .kubernetes_json_gen_strategy import AIDynamoKubernetesJsonGenStrategy
from .report_generation_strategy import AIDynamoReportGenerationStrategy
Expand All @@ -34,6 +39,11 @@
"AIDynamoSlurmCommandGenStrategy",
"AIDynamoTestDefinition",
"DecodeWorkerArgs",
"GenAIPerfArgs",
"GenAIPerf",
"LMBench",
"LMCache",
"LMCacheArgs",
"PrefillWorkerArgs",
"WorkerBaseArgs",
"WorkerConfig",
]
Loading