From b221d2952ed4cee84e0d76070f00ef970209cf46 Mon Sep 17 00:00:00 2001 From: Andrei Maslennikov Date: Fri, 30 Jan 2026 11:32:46 +0100 Subject: [PATCH] More robust bench execution for Dynamo over k8s --- .../systems/kubernetes/kubernetes_system.py | 25 ++++++++----------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/src/cloudai/systems/kubernetes/kubernetes_system.py b/src/cloudai/systems/kubernetes/kubernetes_system.py index a74bc1fc6..f9ae85113 100644 --- a/src/cloudai/systems/kubernetes/kubernetes_system.py +++ b/src/cloudai/systems/kubernetes/kubernetes_system.py @@ -310,23 +310,18 @@ def _run_genai_perf(self, job: KubernetesJob) -> None: frontend_pod = self._get_dynamo_pod_by_role(role="frontend") - logging.debug(f"Executing genai-perf in pod={frontend_pod} cmd={genai_perf_cmd}") + kubectl_exec_cmd = ["kubectl", "exec", "-n", self.default_namespace, frontend_pod, "--", *genai_perf_cmd] + logging.debug(f"Executing genai-perf in pod={frontend_pod} cmd={kubectl_exec_cmd}") try: - genai_results = lazy.k8s.stream.stream( - self.core_v1.connect_get_namespaced_pod_exec, - name=frontend_pod, - namespace=self.default_namespace, - command=genai_perf_cmd, - stderr=True, - stdin=False, - stdout=True, - tty=False, - _request_timeout=60 * 10, - ) + result = subprocess.run(kubectl_exec_cmd, capture_output=True, text=True, timeout=60 * 10) + logging.debug(f"genai-perf exited with code {result.returncode}") with (job.test_run.output_path / "genai_perf.log").open("w") as f: - f.write(genai_results) - except lazy.k8s.client.ApiException as e: - logging.error(f"Error executing genai-perf command in pod '{frontend_pod}': {e}") + f.write(result.stdout) + if result.stderr: + f.write("\nSTDERR:\n") + f.write(result.stderr) + except Exception as e: + logging.debug(f"Error executing genai-perf command in pod '{frontend_pod}': {e}") cp_logs_cmd = " ".join( [