diff --git a/.github/workflows/agent-ci.yaml b/.github/workflows/agent-ci.yaml index 6261ed4e..cb3c18ee 100644 --- a/.github/workflows/agent-ci.yaml +++ b/.github/workflows/agent-ci.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # @@ -36,6 +36,52 @@ env: REGISTRY: ghcr.io IMAGE_NAME: ${{ github.repository }} jobs: + compute-metadata: + name: Compute Image Metadata + runs-on: ubuntu-latest + outputs: + git-sha: ${{ steps.meta.outputs.git-sha }} + agent-version: ${{ steps.meta.outputs.agent-version }} + agent-image-tag: ${{ steps.meta.outputs.agent-image-tag }} + tags: ${{ steps.meta.outputs.tags }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Fetch all tags + run: git fetch --tags --force + - name: Compute metadata + id: meta + run: | + export GIT_SHA=$(git rev-parse --short "${GITHUB_SHA}") + echo "git-sha=${GIT_SHA}" >> $GITHUB_OUTPUT + + case "${GITHUB_REF_TYPE}" in + branch) + # The last tag + current git sha + export AGENT_VERSION=$(git tag --list 'agent*' --sort=-v:refname | head -n 1 | cut -d/ -f2)+${GIT_SHA} + # Convert + to - for docker tag compliance + export AGENT_IMAGE_TAG=$(echo "${AGENT_VERSION}" | tr + -) + TAGS="-t ${REGISTRY@L}/${GITHUB_REPOSITORY}/agent:${GIT_SHA} -t ${REGISTRY@L}/${GITHUB_REPOSITORY}/agent:${AGENT_IMAGE_TAG}" + ;; + tag) + # The version part of the tag + export AGENT_VERSION=$(echo "${GITHUB_REF_NAME}" | cut -f 2 -d /) + export AGENT_IMAGE_TAG="${AGENT_VERSION}" + TAGS="-t ${REGISTRY@L}/${GITHUB_REPOSITORY}/agent:${GIT_SHA} -t ${REGISTRY@L}/${GITHUB_REPOSITORY}/agent:${AGENT_VERSION} -t ${REGISTRY@L}/${GITHUB_REPOSITORY}/agent:latest" + ;; + *) + echo "Unknown type ${GITHUB_REF_TYPE}" + exit 1 + ;; + esac + + echo "agent-version=${AGENT_VERSION}" >>
$GITHUB_OUTPUT + echo "agent-image-tag=${AGENT_IMAGE_TAG}" >> $GITHUB_OUTPUT + echo "tags=${TAGS}" >> $GITHUB_OUTPUT + echo "📦 Agent Version: ${AGENT_VERSION}" + echo "🏷️ Image Tag: ${AGENT_IMAGE_TAG}" + echo "🏷️ All Tags: ${TAGS}" + test: name: Skyhook Agent Unit Tests runs-on: ubuntu-latest @@ -62,7 +108,7 @@ jobs: cat test-summary.md >> $GITHUB_STEP_SUMMARY build-and-push-agent: runs-on: ubuntu-latest - needs: [test] # Don't run the build and push if the unit tests fail + needs: [test, compute-metadata] # Don't run the build and push if the unit tests fail # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. permissions: contents: read @@ -92,38 +138,21 @@ jobs: - name: Build the agent container image id: build + env: + GIT_SHA: ${{ needs.compute-metadata.outputs.git-sha }} + AGENT_VERSION: ${{ needs.compute-metadata.outputs.agent-version }} + TAGS: ${{ needs.compute-metadata.outputs.tags }} run: | apt-get update && apt-get install -y make git jq cd agent - # if this is a tag build, use the tag as the version, otherwise use the sha - git fetch --all - export GIT_SHA=$(git rev-parse --short ${{ github.sha }}) - TAGS="-t ${REGISTRY@L}/${{env.IMAGE_NAME}}/agent:${GIT_SHA}" - case ${{ github.ref_type }} in - branch) - # The last tag + current git sha - export AGENT_VERSION=$(git tag --list 'agent*' --sort=-v:refname | head -n 1 | cut -d/ -f2)+${GIT_SHA} - TAGS="$TAGS -t ${REGISTRY@L}/${{env.IMAGE_NAME}}/agent:$(echo "${AGENT_VERSION}" | tr + -)" - ;; - tag) - # The version part of the tag - export AGENT_VERSION=$(echo "${{ github.ref_name }}" | cut -f 2 -d /) - TAGS="$TAGS -t ${REGISTRY@L}/${{env.IMAGE_NAME}}/agent:${AGENT_VERSION} -t ${REGISTRY@L}/${{env.IMAGE_NAME}}/agent:latest" - ;; - *) - echo "Unkown type ${{ github.ref_type }}" - exit 1 - ;; - esac - export TAGS=$TAGS + echo "📦 Building agent version: ${AGENT_VERSION}" + echo "🏷️ Tags: ${TAGS}" export REGISTRY=${REGISTRY@L} export BUILD_ARGS="--push" make docker-build-only 
agent_version=${AGENT_VERSION} cat metadata.json echo "digest=$(cat metadata.json | jq -r .\"containerimage.digest\")" >> $GITHUB_OUTPUT cat $GITHUB_OUTPUT - env: - AGENT_IMAGE: ${{env.IMAGE_NAME}}/agent # This step generates an artifact attestation for the image, which is an unforgeable statement about where and how it was built. It increases supply chain security for people who consume the image. For more information, see [AUTOTITLE](/actions/security-guides/using-artifact-attestations-to-establish-provenance-for-builds). - name: Generate artifact attestation @@ -132,3 +161,73 @@ jobs: subject-name: ${{ env.REGISTRY }}/${{env.IMAGE_NAME}}/agent subject-digest: ${{ steps.build.outputs.digest }} push-to-registry: true + + operator-agent-tests: + name: Operator Agent Integration Tests + runs-on: ubuntu-latest + needs: [compute-metadata, build-and-push-agent] + permissions: + contents: read + packages: read + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-tags: true + fetch-depth: 0 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.25.5' + cache-dependency-path: operator/go.sum + + - name: Log in to the Container registry + uses: docker/login-action@v3 + with: + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Create Kubernetes KinD Cluster + uses: helm/kind-action@v1 + with: + version: v0.31.0 + node_image: kindest/node:v1.35.0 + config: operator/config/local-dev/kind-config.yaml + cluster_name: kind + + - name: Restore cached Binaries + id: cached-binaries + uses: actions/cache/restore@v4 + with: + key: 1.25.5-${{ runner.os }}-${{ runner.arch }}-bin-${{ hashFiles('operator/deps.mk') }} + restore-keys: 1.25.5-${{ runner.os }}-${{ runner.arch }}-bin- + path: | + ${{ github.workspace }}/operator/bin + ~/.cache/go-build + + - name: Install dependencies + if: steps.cached-binaries.outputs.cache-hit != 'true' + run: | + cd operator + make 
install-deps + + - name: Save cached Binaries + if: steps.cached-binaries.outputs.cache-hit != 'true' + uses: actions/cache/save@v4 + with: + key: 1.25.5-${{ runner.os }}-${{ runner.arch }}-bin-${{ hashFiles('operator/deps.mk') }} + path: | + ${{ github.workspace }}/operator/bin + ~/.cache/go-build + + - name: Run operator-agent tests + env: + AGENT_IMAGE: ${{ format('{0}/{1}/agent:{2}', env.REGISTRY, github.repository, needs.compute-metadata.outputs.agent-image-tag) }} + run: | + cd operator + export AGENT_IMAGE="${AGENT_IMAGE,,}" + echo "Testing with agent image: ${AGENT_IMAGE}" + make build-cli + make setup-kind-cluster operator-agent-tests diff --git a/agent/skyhook-agent/src/skyhook_agent/controller.py b/agent/skyhook-agent/src/skyhook_agent/controller.py index b87914c9..42c8da5b 100644 --- a/agent/skyhook-agent/src/skyhook_agent/controller.py +++ b/agent/skyhook-agent/src/skyhook_agent/controller.py @@ -69,7 +69,9 @@ def _get_env_config() -> tuple[str]: SKYHOOK_LOG_DIR = os.getenv("SKYHOOK_LOG_DIR", "/var/log/skyhook") - return SKYHOOK_RESOURCE_ID, SKYHOOK_DATA_DIR, SKYHOOK_ROOT_DIR, SKYHOOK_LOG_DIR + SKYHOOK_AGENT_WRITE_LOGS = os.getenv("SKYHOOK_AGENT_WRITE_LOGS", "true").lower() == 'true' + + return SKYHOOK_RESOURCE_ID, SKYHOOK_DATA_DIR, SKYHOOK_ROOT_DIR, SKYHOOK_LOG_DIR, SKYHOOK_AGENT_WRITE_LOGS def _get_package_information(config_data: dict) -> tuple[str, str]: return config_data["package_name"], config_data["package_version"] @@ -129,15 +131,43 @@ async def _stream_process( sink.flush() break +# A file like context manager that black holes all writes to it.
Does not need to implement read +class NullWriter: + """A file-like context manager that discards all writes.""" + + def write(self, *args, **kwargs): + # Swallow everything and return len to mimic file behaviour if needed + if args: + return len(args[0]) + return 0 + + def flush(self): + pass + + def close(self): + pass + + def __enter__(self): + return self -async def tee(chroot_dir: str, cmd: List[str], stdout_sink_path: str, stderr_sink_path: str, write_cmds=False, no_chmod=False, env: dict[str, str] = {}, **kwargs): + def __exit__(self, exc_type, exc_val, exc_tb): + # Nothing to cleanup, obviously + return False + + +async def tee(chroot_dir: str, cmd: List[str], stdout_sink_path: str, stderr_sink_path: str, write_cmds=False, no_chmod=False, env: dict[str, str] = {}, write_logs: bool=True, **kwargs): """ Run the cmd in a subprocess and keep the stream of stdout/stderr and merge both into the sink_path as a log. """ # get the directory of the script script_dir = os.path.dirname(os.path.abspath(__file__)) - with open(stdout_sink_path, "w") as stdout_sink_f, open(stderr_sink_path, "w") as stderr_sink_f: + # Switch out the opens with nulls in the event of not wanting to write files + if write_logs: + files = (lambda : open(stdout_sink_path, 'w'), lambda: open(stderr_sink_path, 'w')) + else: + files = (lambda: NullWriter(), lambda: NullWriter()) + with files[0]() as stdout_sink_f, files[1]() as stderr_sink_f: if write_cmds: sys.stdout.write(" ".join(cmd) + "\n") stdout_sink_f.write(" ".join(cmd) + "\n") @@ -172,7 +204,7 @@ def get_host_path_for_steps(copy_dir: str): return f"{copy_dir}/skyhook_dir" def get_skyhook_directory(root_mount: str) -> str: - _, _, SKYHOOK_ROOT_DIR, _ = _get_env_config() + _, _, SKYHOOK_ROOT_DIR, _, _ = _get_env_config() return f"{root_mount}{SKYHOOK_ROOT_DIR}" def get_flag_dir(root_mount: str) -> str: @@ -182,7 +214,7 @@ def get_history_dir(root_mount: str) -> str: return f"{get_skyhook_directory(root_mount)}/history" def 
get_log_dir(root_mount: str) -> str: - _, _, _, SKYHOOK_LOG_DIR = _get_env_config() + _, _, _, SKYHOOK_LOG_DIR, _ = _get_env_config() return f"{root_mount}{SKYHOOK_LOG_DIR}" def get_log_file(step_path: str, copy_dir: str, config_data: dict, root_mount: str, timestamp: str=None) -> str: @@ -220,21 +252,23 @@ def set_flag(flag_file: str, msg: str = "") -> None: f.write(msg) -def _run(chroot_dir: str, cmds: list[str], log_path: str, write_cmds=False, no_chmod=False, env: dict[str, str] = {}, **kwargs) -> int: +def _run(chroot_dir: str, cmds: list[str], log_path: str|None, write_cmds=False, no_chmod=False, env: dict[str, str] = {}, write_logs: bool=True, **kwargs) -> int: """ Synchronous wrapper around the tee command to have logs written to disk """ # "tee" the stdout and stderr to a file to log the step results + stderr_path = f"{log_path}.err" if log_path else None result = asyncio.run( tee( chroot_dir, cmds, log_path, - f"{log_path}.err", + stderr_path, write_cmds=write_cmds, no_chmod=no_chmod, env=env, + write_logs=write_logs, **kwargs ) ) @@ -283,7 +317,11 @@ def run_step( return True time.sleep(1) - log_file = get_log_file(step_path, copy_dir, config_data, chroot_dir) + _, _, _, _, SKYHOOK_AGENT_WRITE_LOGS = _get_env_config() + if SKYHOOK_AGENT_WRITE_LOGS: + log_file = get_log_file(step_path, copy_dir, config_data, chroot_dir) + else: + log_file = None # Compile additional environment variables env = {} @@ -294,9 +332,11 @@ def run_step( chroot_dir, [step_path, *step.arguments], log_file, - env=env) + env=env, + write_logs=SKYHOOK_AGENT_WRITE_LOGS) - cleanup_old_logs(get_log_file(step_path, copy_dir, config_data, "*")) + if SKYHOOK_AGENT_WRITE_LOGS: + cleanup_old_logs(get_log_file(step_path, copy_dir, config_data, chroot_dir, "*")) if return_code not in step.returncodes: print(f"FAILED: {step.path} {' '.join(step.arguments)} {return_code}") return True @@ -421,7 +461,7 @@ def summarize_check_results(results: list[bool], step_data: dict[Mode, list[Step return 
False def make_config_data_from_resource_id() -> dict: - SKYHOOK_RESOURCE_ID, _, _, _ = _get_env_config() + SKYHOOK_RESOURCE_ID, _, _, _, _ = _get_env_config() # Interrupts don't really have config data we can read from the Package as it is run standalone. # So read it off of SKYHOOK_RESOURCE_ID instead @@ -441,7 +481,7 @@ def do_interrupt(interrupt_data: str, root_mount: str, copy_dir: str) -> bool: def _make_interrupt_flag(interrupt_dir: str, interrupt_id: int) -> str: return f"{interrupt_dir}/{interrupt_id}.complete" - SKYHOOK_RESOURCE_ID, _, _, _ = _get_env_config() + SKYHOOK_RESOURCE_ID, _, _, _, _ = _get_env_config() config_data = make_config_data_from_resource_id() interrupt = interrupts.inflate(interrupt_data) @@ -509,7 +549,7 @@ def main(mode: Mode, root_mount: str, copy_dir: str, interrupt_data: None|str, a if mode == Mode.INTERRUPT: return do_interrupt(interrupt_data, root_mount, copy_dir) - _, SKYHOOK_DATA_DIR, _, _ = _get_env_config() + _, SKYHOOK_DATA_DIR, _, _, _ = _get_env_config() # Check to see if the directory has already been copied down. If it hasn't assume that we # are running in legacy mode and copy the directory down. 
@@ -651,12 +691,13 @@ def cli(sys_argv: list[str]=sys.argv): print(str.center("ENV CONFIGURATION", 20, "-")) print(f"COPY_RESOLV: {copy_resolv}") print(f"OVERLAY_ALWAYS_RUN_STEP: {always_run_step}") - SKYHOOK_RESOURCE_ID, SKYHOOK_DATA_DIR, SKYHOOK_ROOT_DIR, SKYHOOK_LOG_DIR = _get_env_config() + SKYHOOK_RESOURCE_ID, SKYHOOK_DATA_DIR, SKYHOOK_ROOT_DIR, SKYHOOK_LOG_DIR, SKYHOOK_AGENT_WRITE_LOGS = _get_env_config() print(f"SKYHOOK_RESOURCE_ID: {SKYHOOK_RESOURCE_ID}") print(f"SKYHOOK_DATA_DIR: {SKYHOOK_DATA_DIR}") print(f"SKYHOOK_ROOT_DIR: {SKYHOOK_ROOT_DIR}") print(f"SKYHOOK_LOG_DIR: {SKYHOOK_LOG_DIR}") print(f"SKYHOOK_AGENT_BUFFER_LIMIT: {buff_size}") + print(f"SKYHOOK_AGENT_WRITE_LOGS: {SKYHOOK_AGENT_WRITE_LOGS}") print(str.center("Directory CONFIGURATION", 20, "-")) # print flag dir and log dir config_data = make_config_data_from_resource_id() diff --git a/agent/skyhook-agent/tests/test_controller.py b/agent/skyhook-agent/tests/test_controller.py index c49995cb..de19a018 100644 --- a/agent/skyhook-agent/tests/test_controller.py +++ b/agent/skyhook-agent/tests/test_controller.py @@ -23,6 +23,8 @@ import asyncio import textwrap import shutil +import glob +import time from datetime import datetime, timezone @@ -92,6 +94,29 @@ class TestHelpers(unittest.TestCase): def setUp(self): self.config_data = {"package_name": "foo", "package_version": "1.0.0"} + def test_nullwriter_discards_writes(self): + """Test that NullWriter discards all writes and behaves like a file.""" + writer = controller.NullWriter() + + # Test write returns length + result = writer.write("test data") + self.assertEqual(result, 9) + + # Test write with empty string + result = writer.write("") + self.assertEqual(result, 0) + + # Test flush and close don't raise + writer.flush() + writer.close() + + def test_nullwriter_context_manager(self): + """Test that NullWriter works as a context manager.""" + with controller.NullWriter() as writer: + writer.write("test") + writer.flush() + # Should exit cleanly 
without errors + def test_make_flag_path_uses_args(self): path_a = controller.make_flag_path(Step("foo.sh", arguments=["1", "2"], returncodes=(0, 1, 2)), self.config_data, "root_mount") path_b = controller.make_flag_path(Step("foo.sh", arguments=["1"], returncodes=(0, 1, 2)), self.config_data, "root_mount") @@ -176,8 +201,7 @@ def test_make_flag_path_has_package_name(self): @mock.patch("skyhook_agent.controller.get_log_file") @mock.patch("skyhook_agent.controller.subprocess") @mock.patch("skyhook_agent.controller.tee") - @mock.patch("skyhook_agent.controller.os") - def test_run_step_is_successful(self, os_mock, tee_mock, subprocess_mock, log_mock, cleanup_mock): + def test_run_step_is_successful(self, tee_mock, subprocess_mock, log_mock, cleanup_mock): subprocess_mock.run.return_value = FakeSubprocessResult(0) tee_mock.return_value = FakeSubprocessResult(0) @@ -197,7 +221,8 @@ def test_run_step_is_successful(self, os_mock, tee_mock, subprocess_mock, log_mo f"{log_file}.err", env={"STEP_ROOT": "copy_dir/skyhook_dir", "SKYHOOK_DIR": "copy_dir"}, write_cmds=False, - no_chmod=False + no_chmod=False, + write_logs=True ) ] ) @@ -246,7 +271,8 @@ def test_run_step_replaces_environment_variables( f"{log_file}.err", write_cmds=False, no_chmod=False, - env={"STEP_ROOT": "copy_dir/skyhook_dir", "SKYHOOK_DIR": "copy_dir"} + env={"STEP_ROOT": "copy_dir/skyhook_dir", "SKYHOOK_DIR": "copy_dir"}, + write_logs=True ) ] ) @@ -692,7 +718,8 @@ def test_from_and_to_version_is_given_to_upgrade_step_as_env_var(self, run_mock, ), env=dict( **{"PREVIOUS_VERSION": "0.0.9", "CURRENT_VERSION": "1.0.0"}, - **{"STEP_ROOT": f"{root_dir}/{copy_dir}/skyhook_dir", "SKYHOOK_DIR": copy_dir}) + **{"STEP_ROOT": f"{root_dir}/{copy_dir}/skyhook_dir", "SKYHOOK_DIR": copy_dir}), + write_logs=True ) ]) @@ -731,7 +758,8 @@ def test_from_and_to_version_is_given_to_upgradestep_class_as_env_var_and_args(s ), env=dict( **{"PREVIOUS_VERSION": "2024.07.28", "CURRENT_VERSION": "1.0.0"}, - **{"STEP_ROOT": 
f"{root_dir}/{copy_dir}/skyhook_dir", "SKYHOOK_DIR": copy_dir}) + **{"STEP_ROOT": f"{root_dir}/{copy_dir}/skyhook_dir", "SKYHOOK_DIR": copy_dir}), + write_logs=True ) ]) @@ -1360,16 +1388,196 @@ def test_get_env_config(self): SKYHOOK_RESOURCE_ID="resource_id", SKYHOOK_DATA_DIR="data_dir", SKYHOOK_ROOT_DIR="skyhook_dir", - SKYHOOK_LOG_DIR="log_dir"): - SKYHOOK_RESOURCE_ID, SKYHOOK_DATA_DIR, SKYHOOK_ROOT_DIR, SKYHOOK_LOG_DIR = controller._get_env_config() + SKYHOOK_LOG_DIR="log_dir", + SKYHOOK_AGENT_WRITE_LOGS="false"): + SKYHOOK_RESOURCE_ID, SKYHOOK_DATA_DIR, SKYHOOK_ROOT_DIR, SKYHOOK_LOG_DIR, SKYHOOK_AGENT_WRITE_LOGS = controller._get_env_config() self.assertEqual(SKYHOOK_RESOURCE_ID, "resource_id") self.assertEqual(SKYHOOK_DATA_DIR, "data_dir") self.assertEqual(SKYHOOK_ROOT_DIR, "skyhook_dir") self.assertEqual(SKYHOOK_LOG_DIR, "log_dir") + self.assertFalse(SKYHOOK_AGENT_WRITE_LOGS) # Test the default values - SKYHOOK_RESOURCE_ID, SKYHOOK_DATA_DIR, SKYHOOK_ROOT_DIR, SKYHOOK_LOG_DIR = controller._get_env_config() + SKYHOOK_RESOURCE_ID, SKYHOOK_DATA_DIR, SKYHOOK_ROOT_DIR, SKYHOOK_LOG_DIR, SKYHOOK_AGENT_WRITE_LOGS = controller._get_env_config() self.assertEqual(SKYHOOK_RESOURCE_ID, "") self.assertEqual(SKYHOOK_DATA_DIR, "/skyhook-package") self.assertEqual(SKYHOOK_ROOT_DIR, "/etc/skyhook") self.assertEqual(SKYHOOK_LOG_DIR, "/var/log/skyhook") + self.assertTrue(SKYHOOK_AGENT_WRITE_LOGS) # Default should be True + + def test_get_env_config_write_logs_variations(self): + """Test SKYHOOK_AGENT_WRITE_LOGS with different values.""" + # Test "true" value + with set_env(SKYHOOK_AGENT_WRITE_LOGS="true"): + *_, SKYHOOK_AGENT_WRITE_LOGS = controller._get_env_config() + self.assertTrue(SKYHOOK_AGENT_WRITE_LOGS) + + # Test "True" value (case insensitive) + with set_env(SKYHOOK_AGENT_WRITE_LOGS="True"): + *_, SKYHOOK_AGENT_WRITE_LOGS = controller._get_env_config() + self.assertTrue(SKYHOOK_AGENT_WRITE_LOGS) + + # Test "false" value + with set_env(SKYHOOK_AGENT_WRITE_LOGS="false"): 
+ *_, SKYHOOK_AGENT_WRITE_LOGS = controller._get_env_config() + self.assertFalse(SKYHOOK_AGENT_WRITE_LOGS) + + # Test "False" value (case insensitive) + with set_env(SKYHOOK_AGENT_WRITE_LOGS="False"): + *_, SKYHOOK_AGENT_WRITE_LOGS = controller._get_env_config() + self.assertFalse(SKYHOOK_AGENT_WRITE_LOGS) + + # Test other values default to false + with set_env(SKYHOOK_AGENT_WRITE_LOGS="anything"): + *_, SKYHOOK_AGENT_WRITE_LOGS = controller._get_env_config() + self.assertFalse(SKYHOOK_AGENT_WRITE_LOGS) + + @mock.patch("skyhook_agent.controller.cleanup_old_logs") + @mock.patch("skyhook_agent.controller.tee") + def test_run_step_with_write_logs_false(self, tee_mock, cleanup_mock): + """Test that run_step does not write log files when SKYHOOK_AGENT_WRITE_LOGS is false.""" + tee_mock.return_value = FakeSubprocessResult(0) + + with set_env(SKYHOOK_AGENT_WRITE_LOGS="false"): + run_step_result = controller.run_step( + Step("foo", arguments=["a", "b"], returncodes=[0]), "chroot_dir", "copy_dir", self.config_data + ) + + self.assertFalse(run_step_result) + + # Verify tee was called with write_logs=False and None log paths + tee_mock.assert_has_calls( + [ + mock.call( + "chroot_dir", + ["copy_dir/skyhook_dir/foo", "a", "b"], + None, + None, + env={"STEP_ROOT": "copy_dir/skyhook_dir", "SKYHOOK_DIR": "copy_dir"}, + write_cmds=False, + no_chmod=False, + write_logs=False + ) + ] + ) + # cleanup_old_logs should not be called when write_logs is False + cleanup_mock.assert_not_called() + + @mock.patch("skyhook_agent.controller.cleanup_old_logs") + @mock.patch("skyhook_agent.controller.get_log_file") + @mock.patch("skyhook_agent.controller.tee") + def test_run_step_with_write_logs_true(self, tee_mock, get_log_file_mock, cleanup_mock): + """Test that run_step writes log files when SKYHOOK_AGENT_WRITE_LOGS is true.""" + tee_mock.return_value = FakeSubprocessResult(0) + get_log_file_mock.return_value = "/log/file.log" + + with set_env(SKYHOOK_AGENT_WRITE_LOGS="true"): + 
run_step_result = controller.run_step( + Step("foo", arguments=["a", "b"], returncodes=[0]), "chroot_dir", "copy_dir", self.config_data + ) + + self.assertFalse(run_step_result) + + # Verify tee was called with the log file path and write_logs=True + tee_mock.assert_has_calls( + [ + mock.call( + "chroot_dir", + ["copy_dir/skyhook_dir/foo", "a", "b"], + "/log/file.log", + "/log/file.log.err", + env={"STEP_ROOT": "copy_dir/skyhook_dir", "SKYHOOK_DIR": "copy_dir"}, + write_cmds=False, + no_chmod=False, + write_logs=True + ) + ] + ) + # cleanup_old_logs should be called when write_logs is True + cleanup_mock.assert_called_once() + + @mock.patch("skyhook_agent.controller.sys") + def test_tee_with_nullwriter_when_write_logs_false(self, sys_mock): + """Test that tee uses NullWriter when write_logs is False.""" + sys_mock.stdout = FakeIO() + sys_mock.stderr = FakeIO() + sys_mock.executable = sys.executable + + with tempfile.TemporaryDirectory() as dir: + stdout_path = f"{dir}/stdout.log" + stderr_path = f"{dir}/stderr.log" + + # Run tee with write_logs=False + result = asyncio.run( + controller.tee("", ["echo", "test"], stdout_path, stderr_path, write_logs=False) + ) + + # Log files should not be created + self.assertFalse(os.path.exists(stdout_path)) + self.assertFalse(os.path.exists(stderr_path)) + + def test_cleanup_old_logs_keeps_only_5_files(self): + """Test that cleanup_old_logs removes all but the 5 most recent log files.""" + with tempfile.TemporaryDirectory() as temp_dir: + # Create directory structure for logs + log_dir = f"{temp_dir}/var/log/skyhook/foo/1.0.0" + os.makedirs(log_dir, exist_ok=True) + + # Create a simple step script that succeeds + step_dir = f"{temp_dir}/skyhook_dir" + os.makedirs(step_dir, exist_ok=True) + step_path = f"{step_dir}/test_step.sh" + with open(step_path, "w") as f: + f.write("#!/bin/sh\necho 'test output'\nexit 0\n") + os.chmod(step_path, os.stat(step_path).st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH) + + # Track log files 
created + log_files_created = [] + + # Mock get_log_file and get_host_path_for_steps to use our temp directories + def mock_get_log_file(step_path_arg, copy_dir, config_data, root_mount, timestamp=None): + if timestamp is None: + timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d-%H%M%S") + log_file = f"{log_dir}/test_step.sh-{timestamp}.log" + # Only track actual log files, not glob patterns + if timestamp != "*": + log_files_created.append(log_file) + return log_file + + # Run run_step 6 times with delays to ensure different timestamps + # Use chroot_dir="local" to avoid permission issues with chroot + with mock.patch("skyhook_agent.controller.get_log_file", side_effect=mock_get_log_file), \ + mock.patch("skyhook_agent.controller.get_host_path_for_steps", return_value=step_dir), \ + mock.patch("skyhook_agent.controller.get_log_dir", return_value=log_dir): + + for i in range(6): + # Small delay to ensure different timestamps and file modification times + time.sleep(0.05) + + result = controller.run_step( + Step("test_step.sh", arguments=[], returncodes=[0]), + "local", # chroot_dir - "local" skips actual chroot + temp_dir, # copy_dir + self.config_data + ) + self.assertFalse(result, f"Step {i+1} should have succeeded") + + # After 6 runs with cleanup, there should be exactly 5 log files + actual_log_files = sorted(glob.glob(f"{log_dir}/test_step.sh-*.log")) + self.assertEqual(len(actual_log_files), 5, + f"Expected 5 log files after 6 runs, but found {len(actual_log_files)}: {actual_log_files}") + + # Verify the oldest log file was removed + self.assertFalse(os.path.exists(log_files_created[0]), + f"The oldest log file {log_files_created[0]} should have been removed") + + # Verify the 5 most recent log files remain + for log_file in log_files_created[1:]: + self.assertTrue(os.path.exists(log_file), + f"Recent log file {log_file} should still exist") + + # Verify stderr files also exist for remaining logs + for log_file in actual_log_files: + stderr_file = 
f"{log_file}.err" + self.assertTrue(os.path.exists(stderr_file), + f"Stderr file {stderr_file} should exist") \ No newline at end of file diff --git a/containers/agent.Dockerfile b/containers/agent.Dockerfile index 419934c3..feda18f9 100644 --- a/containers/agent.Dockerfile +++ b/containers/agent.Dockerfile @@ -36,13 +36,13 @@ RUN make build build_version=${AGENT_VERSION} # Install the wheel in the builder stage RUN python3 -m venv venv && ./venv/bin/pip install /code/skyhook-agent/dist/skyhook_agent*.whl -FROM nvcr.io/nvidia/distroless/python:3.12-v3.4.15 +FROM nvcr.io/nvidia/distroless/python:3.12-v3.5.2 ARG AGENT_VERSION ARG GIT_SHA ## https://github.com/opencontainers/image-spec/blob/main/annotations.md -LABEL org.opencontainers.image.base.name="nvcr.io/nvidia/distroless/python:3.12-v3.4.15" \ +LABEL org.opencontainers.image.base.name="nvcr.io/nvidia/distroless/python:3.12-v3.5.2" \ org.opencontainers.image.licenses="Apache-2.0" \ org.opencontainers.image.title="skyhook-agent" \ org.opencontainers.image.version="${AGENT_VERSION}" \ diff --git a/k8s-tests/operator-agent/check_node.sh b/k8s-tests/operator-agent/check_node.sh index 3fb487e2..c1266689 100755 --- a/k8s-tests/operator-agent/check_node.sh +++ b/k8s-tests/operator-agent/check_node.sh @@ -23,12 +23,17 @@ node=$1 cmd=$2 check=$3 timeout=${4:-10} +invert=${5:-false} # loop until the command returns a non-zero exit code or the timeout is reached for i in $(seq 1 ${timeout}); do data=$(kubectl exec ${node}-debugger -- chroot /host bash -c "${cmd}") - if echo "${data}" | grep -q "${check}"; then + check_result=$(echo "${data}" | grep -c "${check}") + if [ "$invert" == "true" ]; then + check_result=$((! 
check_result)) + fi + if [ $check_result -gt 0 ]; then echo "Check passed" exit 0 else diff --git a/k8s-tests/operator-agent/dont_write_logs/assert.yaml b/k8s-tests/operator-agent/dont_write_logs/assert.yaml new file mode 100644 index 00000000..9b0206a7 --- /dev/null +++ b/k8s-tests/operator-agent/dont_write_logs/assert.yaml @@ -0,0 +1,43 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +apiVersion: v1 +kind: Node +metadata: + labels: + skyhook.nvidia.com/test-node: skyhooke2e + skyhook.nvidia.com/status_dont-write-logs-agent-operator: complete + annotations: + skyhook.nvidia.com/status_dont-write-logs-agent-operator: complete +status: + (conditions[?type == 'skyhook.nvidia.com/dont-write-logs-agent-operator/NotReady']): + - reason: "Complete" + status: "False" + (conditions[?type == 'skyhook.nvidia.com/dont-write-logs-agent-operator/Erroring']): + - reason: "Not Erroring" + status: "False" +--- +apiVersion: skyhook.nvidia.com/v1alpha1 +kind: Skyhook +metadata: + name: dont-write-logs-agent-operator +status: + status: complete + nodeStatus: + # grab values should be one and is complete + (values(@)): + - complete diff --git a/k8s-tests/operator-agent/dont_write_logs/chainsaw-test.yaml b/k8s-tests/operator-agent/dont_write_logs/chainsaw-test.yaml new file mode 100644 index 00000000..fcc40cd2 --- /dev/null +++ b/k8s-tests/operator-agent/dont_write_logs/chainsaw-test.yaml @@ -0,0 +1,52 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# yaml-language-server: $schema=https://raw.githubusercontent.com/kyverno/chainsaw/main/.schemas/json/test-chainsaw-v1alpha1.json +apiVersion: chainsaw.kyverno.io/v1alpha1 +kind: Test +metadata: + name: dont-write-logs-agent-operator +spec: + timeouts: + assert: 240s + exec: 90s + steps: + - try: + - script: + content: | + ## remove annotation from last run + ../../../operator/bin/skyhook reset dont-write-logs-agent-operator --confirm 2>/dev/null || true + - script: + content: | + ## reinstall the debug pod in case it was deleted + ../setup.sh kind-worker setup + - script: + content: | + ## clean up any logs from prior runs + ../check_node.sh kind-worker "rm -rf /var/log/skyhook/dont-write-logs-agent-operator || true" ".*" 2 + ../check_node.sh kind-worker "rm -rf /var/lib/skyhook/dont-write-logs-agent-operator || true" ".*" 2 + - apply: + file: skyhook.yaml + - assert: + file: assert.yaml + - script: + content: | + set -e + ## Won't even create the log directory + ../check_node.sh kind-worker "ls /var/log/skyhook/" "dont-write-logs-agent-operator" 2 true + - finally: + - delete: + file: skyhook.yaml diff --git a/k8s-tests/operator-agent/dont_write_logs/skyhook.yaml b/k8s-tests/operator-agent/dont_write_logs/skyhook.yaml new file mode 100644 index 00000000..a3b89ce8 --- /dev/null +++ b/k8s-tests/operator-agent/dont_write_logs/skyhook.yaml @@ -0,0 +1,43 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: skyhook.nvidia.com/v1alpha1 +kind: Skyhook +metadata: + labels: + app.kubernetes.io/part-of: skyhook-operator + app.kubernetes.io/created-by: skyhook-operator + name: dont-write-logs-agent-operator +spec: + nodeSelectors: + matchLabels: + skyhook.nvidia.com/test-node: skyhooke2e + packages: + shellscript: + version: "1.1.1" + image: ghcr.io/nvidia/skyhook-packages/shellscript + env: + - name: SKYHOOK_AGENT_WRITE_LOGS + value: "false" + configMap: + apply.sh: | + #!/bin/bash + echo "Hello, world!" + + cat $SKYHOOK_DIR/configmaps/extra.txt + + extra.txt: | + This is a test diff --git a/k8s-tests/operator-agent/interrupt/chainsaw-test.yaml b/k8s-tests/operator-agent/interrupt/chainsaw-test.yaml index b8b86ce7..609aecf2 100644 --- a/k8s-tests/operator-agent/interrupt/chainsaw-test.yaml +++ b/k8s-tests/operator-agent/interrupt/chainsaw-test.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- # yaml-language-server: $schema=https://raw.githubusercontent.com/kyverno/chainsaw/main/.schemas/json/test-chainsaw-v1alpha1.json apiVersion: chainsaw.kyverno.io/v1alpha1 kind: Test @@ -29,15 +28,21 @@ spec: - script: content: | ## remove annotation from last run - ../../../../operator/bin/skyhook reset interrupt-agent-operator --confirm 2>/dev/null || true + ../../../operator/bin/skyhook reset interrupt-agent-operator --confirm 2>/dev/null || true - script: content: | ## reinstall the debug pod in case it was deleted ../setup.sh kind-worker setup + - script: + content: | + ## clean up from prior runs + ../check_node.sh kind-worker "rm -rf /var/log/skyhook/interrupt-agent-operator || true" ".*" 2 + ../check_node.sh kind-worker "rm -rf /var/lib/skyhook/interrupt-agent-operator || true" ".*" 2 - apply: file: skyhook.yaml - script: content: | + set -e ../check_node.sh kind-worker "ls /var/lib/skyhook/interrupt-agent-operator/interrupts/flags/interrupt-agent-operator*/" "no_op.complete" 60 - assert: file: assert.yaml diff --git a/k8s-tests/operator-agent/interrupt/skyhook.yaml b/k8s-tests/operator-agent/interrupt/skyhook.yaml index ca004bbd..13c3d3bd 100644 --- a/k8s-tests/operator-agent/interrupt/skyhook.yaml +++ b/k8s-tests/operator-agent/interrupt/skyhook.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - apiVersion: skyhook.nvidia.com/v1alpha1 kind: Skyhook metadata: @@ -33,6 +32,6 @@ spec: interrupt: type: noop configMap: - config.sh: | + apply.sh: | #!/bin/bash echo "Hello, world!" 
diff --git a/k8s-tests/operator-agent/reap_old_logs/assert.yaml b/k8s-tests/operator-agent/reap_old_logs/assert.yaml new file mode 100644 index 00000000..a43c8ec7 --- /dev/null +++ b/k8s-tests/operator-agent/reap_old_logs/assert.yaml @@ -0,0 +1,43 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +apiVersion: v1 +kind: Node +metadata: + labels: + skyhook.nvidia.com/test-node: skyhooke2e + skyhook.nvidia.com/status_reap-old-logs-agent-operator: complete + annotations: + skyhook.nvidia.com/status_reap-old-logs-agent-operator: complete +status: + (conditions[?type == 'skyhook.nvidia.com/reap-old-logs-agent-operator/NotReady']): + - reason: "Complete" + status: "False" + (conditions[?type == 'skyhook.nvidia.com/reap-old-logs-agent-operator/Erroring']): + - reason: "Not Erroring" + status: "False" +--- +apiVersion: skyhook.nvidia.com/v1alpha1 +kind: Skyhook +metadata: + name: reap-old-logs-agent-operator +status: + status: complete + nodeStatus: + # the nodeStatus values must be exactly one entry, and it must be complete + (values(@)): + - complete diff --git a/k8s-tests/operator-agent/reap_old_logs/chainsaw-test.yaml b/k8s-tests/operator-agent/reap_old_logs/chainsaw-test.yaml new file mode 100644 index 00000000..948c8811 --- /dev/null +++ b/k8s-tests/operator-agent/reap_old_logs/chainsaw-test.yaml @@ -0,0 +1,50 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026
NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# yaml-language-server: $schema=https://raw.githubusercontent.com/kyverno/chainsaw/main/.schemas/json/test-chainsaw-v1alpha1.json +apiVersion: chainsaw.kyverno.io/v1alpha1 +kind: Test +metadata: + name: reap-old-logs-agent-operator +spec: + timeouts: + assert: 240s + exec: 90s + steps: + - try: + - script: + content: | + ## remove annotation from last run + ../../../operator/bin/skyhook reset reap-old-logs-agent-operator --confirm 2>/dev/null || true + - script: + content: | + ## reinstall the debug pod in case it was deleted + ../setup.sh kind-worker setup + - script: + content: | + ## clean up from prior runs + ../check_node.sh kind-worker "rm -rf /var/log/skyhook/reap-old-logs-agent-operator || true" ".*" 2 + ../check_node.sh kind-worker "rm -rf /var/lib/skyhook/reap-old-logs-agent-operator || true" ".*" 2 + - apply: + file: skyhook.yaml + - assert: + file: assert.yaml + - script: + content: | + ../check_node.sh kind-worker "ls /var/log/skyhook/reap-old-logs-agent-operator/shellscript/1.1.1/*.log | wc -l" "5" 2 + - finally: + - delete: + file: skyhook.yaml diff --git a/k8s-tests/operator-agent/reap_old_logs/skyhook.yaml b/k8s-tests/operator-agent/reap_old_logs/skyhook.yaml new file mode 100644 index 00000000..fb0eed0f --- /dev/null +++ b/k8s-tests/operator-agent/reap_old_logs/skyhook.yaml @@ -0,0 +1,42 @@ +# SPDX-FileCopyrightText: Copyright
(c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: skyhook.nvidia.com/v1alpha1 +kind: Skyhook +metadata: + labels: + app.kubernetes.io/part-of: skyhook-operator + app.kubernetes.io/created-by: skyhook-operator + name: reap-old-logs-agent-operator +spec: + nodeSelectors: + matchLabels: + skyhook.nvidia.com/test-node: skyhooke2e + packages: + shellscript: + version: "1.1.1" + image: ghcr.io/nvidia/skyhook-packages/shellscript + configMap: + apply.sh: |- + #!/bin/bash + sleep 1 + if [ $(ls /var/log/skyhook/reap-old-logs-agent-operator/shellscript/1.1.1/*.log | wc -l) -eq 5 ]; then + echo "5 logs found. After this should still be 5." + exit 0 + else + echo "Not enough logs yet. Erroring to produce more." + exit 1 + fi diff --git a/k8s-tests/operator-agent/simple/chainsaw-test.yaml b/k8s-tests/operator-agent/simple/chainsaw-test.yaml index 2f5ed23a..69ecbefd 100644 --- a/k8s-tests/operator-agent/simple/chainsaw-test.yaml +++ b/k8s-tests/operator-agent/simple/chainsaw-test.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
- # yaml-language-server: $schema=https://raw.githubusercontent.com/kyverno/chainsaw/main/.schemas/json/test-chainsaw-v1alpha1.json apiVersion: chainsaw.kyverno.io/v1alpha1 kind: Test @@ -29,11 +28,16 @@ spec: - script: content: | ## remove annotation from last run - ../../../../operator/bin/skyhook reset simple-agent-operator --confirm 2>/dev/null || true + ../../../operator/bin/skyhook reset simple-agent-operator --confirm 2>/dev/null || true - script: content: | ## reinstall the debug pod in case it was deleted ../setup.sh kind-worker setup + - script: + content: | + ## clean up from prior runs + ../check_node.sh kind-worker "rm -rf /var/log/skyhook/simple-agent-operator || true" ".*" 2 + ../check_node.sh kind-worker "rm -rf /var/lib/skyhook/simple-agent-operator || true" ".*" 2 - apply: file: skyhook.yaml - assert: diff --git a/k8s-tests/operator-agent/simple/skyhook.yaml b/k8s-tests/operator-agent/simple/skyhook.yaml index 9cac96a1..6a359912 100644 --- a/k8s-tests/operator-agent/simple/skyhook.yaml +++ b/k8s-tests/operator-agent/simple/skyhook.yaml @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 # # @@ -14,7 +14,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - apiVersion: skyhook.nvidia.com/v1alpha1 kind: Skyhook metadata: @@ -31,7 +30,7 @@ spec: version: "1.1.1" image: ghcr.io/nvidia/skyhook-packages/shellscript configMap: - config.sh: | + apply.sh: | #!/bin/bash echo "Hello, world!" 
diff --git a/operator/Makefile b/operator/Makefile index 7b2e6b6c..59f45dd5 100644 --- a/operator/Makefile +++ b/operator/Makefile @@ -228,8 +228,14 @@ helm-tests: helm chainsaw ensure-test-symlinks $(CHAINSAW) test --test-dir ../k8s-tests/chainsaw/helm $(CHAINSAW_ARGS) operator-agent-tests: chainsaw install ## Run operator agent tests. + @if [ -z "$(AGENT_IMAGE)" ]; then \ + echo "Error: AGENT_IMAGE is not set. Please set it to the agent image to test against."; \ + echo "Example: AGENT_IMAGE=ghcr.io/nvidia/skyhook/agent:v6.3.1 make operator-agent-tests"; \ + exit 1; \ + fi + @echo "Running operator-agent tests with AGENT_IMAGE=$(AGENT_IMAGE)" ../k8s-tests/operator-agent/setup.sh kind-worker setup - AGENT_IMAGE=ghcr.io/nvidia/skyhook/agent:v6.2.0-30d8b7a $(MAKE) run + $(MAKE) run $(CHAINSAW) test --test-dir ../k8s-tests/operator-agent $(CHAINSAW_ARGS) $(MAKE) kill ## ../k8s-tests/operator-agent/setup.sh kind-worker teardown