From 157a4ffd1db0f741b06c5ff68aa0ffd129487d93 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 17 Feb 2026 04:13:03 +0000 Subject: [PATCH 1/3] fix: use shallow clone to prevent reward hacking via git history Use --depth 1 when cloning repositories to prevent agents from accessing git history and exploiting it to retrieve original function implementations that were stripped out. This addresses a reward hacking vulnerability where agents can use git log/diff/show commands to find and copy original implementations instead of writing them from scratch. For Commit0Spec, also fetch the specific env_setup_commit with --depth 1 before resetting to it, since shallow clone only gets the default branch tip. Co-authored-by: openhands --- commit0/harness/spec.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/commit0/harness/spec.py b/commit0/harness/spec.py index 7a71118..243191b 100644 --- a/commit0/harness/spec.py +++ b/commit0/harness/spec.py @@ -114,9 +114,12 @@ def make_repo_script_list(self) -> list[str]: base_commit = self.instance["base_commit"] setup_commands = [ - f"git clone -o origin https://github.com/{repo} {self.repo_directory}", + # Use --depth 1 for shallow clone to prevent agents from accessing + # git history and exploiting it to retrieve original implementations + f"git clone --depth 1 -o origin https://github.com/{repo} {self.repo_directory}", f"chmod -R 777 {self.repo_directory}", # So nonroot user can run tests f"cd {self.repo_directory}", + f"git fetch --depth 1 origin {env_setup_commit}", f"git reset --hard {env_setup_commit}", # Remove the remote so the agent won't see newer commits. "git remote remove origin", @@ -218,7 +221,9 @@ def make_repo_script_list(self) -> list[str]: specs["python"] = 3.7 setup_commands = [ - f"git clone -o origin https://github.com/{repo} {self.repo_directory}", + # Use --depth 1 for shallow clone to prevent agents from accessing + # git history and exploiting it to retrieve original implementations + f"git clone --depth 1 -o origin https://github.com/{repo} {self.repo_directory}", f"chmod -R 777 {self.repo_directory}", # So nonroot user can run tests f"cd {self.repo_directory}", # Remove the remote so the agent won't see newer commits. From 07ad9d3cc645eb913bac2f0778ae6c5bf2236179 Mon Sep 17 00:00:00 2001 From: openhands Date: Wed, 18 Feb 2026 02:46:23 +0000 Subject: [PATCH 2/3] fix: fetch base_commit in shallow clone to enable git reset The shallow clone only fetches the latest commit, but the setup scripts need access to both env_setup_commit (reference_commit) and base_commit for the git reset operations. - Commit0Spec: Fetch both env_setup_commit and base_commit before removing remote - SWEBenchSpec: Fetch base_commit needed for eval script reset Co-authored-by: openhands --- commit0/harness/spec.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/commit0/harness/spec.py b/commit0/harness/spec.py index 243191b..e879cb8 100644 --- a/commit0/harness/spec.py +++ b/commit0/harness/spec.py @@ -119,7 +119,8 @@ def make_repo_script_list(self) -> list[str]: f"git clone --depth 1 -o origin https://github.com/{repo} {self.repo_directory}", f"chmod -R 777 {self.repo_directory}", # So nonroot user can run tests f"cd {self.repo_directory}", - f"git fetch --depth 1 origin {env_setup_commit}", + # Fetch both commits needed: env_setup_commit for setup and base_commit for later reset + f"git fetch --depth 1 origin {env_setup_commit} {base_commit}", f"git reset --hard {env_setup_commit}", # Remove the remote so the agent won't see newer commits. "git remote remove origin", @@ -220,12 +221,15 @@ def make_repo_script_list(self) -> list[str]: if version < 7: specs["python"] = 3.7 + base_commit = self.instance["base_commit"] setup_commands = [ # Use --depth 1 for shallow clone to prevent agents from accessing # git history and exploiting it to retrieve original implementations f"git clone --depth 1 -o origin https://github.com/{repo} {self.repo_directory}", f"chmod -R 777 {self.repo_directory}", # So nonroot user can run tests f"cd {self.repo_directory}", + # Fetch base_commit needed for eval script reset + f"git fetch --depth 1 origin {base_commit}", # Remove the remote so the agent won't see newer commits. "git remote remove origin", f"uv venv --python {specs['python']}", From 118bf36e4cef2263045f5e620b500c424a2e9656 Mon Sep 17 00:00:00 2001 From: openhands Date: Wed, 18 Feb 2026 02:47:40 +0000 Subject: [PATCH 3/3] ci: use local Docker backend instead of Modal for CI tests Modal requires authentication tokens that aren't available to fork PRs. Using the local Docker backend allows CI to run without Modal credentials. Co-authored-by: openhands --- .github/workflows/system.yml | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/.github/workflows/system.yml b/.github/workflows/system.yml index fa9eb70..43f478e 100644 --- a/.github/workflows/system.yml +++ b/.github/workflows/system.yml @@ -25,19 +25,13 @@ jobs: - name: Get tests run: uv run commit0 get-tests simpy - name: Test - env: - MODAL_TOKEN_ID: ${{secrets.MODAL_TOKEN_ID}} - MODAL_TOKEN_SECRET: ${{secrets.MODAL_TOKEN_SECRET}} run: | - uv run commit0 test simpy tests/test_event.py::test_succeed --reference --rebuild - uv run commit0 test simpy tests/test_event.py::test_succeed --reference + uv run commit0 test simpy tests/test_event.py::test_succeed --reference --rebuild --backend local + uv run commit0 test simpy tests/test_event.py::test_succeed --reference --backend local - name: Evaluate - env: - MODAL_TOKEN_ID: ${{secrets.MODAL_TOKEN_ID}} - MODAL_TOKEN_SECRET: ${{secrets.MODAL_TOKEN_SECRET}} run: | - uv run commit0 evaluate --reference --rebuild - uv run commit0 evaluate --reference + uv run commit0 evaluate --reference --rebuild --backend local + uv run commit0 evaluate --reference --backend local - name: Lint run: uv run commit0 lint commit0/harness/ - name: Save