From 819060836b63b5171031f3730f153f7f7273c94c Mon Sep 17 00:00:00 2001 From: aliyuldashev Date: Sat, 31 Jan 2026 08:37:53 +0900 Subject: [PATCH 1/6] fix: removing abstract method --- src/redteam_core/challenge_pool/controller.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/redteam_core/challenge_pool/controller.py b/src/redteam_core/challenge_pool/controller.py index 5a93479b..6b34ba95 100644 --- a/src/redteam_core/challenge_pool/controller.py +++ b/src/redteam_core/challenge_pool/controller.py @@ -699,11 +699,6 @@ def _exclude_output_keys(self, miner_output: dict, reference_output: dict): """ pass - @abstractmethod - def start_challenge(self): - """Initiates the challenge lifecycle by setting up and executing the challenge Docker container.""" - pass - @abstractmethod def _score_miner_with_new_inputs( self, miner_commit: MinerChallengeCommit, challenge_inputs From b09fd1a0113090d21fe5bb4fb4212afb0d07a1f5 Mon Sep 17 00:00:00 2001 From: aliyuldashev Date: Mon, 9 Feb 2026 08:01:10 +0900 Subject: [PATCH 2/6] feat: removal of comparison logs --- src/redteam_core/validator/models.py | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/redteam_core/validator/models.py b/src/redteam_core/validator/models.py index 489ae3ef..97965f1b 100644 --- a/src/redteam_core/validator/models.py +++ b/src/redteam_core/validator/models.py @@ -114,6 +114,26 @@ def public_view(self) -> "MinerChallengeCommit": accepted=self.accepted, ) + def remove_lower_than_highest_score(self): + """Remove all comparison logs that have scores lower than the highest score across all logs.""" + if not self.comparison_logs: + return + + highest_score = self.get_higest_comparison_score() + + if highest_score == 0.0: + return + + filtered_logs = {} + for ref_commit, logs in self.comparison_logs.items(): + highest_scoring_logs = [ + log for log in logs if log.similarity_score == highest_score + ] + if highest_scoring_logs: + filtered_logs[ref_commit] = 
highest_scoring_logs + + self.comparison_logs = filtered_logs + def remove_redundant_logs(self): """This is temporary function to clear scoring logs except in index 0 and comparison logs except the highest similarity score log.""" if len(self.scoring_logs) > 1: @@ -127,7 +147,7 @@ def remove_redundant_logs(self): self.comparison_logs[ref_commit] = [highest_log] def get_higest_comparison_score(self) -> float: - """Get the minimum and maximum similarity score from all comparison logs.""" + """Get the maximum similarity score from all comparison logs.""" if not self.comparison_logs: return 0.0 From 9c0b77de0d4607384e1aaf9e1d57d20a88880f2f Mon Sep 17 00:00:00 2001 From: aliyuldashev Date: Tue, 10 Feb 2026 11:11:02 +0900 Subject: [PATCH 3/6] feat: ada-2 challenge --- src/challenges/ada_detection | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/challenges/ada_detection b/src/challenges/ada_detection index c6b5010f..d813605a 160000 --- a/src/challenges/ada_detection +++ b/src/challenges/ada_detection @@ -1 +1 @@ -Subproject commit c6b5010fe2cfe37284ad89084c1251c867c2c173 +Subproject commit d813605a21c758f7ccc8aace11d1c79f7ef12f90 From 365066bd2e87f11889181da141414db21fe4b71f Mon Sep 17 00:00:00 2001 From: Batkhuu Byambajav Date: Tue, 10 Feb 2026 14:33:00 +0900 Subject: [PATCH 4/6] ci: update actions/checkout and actions/setup-python to v6 across workflows --- .github/workflows/1.bump-version.yml | 2 +- .github/workflows/2.build-publish.yml | 8 ++++---- .github/workflows/3.update-changelog.yml | 2 +- .github/workflows/publish-docs.yml | 4 ++-- src/challenges/ab_sniffer | 2 +- src/challenges/ada_detection | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/1.bump-version.yml b/.github/workflows/1.bump-version.yml index 3fe43635..fc16d78b 100644 --- a/.github/workflows/1.bump-version.yml +++ b/.github/workflows/1.bump-version.yml @@ -17,7 +17,7 @@ jobs: contents: write steps: - name: Checkout - uses: 
actions/checkout@v5 + uses: actions/checkout@v6 - name: Bump version env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/2.build-publish.yml b/.github/workflows/2.build-publish.yml index f51f3201..e34f2e5b 100644 --- a/.github/workflows/2.build-publish.yml +++ b/.github/workflows/2.build-publish.yml @@ -17,9 +17,9 @@ jobs: # contents: read # steps: # - name: Checkout - # uses: actions/checkout@v5 + # uses: actions/checkout@v6 # - name: Set up Python - # uses: actions/setup-python@v5 + # uses: actions/setup-python@v6 # with: # python-version: "3.10" # - name: Install dependencies @@ -37,13 +37,13 @@ jobs: contents: write steps: - name: Checkout - uses: actions/checkout@v5 + uses: actions/checkout@v6 - name: Get latest version run: | git pull origin main echo "Current version: v$(./scripts/get-version.sh)" - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: "3.10" - name: Install dependencies diff --git a/.github/workflows/3.update-changelog.yml b/.github/workflows/3.update-changelog.yml index 26a69c93..29d99022 100644 --- a/.github/workflows/3.update-changelog.yml +++ b/.github/workflows/3.update-changelog.yml @@ -14,7 +14,7 @@ jobs: contents: write steps: - name: Checkout - uses: actions/checkout@v5 + uses: actions/checkout@v6 - name: Update changelog env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/publish-docs.yml b/.github/workflows/publish-docs.yml index bb55f6eb..aee75cf9 100644 --- a/.github/workflows/publish-docs.yml +++ b/.github/workflows/publish-docs.yml @@ -17,11 +17,11 @@ jobs: contents: write steps: - name: Checkout - uses: actions/checkout@v5 + uses: actions/checkout@v6 with: fetch-depth: 0 - name: Set up Python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: "3.10" - name: Install dependencies diff --git a/src/challenges/ab_sniffer b/src/challenges/ab_sniffer index e8ad6273..cb511c26 160000 --- 
a/src/challenges/ab_sniffer +++ b/src/challenges/ab_sniffer @@ -1 +1 @@ -Subproject commit e8ad62737296a9ba0149e66f18ab42d0eb7b28de +Subproject commit cb511c26898416479f04e7b7f46c1bba15abeb63 diff --git a/src/challenges/ada_detection b/src/challenges/ada_detection index d813605a..54d92693 160000 --- a/src/challenges/ada_detection +++ b/src/challenges/ada_detection @@ -1 +1 @@ -Subproject commit d813605a21c758f7ccc8aace11d1c79f7ef12f90 +Subproject commit 54d9269313db51c7c3189f1f1221b31c3fd9e0ca From 12f42348b317fa08291cbb8805dc65aff7be7e55 Mon Sep 17 00:00:00 2001 From: aliyuldashev Date: Thu, 12 Feb 2026 09:45:28 +0900 Subject: [PATCH 5/6] feat: updating documentation for ada-2 --- docs/challenges/ada_detection/.nav.yml | 2 +- docs/challenges/ada_detection/README.md | 30 ++-- .../ada_detection/{ => depricated}/v1.md | 0 docs/challenges/ada_detection/v2.md | 145 ++++++++++++++++++ 4 files changed, 164 insertions(+), 13 deletions(-) rename docs/challenges/ada_detection/{ => depricated}/v1.md (100%) create mode 100644 docs/challenges/ada_detection/v2.md diff --git a/docs/challenges/ada_detection/.nav.yml b/docs/challenges/ada_detection/.nav.yml index 8b1811ac..f8751d64 100644 --- a/docs/challenges/ada_detection/.nav.yml +++ b/docs/challenges/ada_detection/.nav.yml @@ -1,4 +1,4 @@ nav: - README.md - - Anti-Detect Automation Detection v1: v1.md + - Anti-Detect Automation Detection v2: v2.md - "*" diff --git a/docs/challenges/ada_detection/README.md b/docs/challenges/ada_detection/README.md index ceebb232..1f4566db 100644 --- a/docs/challenges/ada_detection/README.md +++ b/docs/challenges/ada_detection/README.md @@ -2,7 +2,7 @@ ## Overview -The **Anti-Detect Automation Detection (AAD)** challenge evaluates a participant`s ability to reliably detect browser automation frameworks operating inside anti-detect browsers, while preserving human safety. 
+The **Anti-Detect Automation Detection (AAD)** challenge evaluates a participant's ability to reliably detect browser automation frameworks operating inside anti-detect browsers, while preserving human safety. Evaluation runs simulate real-world anti-detect usage where static signals are masked and fingerprints are fresh. Detection must rely on runtime behavior and orchestration patterns. @@ -17,13 +17,13 @@ Each evaluation run involves: !!! Info "NST-Browser Dependency" Participants need an API key from the [NSTBrowser](https://www.nstbrowser.io/en/pricing) dashboard (Professional plan recommended) for local testing. -## Features of ADA Detection v1 +## Features of ADA Detection v2 -- **Behavior-first detection**: Focus on runtime behavioral analysis inside NST-Browser; static driver fingerprints are spoofed. -- **Payload-based flow**: Detection scripts run in-page, send payloads to local `/_payload` when automation is confirmed, and must stay silent during human sessions. -- **Human safety gate**: Random human interactions are injected; flagging more than 2 humans as bots zeros the submission. -- **Target coverage update**: Required detectors include `automation`, `nodriver`, `playwright`, `patchright`, and `puppeteer`. -- **Fresh profiles, no state**: Each run uses a fresh profile with zero shared state between runs. +- **Hardened NSTBrowser Environment**: Advanced obfuscation including dynamic fingerprinting and hardware simulation (8GB RAM, 16 Cores). +- **Engine-Level Stealth**: `AutomationControlled` flags are disabled at the browser engine level to eliminate static detection signals. +- **Fail-Fast Scoring**: High-stakes evaluation where missing critical targets (Selenium) or exceeding miss limits results in an immediate 0.0 score. +- **Protocol-Level Verification**: Mandatory detection of low-level communication patterns (Webdriver and WebSocket). 
+- **Selenium Safety Gate**: Mandatory detection of `seleniumbase` and `selenium_driverless` is required for incentive eligibility. - **Similarity/time decay**: Similarity penalties apply to lookalike submissions; scores decay over 15 days to encourage refreshed heuristics. ## Evaluation Flow @@ -31,9 +31,9 @@ Each evaluation run involves: 1. **Submission Received**: Detection scripts are submitted via the `/score` endpoint. 2. **Task Generation**: A randomized sequence of multiple automation framework runs and human interactions is generated. 3. **NST-Browser Launch**: A clean instance is started for each task. -4. **Execution**: Automation frameworks connect via WebSocket, while humans interact manually. +4. **Execution**: Automation frameworks connect via WebSocket or WebDriver, while humans interact manually. 5. **Detection Phase**: Scripts may emit detection payloads to `/_payload`. -6. **Scoring**: Results are aggregated and normalized. +6. **Scoring**: Results are aggregated and normalized using the Fail-Fast pillars. ## Technical Constraints @@ -59,11 +59,16 @@ Submit your detection scripts as a JSON payload with the following structure: ```json { "detection_files": [ - { "file_name": "automation.js", "content": "..." }, { "file_name": "nodriver.js", "content": "..." }, { "file_name": "playwright.js", "content": "..." }, { "file_name": "patchright.js", "content": "..." }, - { "file_name": "puppeteer.js", "content": "..." } + { "file_name": "puppeteer.js", "content": "..." }, + { "file_name": "puppeteer_extra.js", "content": "..." }, + { "file_name": "zendriver.js", "content": "..." }, + { "file_name": "selenium_driverless.js", "content": "..." }, + { "file_name": "seleniumbase.js", "content": "..." }, + { "file_name": "webdriver.js", "content": "..." }, + { "file_name": "websocket.js", "content": "..." 
} ] } ``` @@ -72,7 +77,8 @@ Each file must be named exactly as shown and contain self-contained JavaScript ( ## Challenge Versions -- [**v1** (Active after Dec 15, 2025 10:00 UTC)](./v1.md) - Payload-based detection with human safety gate +- [**v2** (Active after Feb 14, 2026 14:00 UTC)](./v2.md) - Hardened detection with Fail-Fast scoring +- [**v1** (Deprecated)](./depricated/v1.md) ## Resources & Guides diff --git a/docs/challenges/ada_detection/v1.md b/docs/challenges/ada_detection/depricated/v1.md similarity index 100% rename from docs/challenges/ada_detection/v1.md rename to docs/challenges/ada_detection/depricated/v1.md diff --git a/docs/challenges/ada_detection/v2.md b/docs/challenges/ada_detection/v2.md new file mode 100644 index 00000000..6f1a0c5d --- /dev/null +++ b/docs/challenges/ada_detection/v2.md @@ -0,0 +1,145 @@ +--- +title: Anti-Detect Automation Detection v2 +--- + +# Anti-Detect Automation Detection v2 (Active after 2026 February 14 14:00 UTC) + +## Overview + +**ADA Detection v2** focuses on advanced behavioral detection inside **NSTBrowser** environments. This version introduces hardened detection requirements where static signals are masked and engine-level obfuscation is active. + +### Browser Hardening Features + +- **Dynamic Fingerprinting**: Every session generates unique, randomized browser identifiers to prevent static pattern matching. +- **Hardware Simulation**: Realistic signals are injected for `deviceMemory` (8GB), `hardwareConcurrency` (16), and screen resolution (1280x1024). +- **Engine-Level Stealth**: Disables `AutomationControlled` flags at the browser engine level, eliminating traditional automation leaks. + +Scoring has transitioned to a **Fail-Fast** model, emphasizing accuracy across three critical pillars: Human Detection, Framework Detection (with a Selenium Safety Gate), and Protocol Accuracy. 
+ +For general challenge information, environment details, and plagiarism policies, please refer to the [AAD README](./README.md). + +--- + +## Target Frameworks & Protocols + +Participants must submit detection scripts for the following frameworks and protocols: + +- **Frameworks**: `nodriver`, `playwright`, `patchright`, `puppeteer`, `puppeteer_extra`, `zendriver`, `selenium_driverless`, `seleniumbase` +- **Protocols**: `webdriver`, `websocket` + +Missing any of these scripts results in an invalid submission. + +### Selenium Safety Gate + +Missing detection for either `seleniumbase` or `selenium_driverless` results in an immediate **final score of 0.0**. These frameworks are the primary automation vectors in v2; failure to detect them indicates a fundamental gap in the detection logic. + +--- + +## Submission Format + +Submissions must follow this structure: + +```json +{ + "detection_files": [ + { "file_name": "nodriver.js", "content": "/* logic */" }, + { "file_name": "playwright.js", "content": "/* logic */" }, + { "file_name": "patchright.js", "content": "/* logic */" }, + { "file_name": "puppeteer.js", "content": "/* logic */" }, + { "file_name": "puppeteer_extra.js", "content": "/* logic */" }, + { "file_name": "zendriver.js", "content": "/* logic */" }, + { "file_name": "selenium_driverless.js", "content": "/* logic */" }, + { "file_name": "seleniumbase.js", "content": "/* logic */" }, + { "file_name": "webdriver.js", "content": "/* logic */" }, + { "file_name": "websocket.js", "content": "/* logic */" } + ] +} +``` + +### Rules + +- File names must match the framework/protocol names exactly. +- Each file detects **only its own framework or protocol**. +- No extra files or outputs are allowed. + +--- + +## Scoring System: The Three Pillars + +ADA v2 uses a **Fail-Fast** scoring model. If a submission fails any of the three critical pillars, the final score is immediately set to **0.0**. + +### 1. 
Human Detection + +Miners must distinguish between automated tasks and human-injected sessions. + +- **Limit**: You are allowed a maximum of **1 mistake**. Exceeding this limit results in an immediate **score of 0.0**. +- **Weight**: Perfect detection grants 1.0 point for this pillar. Partial penalties apply for a single miss based on the ratio of human injections. + +### 2. Framework Detection + +Points are earned for correctly identifying the specific automation framework. + +- **Selenium Gate**: Missing `seleniumbase` or `selenium_driverless` zeros the entire score. +- **Density**: You earn **1.0 point** for a framework only if you detect it perfectly in **all 3 of its runs**. +- **Collision**: Reporting more than one framework or an incorrect protocol for a given session results in a collision penalty (earning only 0.1 points instead of 1.0). + +### 3. Protocol Accuracy + +Validates the low-level communication patterns of the browser. + +- **Webdriver Protocol**: Expected to be `true` for Selenium-based frameworks and `false` for others (human, playwright, etc.). +- **Websocket Protocol**: Expected to be `true` for non-Selenium frameworks (playwright, puppeteer, etc.) and `false` for human sessions and Selenium. +- **Threshold**: You are allowed a maximum of **1 miss** per protocol type. Exceeding this limit results in an immediate **score of 0.0**. + +### Final Formula + +The final score is normalized between 0.0 and 1.0 using the formula: + +```math +Final Score = (Human Score + Framework Score + Protocol Score) / 10 +``` + +__(Where 10 = 8 frameworks + 1 human pillar + 1 protocol pillar)__ + +--- + +## Incentive Eligibility + +To maintain high detection standards, a minimum performance threshold is required for incentives: + +- **Minimum Score**: **0.6** +- This threshold ensures that miners identify the main automation vectors (Selenium) and maintain high human safety while allowing for minor misses in other categories. 
+ +--- + +## Similarity & Time Decay + +- **Similarity check:** Submissions are compared against other submissions; high similarity incurs penalties. +- **Score decay:** Scores decay over 15 days to incentivize refreshed heuristics. + +### Example + +Assume: + +- 8 frameworks total +- Perfect human accuracy → 1.0 +- Perfect protocol accuracy → 1.0 +- 6 frameworks detected perfectly → 6.0 points + +```math +Final Score = (1.0 + 1.0 + 6.0) / 10 + = 8.0 / 10 + = 0.8 +``` + +Any excessive human misclassification, protocol misses, or missing Selenium detection would reduce this to **0.0**. + +--- + +## Submission Guide + +To build and submit your solution, please follow the [Building a Submission Commit](../../miner/workflow/3.build-and-publish.md) guide. + +## Submission Templates + +Templates and building instructions can be found in the [ADA Detection repository](https://github.com/RedTeamSubnet/ada-detection-challenge/tree/main/templates/commit). From ee0a375e9ba8d8c353a0214c4e2e43694c6f5437 Mon Sep 17 00:00:00 2001 From: aliyuldashev Date: Thu, 12 Feb 2026 14:39:04 +0900 Subject: [PATCH 6/6] chore: updating weights of a challenges --- .../challenge_pool/active_challenges.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/redteam_core/challenge_pool/active_challenges.yaml b/src/redteam_core/challenge_pool/active_challenges.yaml index b2f388fa..64fec046 100644 --- a/src/redteam_core/challenge_pool/active_challenges.yaml +++ b/src/redteam_core/challenge_pool/active_challenges.yaml @@ -1,7 +1,7 @@ -ada_detection_v1: - name: "ada_detection_v1" +ada_detection_v2: + name: "ada_detection_v2" description: "Detect anti-detect automation frameworks" - challenge_incentive_weight: 0.5 + challenge_incentive_weight: 0.6 challenge_image: "redteamsubnet61/ada_detection:v1.0.0-251215" target: ada_detection.controller.AADController challenge_manager: ada_detection.challenge_manager.AADChallengeManager @@ -17,7 +17,7 @@ ada_detection_v1: 
max_self_comparison_score: 0.7 min_acceptable_score: 0.6 challenge_container_run_kwargs: - name: "ada_detection_v1" + name: "ada_detection_v2" platform: "linux/amd64" privileged: true environment: @@ -37,7 +37,7 @@ ada_detection_v1: mem_limit: "12g" network: "redteam_local" environment: - CHALLENGE_NAME: "ada_detection_v1" + CHALLENGE_NAME: "ada_detection_v2" protocols: challenger: "http" challenger_ssl_verify: false @@ -52,7 +52,7 @@ ada_detection_v1: ab_sniffer_v5: name: "ab_sniffer_v5" description: "Detect the driver type of the framework" - challenge_incentive_weight: 0.5 + challenge_incentive_weight: 0.4 challenge_image: "redteamsubnet61/ab_sniffer:v5.0.0-251202" target: ab_sniffer.controller.ABSController challenge_manager: ab_sniffer.challenge_manager.ABSChallengeManager