From 611bea44a3137522a9ff22f799b61a7aa802c10c Mon Sep 17 00:00:00 2001
From: aoshen524 <aoshen524@gmail.com>
Date: Wed, 4 Feb 2026 23:06:32 +0000
Subject: [PATCH] feat(action): add fail action type for infeasible tasks

Add FAIL action type to allow models to signal a task is infeasible:

- Add FAIL="fail" to ActionType enum
- Set stop=True for fail() in output parser (same as finish())
- Handle FAIL in pyautogui/ydotool handlers (reset state like FINISH)
- Handle FAIL in socketio server (emit finish event)
- Add fail() to prompt template instruction
- Add test coverage for all changes

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
---
 src/oagi/converters/oagi.py                  | 22 ++++++++++-------
 src/oagi/handler/pyautogui_action_handler.py |  4 ++--
 src/oagi/handler/ydotool_action_handler.py   |  4 ++--
 src/oagi/server/socketio_server.py           |  2 +-
 src/oagi/types/models/action.py              |  1 +
 src/oagi/utils/output_parser.py              |  3 ++-
 src/oagi/utils/prompt_builder.py             |  1 +
 tests/conftest.py                            | 16 +++++++++++++
 tests/test_actor.py                          | 21 ++++++++++++++++
 tests/test_oagi_action_converter.py          | 18 ++++++++++++++
 tests/test_pyautogui_action_handler.py       | 19 +++++++++++++++
 tests/utils/test_output_parser.py            | 25 ++++++++++++++++++++
 12 files changed, 121 insertions(+), 15 deletions(-)

diff --git a/src/oagi/converters/oagi.py b/src/oagi/converters/oagi.py
index de428a0..9dfefa6 100644
--- a/src/oagi/converters/oagi.py
+++ b/src/oagi/converters/oagi.py
@@ -51,21 +51,21 @@ def __call__(self, actions: list[Action]) -> list[str]:
         """
         converted: list[str] = []
         failed: list[tuple[str, str]] = []
-        has_finish = False
+        has_terminal = False
 
         if not actions:
             return converted
 
         for action in actions:
-            # Check for duplicate finish() during iteration
-            is_finish = action.type == ActionType.FINISH
-            if is_finish:
-                if has_finish:
+            # Check for duplicate finish()/fail() during iteration
+            is_terminal = action.type in (ActionType.FINISH, ActionType.FAIL)
+            if is_terminal:
+                if has_terminal:
                     raise ValueError(
-                        "Duplicate finish() detected. "
-                        "Only one finish() is allowed per action sequence."
+                        "Duplicate finish()/fail() detected. "
+                        "Only one finish() or fail() is allowed per action sequence."
                     )
-                has_finish = True
+                has_terminal = True
 
             try:
                 converted.extend(self._convert_action(action))
@@ -172,6 +172,10 @@ def _convert_single_action(self, action: Action) -> list[str]:
             self._log_info("Task completion action -> DONE")
             return ["DONE"]
 
+        if action_type == ActionType.FAIL.value:
+            self._log_info("Task infeasible action -> FAIL")
+            return ["FAIL"]
+
         if action_type == ActionType.CALL_USER.value:
             self._log_info("User intervention requested")
             return []
@@ -179,7 +183,7 @@ def _convert_single_action(self, action: Action) -> list[str]:
         raise ValueError(
             f"Unknown action type: '{action_type}'. "
             "Supported: click, left_double, left_triple, right_single, drag, "
-            "hotkey, type, scroll, wait, finish, call_user"
+            "hotkey, type, scroll, wait, finish, fail, call_user"
         )
 
     def serialize_actions(self, actions: list[Action]) -> list[dict[str, Any]]:
diff --git a/src/oagi/handler/pyautogui_action_handler.py b/src/oagi/handler/pyautogui_action_handler.py
index c0e1baa..e767637 100644
--- a/src/oagi/handler/pyautogui_action_handler.py
+++ b/src/oagi/handler/pyautogui_action_handler.py
@@ -250,8 +250,8 @@ def _execute_single_action(self, action: Action) -> None:
                 )
                 pyautogui.scroll(scroll_amount)
 
-            case ActionType.FINISH:
-                # Task completion - reset handler state
+            case ActionType.FINISH | ActionType.FAIL:
+                # Task completion or infeasible - reset handler state
                 self.reset()
 
             case ActionType.WAIT:
diff --git a/src/oagi/handler/ydotool_action_handler.py b/src/oagi/handler/ydotool_action_handler.py
index 354b842..49b8ab0 100644
--- a/src/oagi/handler/ydotool_action_handler.py
+++ b/src/oagi/handler/ydotool_action_handler.py
@@ -164,8 +164,8 @@ def _execute_action(self, action: Action) -> bool:
                 text = self.caps_manager.transform_text(text)
                 self._run_ydotool(["type", text], count=count)
 
-            case ActionType.FINISH:
-                # Task completion - reset handler state
+            case ActionType.FINISH | ActionType.FAIL:
+                # Task completion or infeasible - reset handler state
                 self.reset()
 
             case ActionType.WAIT:
diff --git a/src/oagi/server/socketio_server.py b/src/oagi/server/socketio_server.py
index a6a0b05..89e59bb 100644
--- a/src/oagi/server/socketio_server.py
+++ b/src/oagi/server/socketio_server.py
@@ -364,7 +364,7 @@ async def _emit_single_action(
                     timeout=self.config.socketio_timeout,
                 )
 
-            case ActionType.FINISH:
+            case ActionType.FINISH | ActionType.FAIL:
                 return await self.call(
                     "finish",
                     FinishEventData(**common).model_dump(),
diff --git a/src/oagi/types/models/action.py b/src/oagi/types/models/action.py
index 19b1981..d6fffd3 100644
--- a/src/oagi/types/models/action.py
+++ b/src/oagi/types/models/action.py
@@ -22,6 +22,7 @@ class ActionType(str, Enum):
     TYPE = "type"
     SCROLL = "scroll"
     FINISH = "finish"
+    FAIL = "fail"
     WAIT = "wait"
     CALL_USER = "call_user"
 
diff --git a/src/oagi/utils/output_parser.py b/src/oagi/utils/output_parser.py
index 5207625..86dddf5 100644
--- a/src/oagi/utils/output_parser.py
+++ b/src/oagi/utils/output_parser.py
@@ -45,7 +45,7 @@ def parse_raw_output(raw_output: str) -> Step:
             parsed_action = _parse_action(action_text.strip())
             if parsed_action:
                 actions.append(parsed_action)
-                if parsed_action.type == ActionType.FINISH:
+                if parsed_action.type in (ActionType.FINISH, ActionType.FAIL):
                     stop = True
 
     return Step(reason=reason, actions=actions, stop=stop)
@@ -105,6 +105,7 @@ def _parse_action(action_text: str) -> Action | None:
     - scroll(x, y, direction, c) # scroll at position
     - wait() # wait for a while
     - finish() # indicate task is finished
+    - fail() # indicate task is infeasible
 
     Args:
         action_text: String representation of a single action
diff --git a/src/oagi/utils/prompt_builder.py b/src/oagi/utils/prompt_builder.py
index 2e4c8f9..10cb92d 100644
--- a/src/oagi/utils/prompt_builder.py
+++ b/src/oagi/utils/prompt_builder.py
@@ -24,6 +24,7 @@
 8. scroll(x, y, direction, c) # scroll the mouse at position (x, y) in the direction of up or down for c times, where x and y are integers normalized between 0 and 1000
 9. wait() # wait for a while
 10. finish() # indicate the task is finished
+11. fail() # indicate the task is infeasible
 
 Directly output the text beginning with <|think_start|>, no additional text is needed for this scenario.
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 8464a45..0cf00cf 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -132,6 +132,12 @@ def sample_raw_output_completed():
     return "<|think_start|>The task has been completed successfully<|think_end|>\n<|action_start|>finish()<|action_end|>"
 
 
+@pytest.fixture
+def sample_raw_output_failed():
+    """Sample raw output for infeasible task."""
+    return "<|think_start|>The task is infeasible<|think_end|>\n<|action_start|>fail()<|action_end|>"
+
+
 @pytest.fixture
 def sample_step(sample_action):
     """Sample Step object for testing."""
@@ -152,6 +158,16 @@ def completed_step():
     )
 
 
+@pytest.fixture
+def failed_step():
+    """Sample failed Step object for infeasible task."""
+    return Step(
+        reason="The task is infeasible",
+        actions=[Action(type=ActionType.FAIL, argument="", count=1)],
+        stop=True,
+    )
+
+
 @pytest.fixture
 def mock_error_response():
     """Mock error HTTP response."""
diff --git a/tests/test_actor.py b/tests/test_actor.py
index 44306ad..e10603c 100644
--- a/tests/test_actor.py
+++ b/tests/test_actor.py
@@ -219,6 +219,27 @@ def test_step_with_completed_response(
         assert len(result.actions) == 1
         assert result.actions[0].type == ActionType.FINISH
 
+    def test_step_with_failed_response(
+        self, actor, failed_step, sample_usage_obj, mock_upload_file_response
+    ):
+        actor.task_description = "Test task"
+        actor.task_id = "task-789"
+
+        # Setup mocks
+        actor.client.put_s3_presigned_url.return_value = mock_upload_file_response
+        actor.client.chat_completion.return_value = (
+            failed_step,
+            "<|think_start|>infeasible<|think_end|>\n<|action_start|>fail()<|action_end|>",
+            sample_usage_obj,
+        )
+
+        result = actor.step(b"image bytes")
+
+        assert result.stop is True
+        assert result.reason == "The task is infeasible"
+        assert len(result.actions) == 1
+        assert result.actions[0].type == ActionType.FAIL
+
     def test_step_handles_exception(self, actor, mock_upload_file_response):
         actor.task_description = "Test task"
         actor.client.put_s3_presigned_url.return_value = mock_upload_file_response
diff --git a/tests/test_oagi_action_converter.py b/tests/test_oagi_action_converter.py
index e14fb6d..6acafca 100644
--- a/tests/test_oagi_action_converter.py
+++ b/tests/test_oagi_action_converter.py
@@ -97,6 +97,19 @@ def test_finish_action(self, converter):
         result = converter([action])
         assert result[0] == "DONE"
 
+    def test_fail_action(self, converter):
+        action = Action(type=ActionType.FAIL, argument="", count=1)
+        result = converter([action])
+        assert result[0] == "FAIL"
+
+    def test_duplicate_terminal_actions_raises(self, converter):
+        actions = [
+            Action(type=ActionType.FINISH, argument="", count=1),
+            Action(type=ActionType.FAIL, argument="", count=1),
+        ]
+        with pytest.raises(ValueError, match="Duplicate finish\\(\\)/fail\\(\\)"):
+            converter(actions)
+
 
 class TestActionStringToStep:
     def test_pyautogui_command(self, converter):
@@ -114,6 +127,11 @@ def test_done_command(self, converter):
         assert step["type"] == "sleep"
         assert step["parameters"]["seconds"] == 0
 
+    def test_fail_command(self, converter):
+        step = converter.action_string_to_step("FAIL")
+        assert step["type"] == "sleep"
+        assert step["parameters"]["seconds"] == 0
+
 
 class TestMultipleActions:
     def test_action_count(self, converter):
diff --git a/tests/test_pyautogui_action_handler.py b/tests/test_pyautogui_action_handler.py
index bed2ec2..c8041f0 100644
--- a/tests/test_pyautogui_action_handler.py
+++ b/tests/test_pyautogui_action_handler.py
@@ -131,6 +131,11 @@ def test_finish_action(handler, mock_pyautogui):
     handler([action])
 
 
+def test_fail_action(handler, mock_pyautogui):
+    action = Action(type=ActionType.FAIL, argument="", count=1)
+    handler([action])
+
+
 def test_call_user_action(handler, mock_pyautogui, capsys):
     action = Action(type=ActionType.CALL_USER, argument="", count=1)
     handler([action])
@@ -453,6 +458,20 @@ def test_finish_action_resets_handler(self, mock_pyautogui):
         handler([finish_action])
         assert handler.caps_manager.caps_enabled is False
 
+    def test_fail_action_resets_handler(self, mock_pyautogui):
+        config = PyautoguiConfig(capslock_mode="session", post_batch_delay=0)
+        handler = PyautoguiActionHandler(config=config)
+
+        # Enable caps lock
+        caps_action = Action(type=ActionType.HOTKEY, argument="capslock", count=1)
+        handler([caps_action])
+        assert handler.caps_manager.caps_enabled is True
+
+        # FAIL action should also reset handler
+        fail_action = Action(type=ActionType.FAIL, argument="", count=1)
+        handler([fail_action])
+        assert handler.caps_manager.caps_enabled is False
+
 
 class TestAsyncHandlerReset:
     def test_async_handler_reset_delegates_to_sync_handler(self, mock_pyautogui):
diff --git a/tests/utils/test_output_parser.py b/tests/utils/test_output_parser.py
index d6f7acd..965d4fe 100644
--- a/tests/utils/test_output_parser.py
+++ b/tests/utils/test_output_parser.py
@@ -34,6 +34,15 @@ def test_parse_finish_action_sets_stop(self):
         assert step.actions[0].type == ActionType.FINISH
         assert step.stop is True
 
+    def test_parse_fail_action_sets_stop(self):
+        raw = "<|think_start|>Task is infeasible<|think_end|>\n<|action_start|>fail()<|action_end|>"
+        step = parse_raw_output(raw)
+
+        assert step.reason == "Task is infeasible"
+        assert len(step.actions) == 1
+        assert step.actions[0].type == ActionType.FAIL
+        assert step.stop is True
+
     def test_parse_multiple_actions_with_ampersand(self):
         raw = "<|think_start|>Do two things<|think_end|>\n<|action_start|>click(100, 200) & type(hello)<|action_end|>"
         step = parse_raw_output(raw)
@@ -147,6 +156,7 @@ class TestParseAction:
             ("type(hello world)", ActionType.TYPE, "hello world"),
             ("wait()", ActionType.WAIT, ""),
             ("finish()", ActionType.FINISH, ""),
+            ("fail()", ActionType.FAIL, ""),
         ],
     )
     def test_parse_basic_actions(self, action_text, expected_type, expected_arg):
@@ -254,3 +264,18 @@ def test_finish_with_other_actions(self):
 
         assert step.stop is True
         assert len(step.actions) == 2
+
+    def test_fail_with_other_actions(self):
+        raw = "<|think_start|>Test<|think_end|>\n<|action_start|>click(100, 200) & fail()<|action_end|>"
+        step = parse_raw_output(raw)
+
+        assert step.stop is True
+        assert len(step.actions) == 2
+        assert step.actions[1].type == ActionType.FAIL
+
+    def test_multiple_fail_actions_stop_true(self):
+        raw = "<|think_start|>Test<|think_end|>\n<|action_start|>fail() & fail()<|action_end|>"
+        step = parse_raw_output(raw)
+
+        assert step.stop is True
+        assert len(step.actions) == 2