Merge pull request #43 from PathOnAI/add-agent-template

TataKKKL · web-flow · commit 0eb31589208b · 2025-04-01T16:51:42.000-07:00
configure mcts and lats agents
diff --git a/visual-tree-search-backend/README.md b/visual-tree-search-backend/README.md
@@ -57,4 +57,29 @@ to test the message passing from the backend to the frontend
 ```
 curl -X POST http://localhost:3000/api/terminate-session/647f4021-2402-4733-84a3-255f0d20c151
 {"status":"success","message":"Session 647f4021-2402-4733-84a3-255f0d20c151 termination requested"}
+```
+
+## 6. Add more search agents
+```
+python run_demo_treesearch_async.py \
+    --browser-mode chromium \
+    --storage-state shopping.json \
+    --starting-url "http://128.105.145.205:7770/" \
+    --agent-type "LATSAgent" \
+    --action_generation_model "gpt-4o-mini" \
+    --goal "search running shoes, click on the first result" \
+    --iterations 3 \
+    --max_depth 3
+```
+
+```
+python run_demo_treesearch_async.py \
+    --browser-mode chromium \
+    --storage-state shopping.json \
+    --starting-url "http://128.105.145.205:7770/" \
+    --agent-type "MCTSAgent" \
+    --action_generation_model "gpt-4o-mini" \
+    --goal "search running shoes, click on the first result" \
+    --iterations 3 \
+    --max_depth 3
 ```
diff --git a/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/lats_agent.py b/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/lats_agent.py
@@ -0,0 +1,68 @@
+import logging
+import time
+from typing import Any, Dict, List, Optional
+from collections import deque
+from datetime import datetime
+import os
+import json
+import subprocess
+
+from openai import OpenAI
+from dotenv import load_dotenv
+load_dotenv()
+import aiohttp
+
+from ...core_async.config import AgentConfig
+
+from ...webagent_utils_async.action.highlevel import HighLevelActionSet
+from ...webagent_utils_async.utils.playwright_manager import AsyncPlaywrightManager, setup_playwright
+from ...webagent_utils_async.utils.utils import parse_function_args, locate_element
+from ...evaluation_async.evaluators import goal_finished_evaluator
+from ...replay_async import generate_feedback, playwright_step_execution
+from ...webagent_utils_async.action.prompt_functions import extract_top_actions
+from ...webagent_utils_async.browser_env.observation import extract_page_info
+from .lats_node import LATSNode
+from .tree_vis import better_print, print_trajectory, collect_all_nodes, GREEN, RESET, print_entire_tree
+from .trajectory_score import create_llm_prompt, score_trajectory_with_openai
+from ...webagent_utils_async.utils.utils import urls_to_images
+
+logger = logging.getLogger(__name__)
+openai_client = OpenAI()
+
+class LATSAgent:
+    def __init__(
+        self,
+        starting_url: str,
+        messages: list[dict[str, Any]],
+        goal: str,
+        images: list,
+        playwright_manager: AsyncPlaywrightManager,
+        config: AgentConfig,
+    ):
+        self.starting_url = starting_url
+        self.goal = goal
+        self.image_urls = images
+        self.images = urls_to_images(self.image_urls)
+        self.messages = messages
+        self.messages.append({"role": "user", "content": f"The goal is: {self.goal}"})
+
+        self.playwright_manager = playwright_manager
+
+        self.config = config
+
+        self.agent_type = ["bid", "nav", "file", "select_option"]
+        self.action_set = HighLevelActionSet(
+            subsets=self.agent_type, strict=False, multiaction=True, demo_mode="default"
+        )
+        self.root_node = LATSNode(
+            natural_language_description=None,
+            action=None,
+            prob=None,
+            element=None,
+            goal=self.goal,
+            parent=None
+        )
+        self.reset_url = os.environ["ACCOUNT_RESET_URL"]
+
+    async def run(self, websocket=None) -> List[Dict[str, Any]]:
+        pass
diff --git a/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/mcts_agent.py b/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/mcts_agent.py
@@ -0,0 +1,68 @@
+import logging
+import time
+from typing import Any, Dict, List, Optional
+from collections import deque
+from datetime import datetime
+import os
+import json
+import subprocess
+
+from openai import OpenAI
+from dotenv import load_dotenv
+load_dotenv()
+import aiohttp
+
+from ...core_async.config import AgentConfig
+
+from ...webagent_utils_async.action.highlevel import HighLevelActionSet
+from ...webagent_utils_async.utils.playwright_manager import AsyncPlaywrightManager, setup_playwright
+from ...webagent_utils_async.utils.utils import parse_function_args, locate_element
+from ...evaluation_async.evaluators import goal_finished_evaluator
+from ...replay_async import generate_feedback, playwright_step_execution
+from ...webagent_utils_async.action.prompt_functions import extract_top_actions
+from ...webagent_utils_async.browser_env.observation import extract_page_info
+from .lats_node import LATSNode
+from .tree_vis import better_print, print_trajectory, collect_all_nodes, GREEN, RESET, print_entire_tree
+from .trajectory_score import create_llm_prompt, score_trajectory_with_openai
+from ...webagent_utils_async.utils.utils import urls_to_images
+
+logger = logging.getLogger(__name__)
+openai_client = OpenAI()
+
+class MCTSAgent:
+    def __init__(
+        self,
+        starting_url: str,
+        messages: list[dict[str, Any]],
+        goal: str,
+        images: list,
+        playwright_manager: AsyncPlaywrightManager,
+        config: AgentConfig,
+    ):
+        self.starting_url = starting_url
+        self.goal = goal
+        self.image_urls = images
+        self.images = urls_to_images(self.image_urls)
+        self.messages = messages
+        self.messages.append({"role": "user", "content": f"The goal is: {self.goal}"})
+
+        self.playwright_manager = playwright_manager
+
+        self.config = config
+
+        self.agent_type = ["bid", "nav", "file", "select_option"]
+        self.action_set = HighLevelActionSet(
+            subsets=self.agent_type, strict=False, multiaction=True, demo_mode="default"
+        )
+        self.root_node = LATSNode(
+            natural_language_description=None,
+            action=None,
+            prob=None,
+            element=None,
+            goal=self.goal,
+            parent=None
+        )
+        self.reset_url = os.environ["ACCOUNT_RESET_URL"]
+
+    async def run(self, websocket=None) -> List[Dict[str, Any]]:
+        pass
diff --git a/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/simple_search_agent.py b/visual-tree-search-backend/app/api/lwats/agents_async/SimpleSearchAgents/simple_search_agent.py
@@ -164,6 +164,9 @@ async def _reset_browser(self, websocket=None) -> Optional[str]:
             if self.config.browser_mode == "browserbase":
                 live_browser_url = await self.playwright_manager.get_live_browser_url()
                 session_id = await self.playwright_manager.get_session_id()
+            else:
+                session_id = None
+                live_browser_url = None
             await page.goto(self.starting_url, wait_until="networkidle")
             
             # Send success message if websocket is provided
diff --git a/visual-tree-search-backend/app/api/lwats/core_async/agent_factory.py b/visual-tree-search-backend/app/api/lwats/core_async/agent_factory.py
@@ -6,6 +6,8 @@
 
 from .config import AgentConfig
 from ..agents_async.SimpleSearchAgents.simple_search_agent import SimpleSearchAgent
+from ..agents_async.SimpleSearchAgents.lats_agent import LATSAgent
+from ..agents_async.SimpleSearchAgents.mcts_agent import MCTSAgent
 from ..webagent_utils_async.utils.utils import setup_logger
 from ..webagent_utils_async.utils.playwright_manager import setup_playwright
 
@@ -70,7 +72,8 @@ async def setup_search_agent(
         "content": SEARCH_AGENT_SYSTEM_PROMPT,
     }]
 
-    if agent_type == "SimpleSearchAgent":
+    if agent_type == "SimpleSearchAgent": 
+        print("SimpleSearchAgent")
         agent = SimpleSearchAgent(
             starting_url=starting_url,
             messages=messages,
@@ -79,6 +82,26 @@ async def setup_search_agent(
             playwright_manager=playwright_manager,
             config=agent_config,
         )
+    elif agent_type == "LATSAgent":
+        print("LATSAgent")
+        agent = LATSAgent(
+            starting_url=starting_url,
+            messages=messages,
+            goal=goal,
+            images = images,
+            playwright_manager=playwright_manager,
+            config=agent_config,
+        )
+    elif agent_type == "MCTSAgent":
+        print("MCTSAgent")
+        agent = MCTSAgent(
+            starting_url=starting_url,
+            messages=messages,
+            goal=goal,
+            images = images,
+            playwright_manager=playwright_manager,
+            config=agent_config,
+        )
     else:
         error_message = f"Unsupported agent type: {agent_type}. Please use 'FunctionCallingAgent', 'HighLevelPlanningAgent', 'ContextAwarePlanningAgent', 'PromptAgent' or 'PromptSearchAgent' ."
         logger.error(error_message)
diff --git a/visual-tree-search-backend/app/api/run_demo_treesearch_async.py b/visual-tree-search-backend/app/api/run_demo_treesearch_async.py
@@ -22,14 +22,14 @@ async def main(args):
     
     agent_config = AgentConfig(**filter_valid_config_args(args.__dict__))
     print(agent_config)
+
     agent, playwright_manager = await setup_search_agent(
         agent_type=args.agent_type,
         starting_url=args.starting_url,
         goal=args.goal,
         images=args.images,
         agent_config=agent_config
     )
-    print(agent_config)
     
     # Run the search
     results = await agent.run()
diff --git a/visual-tree-search-backend/app/api/run_demo_treesearch_sync.py b/visual-tree-search-backend/app/api/run_demo_treesearch_sync.py
diff --git a/visual-tree-search-backend/app/api/shopping.json b/visual-tree-search-backend/app/api/shopping.json