@@ -92,9 +92,6 @@ async def run(self, websocket=None) -> List[Dict[str, Any]]:
9292 return await self .dfs_with_websocket (websocket )
9393 else :
9494 return await self .dfs ()
95- elif algorithm == "rmcts" :
96- logger .info ("Starting Reflective MCTS algorithm" )
97- return await self .rmcts ()
9895 else :
9996 error_msg = f"Unsupported algorithm: { algorithm } "
10097 logger .error (error_msg )
@@ -1149,226 +1146,3 @@ def _get_tree_data(self):
11491146 tree_data .append (node_data )
11501147
11511148 return tree_data
1152-
async def rmcts(self) -> List[Dict[str, Any]]:
    """
    Perform Reflective Monte Carlo Tree Search starting from the root node.

    Uses the configured evaluation model (via ``openai_client``) for node
    selection, scores each simulated trajectory, and on a poor score asks
    the model where to backtrack (reflection-based backpropagation).

    Returns:
        List[Dict[str, Any]]: List of ``{"action": ...}`` dicts for the best
        path found (root node excluded), or ``[]`` when no valid path exists.
    """
    # A trajectory at or above this overall score is considered satisfactory.
    SATISFACTORY_SCORE = 0.75

    def snapshot_trajectory(nodes) -> List[Dict[str, Any]]:
        """Serialize a node path (root excluded) for LLM prompts and scoring."""
        return [
            {
                "natural_language_description": node.natural_language_description,
                "action": node.action,
                "feedback": node.feedback,
            }
            for node in nodes[1:]  # skip root node
        ]

    best_score = float('-inf')
    best_path = None  # snapshot (copy) of the best-scoring path seen so far
    max_iterations = self.config.iterations  # configured iteration budget

    try:
        # Initial browser setup; abort early when no session is available.
        live_browser_url, session_id = await self._reset_browser()
        if not live_browser_url:
            logger.error("Failed to initialize browser")
            return []

        for iteration in range(max_iterations):
            logger.info(f"\n{'=' * 50}")
            logger.info(f"RMCTS Iteration {iteration + 1}/{max_iterations}")
            logger.info(f"{'=' * 50}\n")

            # --- Selection: ask the model to pick a promising child at each level.
            current_node = self.root_node
            path = [current_node]
            selection_depth = 0

            while current_node.children and not current_node.is_terminal:
                logger.info(f"\nSelection Step {selection_depth + 1}:")
                logger.info(f"Current node action: {current_node.action}")
                logger.info(f"Number of children: {len(current_node.children)}")

                # Trajectory so far, for the model to evaluate.
                trajectory = snapshot_trajectory(path)

                prompt = f"""Given the current trajectory and goal, select the most promising child node to explore next.
Consider the overall progress, efficiency, and likelihood of success.

Goal: {self.goal}

Current Trajectory:
{json.dumps(trajectory, indent=2)}

Available Children:
{json.dumps([{
    'action': child.action,
    'description': child.natural_language_description,
    'visits': child.visits,
    'value': child.value
} for child in current_node.children], indent=2)}

Return a JSON response with:
{{
    "selected_child_index": int,  # Index of the selected child
    "explanation": str  # Brief explanation of the selection
}}"""

                try:
                    # NOTE(review): synchronous OpenAI call inside an async
                    # method — blocks the event loop; consider the async client.
                    response = openai_client.chat.completions.create(
                        model=self.config.evaluation_model,
                        messages=[
                            {"role": "system", "content": "You are an expert at selecting promising paths in a search tree."},
                            {"role": "user", "content": prompt},
                        ],
                        response_format={"type": "json_object"},
                    )

                    selection = json.loads(response.choices[0].message.content)
                    selected_index = selection["selected_child_index"]

                    if 0 <= selected_index < len(current_node.children):
                        current_node = current_node.children[selected_index]
                        path.append(current_node)
                        logger.info(f"Selected child {selected_index + 1}: {current_node.action}")
                        logger.info(f"Selection explanation: {selection['explanation']}")
                    else:
                        logger.warning(f"Invalid child index {selected_index}, breaking selection")
                        break

                except Exception as e:
                    logger.error(f"Error in node selection: {str(e)}")
                    break

                selection_depth += 1

            # --- Expansion: grow the selected node if depth budget allows.
            if not current_node.is_terminal and current_node.depth < self.config.max_depth:
                logger.info("\nExpansion Step:")
                logger.info(f"Expanding node: {current_node.action}")
                try:
                    await self.expand(current_node)
                    logger.info(f"Successfully expanded node with {len(current_node.children)} children")
                except Exception as e:
                    logger.error(f"Error expanding node: {str(e)}")
                    # A node we cannot expand is treated as a dead end.
                    current_node.is_terminal = True

            # --- Simulation: score the current path against the goal.
            logger.info("\nSimulation Step:")
            logger.info(f"Evaluating path of length {len(path) - 1}")
            try:
                trajectory = snapshot_trajectory(path)

                prompt = create_llm_prompt(trajectory, self.goal)
                result = score_trajectory_with_openai(prompt, openai_client, model=self.config.evaluation_model)
                score = result["overall_score"]

                logger.info("Simulation Results:")
                logger.info(f"Overall Score: {score:.3f}")
                logger.info(f"Efficiency Score: {result['efficiency_score']:.3f}")
                logger.info(f"Accuracy Score: {result['accuracy_score']:.3f}")
                logger.info(f"Robustness Score: {result['robustness_score']:.3f}")

                if score > best_score:
                    # Log the previous best BEFORE overwriting it, and snapshot
                    # the path: `path` is mutated below during backtracking.
                    logger.info("\nNew best path found!")
                    logger.info(f"Previous best score: {best_score:.3f}")
                    logger.info(f"New best score: {score:.3f}")
                    best_score = score
                    best_path = list(path)

                # --- Reflection-based backpropagation for unsatisfactory paths.
                if score < SATISFACTORY_SCORE:
                    logger.info(f"\nReflection Step (Score {score:.3f} < {SATISFACTORY_SCORE}):")
                    reflection_prompt = f"""Analyze the current trajectory and suggest improvements.

Goal: {self.goal}

Current Trajectory:
{json.dumps(trajectory, indent=2)}

Score: {score}

Return a JSON response with:
{{
    "backtrack_to_step": int,  # Which step to backtrack to (0-based index)
    "reason": str,  # Why backtrack to this step
    "suggested_improvements": [str]  # List of suggested improvements
}}"""

                    try:
                        reflection = openai_client.chat.completions.create(
                            model=self.config.evaluation_model,
                            messages=[
                                {"role": "system", "content": "You are an expert at analyzing and improving search trajectories."},
                                {"role": "user", "content": reflection_prompt},
                            ],
                            response_format={"type": "json_object"},
                        )

                        reflection_result = json.loads(reflection.choices[0].message.content)
                        backtrack_step = reflection_result["backtrack_to_step"]

                        # Trim the path back to the suggested step.
                        if 0 <= backtrack_step < len(path):
                            current_node = path[backtrack_step]
                            del path[backtrack_step + 1:]
                            logger.info(f"Backtracking to step {backtrack_step}")
                            logger.info(f"Reason: {reflection_result['reason']}")
                            logger.info("Suggested improvements:")
                            for improvement in reflection_result["suggested_improvements"]:
                                logger.info(f"- {improvement}")

                    except Exception as e:
                        logger.error(f"Error in reflection: {str(e)}")

                # Early exit once a satisfactory trajectory is found.
                if score >= SATISFACTORY_SCORE:
                    logger.info(f"\nFound satisfactory solution with score {score:.3f}")
                    return [{"action": node.action} for node in path[1:]]

            except Exception as e:
                logger.error(f"Error in simulation: {str(e)}")
                continue  # skip backpropagation: `score` is undefined here

            # --- Backpropagation: incremental-mean update of visit statistics.
            logger.info("\nBackpropagation Step:")
            for node in path:
                old_value = node.value
                node.visits += 1
                node.value = (node.value * (node.visits - 1) + score) / node.visits
                logger.info(f"Node {node.action}:")
                logger.info(f"  Visits: {node.visits}")
                logger.info(f"  Value: {old_value:.3f} -> {node.value:.3f}")

        # Iteration budget exhausted: fall back to the best path seen.
        if best_path:
            logger.info(f"\nSearch complete. Returning best path found with score {best_score:.3f}")
            return [{"action": node.action} for node in best_path[1:]]

        logger.warning("\nNo valid path found")
        return []

    except Exception as e:
        error_msg = f"Error in RMCTS search: {str(e)}"
        logger.error(error_msg)
        if best_path:
            logger.info(f"\nReturning best path found before error with score {best_score:.3f}")
            return [{"action": node.action} for node in best_path[1:]]
        return []
1374-
0 commit comments