80 changes: 80 additions & 0 deletions env.example
@@ -0,0 +1,80 @@
# =============================================================================
# WATCHFLOW CONFIGURATION - AI Provider Abstraction Example
# =============================================================================

# GitHub Configuration (required)
APP_NAME_GITHUB=your_app_name
APP_CLIENT_ID_GITHUB=your_client_id
APP_CLIENT_SECRET_GITHUB=your_client_secret
PRIVATE_KEY_BASE64_GITHUB=your_private_key_base64
REDIRECT_URI_GITHUB=your_redirect_uri
WEBHOOK_SECRET_GITHUB=your_webhook_secret

# =============================================================================
# AI PROVIDER CONFIGURATION (NEW - PR #18)
# =============================================================================

# AI Provider Selection
AI_PROVIDER=openai # Options: openai, bedrock, garden

# Common AI Settings (defaults for all agents)
AI_MODEL=gpt-4.1-mini
AI_MAX_TOKENS=4096
AI_TEMPERATURE=0.1

# OpenAI Configuration (when AI_PROVIDER=openai)
OPENAI_API_KEY=your_openai_api_key_here

# AWS Bedrock Configuration (when AI_PROVIDER=bedrock)
# BEDROCK_REGION=us-east-1
# BEDROCK_MODEL_ID=anthropic.claude-3-sonnet-20240229-v1:0
# AWS_ACCESS_KEY_ID=your_aws_access_key
# AWS_SECRET_ACCESS_KEY=your_aws_secret_key

# GCP Model Garden Configuration (when AI_PROVIDER=garden)
# GCP_PROJECT_ID=your-gcp-project-id
# GCP_LOCATION=us-central1
# GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-key.json

# =============================================================================
# PER-AGENT AI CONFIGURATION (NEW - PR #18 Enhancement)
# =============================================================================

# Engine Agent Configuration
AI_ENGINE_MAX_TOKENS=2000
AI_ENGINE_TEMPERATURE=0.1

# Feasibility Agent Configuration
AI_FEASIBILITY_MAX_TOKENS=4096
AI_FEASIBILITY_TEMPERATURE=0.1

# Acknowledgment Agent Configuration
AI_ACKNOWLEDGMENT_MAX_TOKENS=2000
AI_ACKNOWLEDGMENT_TEMPERATURE=0.1

# =============================================================================
# EXISTING CONFIGURATION
# =============================================================================

# LangSmith Configuration
LANGCHAIN_TRACING_V2=false
LANGCHAIN_ENDPOINT=https://api.smith.langchain.com
LANGCHAIN_API_KEY=
LANGCHAIN_PROJECT=watchflow-dev

# CORS Configuration
CORS_HEADERS=["*"]
CORS_ORIGINS=["http://localhost:3000", "http://127.0.0.1:3000", "http://localhost:5500", "https://warestack.github.io", "https://watchflow.dev"]

# Repository Configuration
REPO_CONFIG_BASE_PATH=.watchflow
REPO_CONFIG_RULES_FILE=rules.yaml

# Logging Configuration
LOG_LEVEL=INFO
LOG_FORMAT=%(asctime)s - %(name)s - %(levelname)s - %(message)s
LOG_FILE_PATH=

# Development Settings
DEBUG=false
ENVIRONMENT=development
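
The per-agent variables above imply a layered lookup: an agent-specific value such as AI_ENGINE_MAX_TOKENS should win over the shared AI_MAX_TOKENS default. The resolver in src/core/ai.py is not part of this diff, so the snippet below is only a minimal sketch of that precedence, assuming per-agent keys are derived from the agent name; the helper name _resolve_agent_setting and the exact fallback order are illustrative, not the PR's code.

import os


def _resolve_agent_setting(agent: str | None, key: str, default: str) -> str:
    """Sketch: return AI_<AGENT>_<KEY> if set, else AI_<KEY>, else the default."""
    if agent:
        # e.g. agent="engine_agent", key="MAX_TOKENS" -> AI_ENGINE_MAX_TOKENS
        prefix = agent.upper().removesuffix("_AGENT")
        per_agent = os.getenv(f"AI_{prefix}_{key}")
        if per_agent is not None:
            return per_agent
    return os.getenv(f"AI_{key}", default)


# With AI_ENGINE_MAX_TOKENS=2000 and AI_MAX_TOKENS=4096 set:
# _resolve_agent_setting("engine_agent", "MAX_TOKENS", "4096") -> "2000"
# _resolve_agent_setting("feasibility_agent", "TEMPERATURE", "0.1") -> value of AI_FEASIBILITY_TEMPERATURE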
4 changes: 4 additions & 0 deletions pyproject.toml
@@ -123,6 +123,10 @@ dependencies = [
"langchain-openai>=0.0.5",
"langgraph>=0.0.20",
"openai>=1.3.0",
"langchain-aws>=0.2.34",
"boto3>=1.40.43",
"anthropic[vertex]>=0.69.0",
"langchain-google-vertexai>=2.1.2",
]

[project.optional-dependencies]
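
The four new dependencies map onto the AI_PROVIDER options from env.example: langchain-aws and boto3 back the bedrock option, anthropic[vertex] and langchain-google-vertexai back garden, and the existing langchain-openai covers openai. The actual factory in src/core/ai.py is not shown in this diff, so the dispatch below is only a sketch of what those packages make possible; the chat-model classes are the public LangChain wrappers, but the constructor arguments Watchflow actually passes are assumptions.

from langchain_openai import ChatOpenAI


def _build_chat_model(provider: str, model: str, max_tokens: int, temperature: float):
    """Sketch of a provider dispatch; not the real get_chat_model implementation."""
    if provider == "openai":
        return ChatOpenAI(model=model, max_tokens=max_tokens, temperature=temperature)
    if provider == "bedrock":
        # Provided by langchain-aws; credentials and region come from the AWS_*/BEDROCK_* variables.
        from langchain_aws import ChatBedrockConverse

        return ChatBedrockConverse(model=model, max_tokens=max_tokens, temperature=temperature)
    if provider == "garden":
        # Provided by langchain-google-vertexai; project and location come from the GCP_* variables.
        from langchain_google_vertexai import ChatVertexAI

        return ChatVertexAI(model=model, max_output_tokens=max_tokens, temperature=temperature)
    raise ValueError(f"Unknown AI_PROVIDER: {provider}")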
15 changes: 5 additions & 10 deletions src/agents/acknowledgment_agent/agent.py
@@ -8,8 +8,10 @@
from langchain_core.messages import HumanMessage, SystemMessage
from langgraph.graph import StateGraph

from src.agents.acknowledgment_agent.models import AcknowledgmentContext, AcknowledgmentEvaluation
from src.agents.acknowledgment_agent.prompts import create_evaluation_prompt, get_system_prompt
from src.agents.base import AgentResult, BaseAgent
from src.core.ai import get_chat_model

logger = logging.getLogger(__name__)

@@ -27,7 +29,7 @@ class AcknowledgmentAgent(BaseAgent):

def __init__(self, max_retries: int = 3, timeout: float = 30.0):
# Call super class __init__ first
super().__init__(max_retries=max_retries)
super().__init__(max_retries=max_retries, agent_name="acknowledgment_agent")
self.timeout = timeout
logger.info(f"🧠 Acknowledgment agent initialized with timeout: {timeout}s")

@@ -36,7 +38,6 @@ def _build_graph(self) -> StateGraph:
Build a simple LangGraph workflow for acknowledgment evaluation.
Since this agent is primarily LLM-based, we create a minimal graph.
"""
from .models import AcknowledgmentContext

# Create a simple state graph
workflow = StateGraph(AcknowledgmentContext)
@@ -103,14 +104,8 @@ async def evaluate_acknowledgment(
# Get LLM evaluation with structured output
logger.info("🧠 Requesting LLM evaluation with structured output...")

# Use the same pattern as engine agent: direct structured output
from langchain_openai import ChatOpenAI

from src.core.config import config

from .models import AcknowledgmentEvaluation

llm = ChatOpenAI(api_key=config.ai.api_key, model=config.ai.model, max_tokens=2000, temperature=0.1)
# Use the same pattern as other agents: direct get_chat_model call
llm = get_chat_model(agent="acknowledgment_agent")
structured_llm = llm.with_structured_output(AcknowledgmentEvaluation)

messages = [SystemMessage(content=get_system_prompt()), HumanMessage(content=evaluation_prompt)]
2 changes: 1 addition & 1 deletion src/agents/acknowledgment_agent/test_agent.py
@@ -5,7 +5,7 @@
import asyncio
import logging

from .agent import AcknowledgmentAgent
from src.agents.acknowledgment_agent.agent import AcknowledgmentAgent

# Set up logging
logging.basicConfig(level=logging.INFO)
16 changes: 5 additions & 11 deletions src/agents/base.py
@@ -7,9 +7,7 @@
from abc import ABC, abstractmethod
from typing import Any, TypeVar

from langchain_openai import ChatOpenAI

from src.core.config import config
from src.core.ai import get_chat_model

logger = logging.getLogger(__name__)

@@ -43,17 +41,13 @@ class BaseAgent(ABC):
- Performance metrics tracking
"""

def __init__(self, max_retries: int = 3, retry_delay: float = 1.0):
def __init__(self, max_retries: int = 3, retry_delay: float = 1.0, agent_name: str | None = None):
self.max_retries = max_retries
self.retry_delay = retry_delay
self.llm = ChatOpenAI(
api_key=config.ai.api_key,
model=config.ai.model,
max_tokens=config.ai.max_tokens,
temperature=config.ai.temperature,
)
self.agent_name = agent_name
self.llm = get_chat_model(agent=agent_name)
self.graph = self._build_graph()
logger.info(f"🔧 {self.__class__.__name__} initialized with max_retries={max_retries}")
logger.info(f"🔧 {self.__class__.__name__} initialized with max_retries={max_retries}, agent_name={agent_name}")

@abstractmethod
def _build_graph(self):
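
With that change, provider wiring lives entirely in BaseAgent: a concrete agent only passes its agent_name, which presumably keys the per-agent overrides from env.example. A hypothetical subclass, just to show the contract — ExampleAgent and its one-node graph are not part of the codebase, and this assumes _build_graph is the only hook a subclass must supply:

from typing import TypedDict

from langgraph.graph import StateGraph

from src.agents.base import BaseAgent


class ExampleState(TypedDict):
    prompt: str
    result: str


class ExampleAgent(BaseAgent):
    """Illustrative only: shows how agent_name flows into get_chat_model."""

    def __init__(self, max_retries: int = 3):
        # agent_name is assumed to select AI_EXAMPLE_* overrides, falling back to AI_*.
        super().__init__(max_retries=max_retries, agent_name="example_agent")

    def _build_graph(self) -> StateGraph:
        # Real agents wire their LangGraph workflow here; a single pass-through
        # node keeps the sketch self-contained.
        workflow = StateGraph(ExampleState)
        workflow.add_node("noop", lambda state: state)
        workflow.set_entry_point("noop")
        workflow.set_finish_point("noop")
        return workflow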
10 changes: 7 additions & 3 deletions src/agents/engine_agent/agent.py
@@ -43,8 +43,8 @@ class RuleEngineAgent(BaseAgent):
5. This provides 80% speed/cost savings while maintaining 100% flexibility
"""

def __init__(self, max_retries: int = 3, timeout: float = 60.0):
super().__init__(max_retries=max_retries)
def __init__(self, max_retries: int = 3, timeout: float = 300.0):
super().__init__(max_retries=max_retries, agent_name="engine_agent")
self.timeout = timeout

logger.info("🔧 Rule Engine agent initializing...")
@@ -111,7 +111,11 @@ async def execute(self, event_type: str, event_data: dict[str, Any], rules: list
logger.info(f"🔧 Rule Engine evaluation completed in {execution_time:.2f}s")

# Extract violations from result
violations = result.violations if hasattr(result, "violations") else []
violations = []
if isinstance(result, dict):
violations = result.get("violations", [])
elif hasattr(result, "violations"):
violations = result.violations

logger.info(f"🔧 Rule Engine extracted {len(violations)} violations")

108 changes: 93 additions & 15 deletions src/agents/engine_agent/nodes.py
@@ -3,12 +3,12 @@
"""

import asyncio
import json
import logging
import time
from typing import Any

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI

from src.agents.engine_agent.models import (
EngineState,
@@ -24,7 +24,7 @@
create_validation_strategy_prompt,
get_llm_evaluation_system_prompt,
)
from src.core.config import config
from src.core.ai import get_chat_model
from src.rules.validators import VALIDATOR_REGISTRY

logger = logging.getLogger(__name__)
@@ -73,7 +73,7 @@ async def select_validation_strategy(state: EngineState) -> EngineState:
logger.info(f"🎯 Selecting validation strategies for {len(state.rule_descriptions)} rules using LLM")

# Use LLM to analyze rules and select validation strategies
llm = ChatOpenAI(api_key=config.ai.api_key, model=config.ai.model, max_tokens=2000, temperature=0.1)
llm = get_chat_model(agent="engine_agent")

for rule_desc in state.rule_descriptions:
# Create prompt for strategy selection
@@ -88,8 +88,23 @@
structured_llm = llm.with_structured_output(StrategySelectionResponse)
strategy_result = await structured_llm.ainvoke(messages)

rule_desc.validation_strategy = strategy_result.strategy
rule_desc.validator_name = strategy_result.validator_name
# Handle both structured response and BaseMessage cases
if hasattr(strategy_result, "strategy"):
# It's a structured response
rule_desc.validation_strategy = strategy_result.strategy
rule_desc.validator_name = strategy_result.validator_name
else:
# It's a BaseMessage, try to parse the content
import json

try:
content = json.loads(strategy_result.content)
rule_desc.validation_strategy = ValidationStrategy(content.get("strategy", "hybrid"))
rule_desc.validator_name = content.get("validator_name")
except (json.JSONDecodeError, ValueError):
# Fallback to default values
rule_desc.validation_strategy = ValidationStrategy.HYBRID
rule_desc.validator_name = None

logger.info(f"🎯 Rule '{rule_desc.description[:50]}...' using {rule_desc.validation_strategy} strategy")
if rule_desc.validator_name:
@@ -183,7 +198,7 @@ async def execute_llm_fallback(state: EngineState) -> EngineState:
return state

# Execute LLM evaluations concurrently (with rate limiting)
llm = ChatOpenAI(api_key=config.ai.api_key, model=config.ai.model, max_tokens=2000, temperature=0.1)
llm = get_chat_model(agent="engine_agent")

llm_tasks = []
for rule_desc in llm_rules:
@@ -205,8 +220,12 @@
state.analysis_steps.append(f"🧠 LLM failed: {rule_desc.description[:50]}...")
else:
if result.get("is_violated", False):
state.violations.append(result.get("violation", {}))
violation = result.get("violation", {})
state.violations.append(violation)
state.analysis_steps.append(f"🧠 LLM violation: {rule_desc.description[:50]}...")
logger.info(
f"🚨 Violation detected: {rule_desc.description[:50]}... - {violation.get('message', 'No message')[:100]}..."
)
else:
state.analysis_steps.append(f"🧠 LLM passed: {rule_desc.description[:50]}...")

@@ -267,7 +286,7 @@ async def _execute_single_validator(rule_desc: RuleDescription, event_data: dict


async def _execute_single_llm_evaluation(
rule_desc: RuleDescription, event_data: dict[str, Any], event_type: str, llm: ChatOpenAI
rule_desc: RuleDescription, event_data: dict[str, Any], event_type: str, llm: Any
) -> dict[str, Any]:
"""Execute a single LLM evaluation."""
start_time = time.time()
@@ -283,13 +302,45 @@

execution_time = (time.time() - start_time) * 1000

if evaluation_result.is_violated:
# Handle both structured response and BaseMessage cases
if hasattr(evaluation_result, "is_violated"):
# It's a structured response
is_violated = evaluation_result.is_violated
message = evaluation_result.message
details = evaluation_result.details
how_to_fix = evaluation_result.how_to_fix
else:
# It's a BaseMessage, try to parse the content
try:
content = json.loads(evaluation_result.content)
is_violated = content.get("is_violated", False)
message = content.get("message", "No message provided")
details = content.get("details", {})
how_to_fix = content.get("how_to_fix")
except (json.JSONDecodeError, ValueError) as e:
# Try to extract violation info from partial JSON
logger.warning(f"⚠️ Failed to parse LLM response for rule '{rule_desc.description[:30]}...': {e}")

# Check if we can extract basic violation info from truncated JSON
content_str = evaluation_result.content
if '"rule_violation": true' in content_str or '"is_violated": true' in content_str:
is_violated = True
message = "Rule violation detected (truncated response)"
details = {"truncated": True, "raw_content": content_str[:500]}
how_to_fix = "Review the rule requirements"
else:
is_violated = False
message = "Failed to parse LLM response"
details = {}
how_to_fix = None

if is_violated:
violation = {
"rule_description": rule_desc.description,
"severity": rule_desc.severity,
"message": evaluation_result.message,
"details": evaluation_result.details,
"how_to_fix": evaluation_result.how_to_fix or "",
"message": message,
"details": details,
"how_to_fix": how_to_fix or "",
"docs_url": "",
"validation_strategy": ValidationStrategy.LLM_REASONING,
"execution_time_ms": execution_time,
@@ -301,7 +352,21 @@
except Exception as e:
execution_time = (time.time() - start_time) * 1000
logger.error(f"❌ LLM evaluation error for rule '{rule_desc.description[:50]}...': {e}")
return {"is_violated": False, "error": str(e), "execution_time_ms": execution_time}
return {
"is_violated": False,
"error": str(e),
"execution_time_ms": execution_time,
"violation": {
"rule_description": rule_desc.description,
"severity": rule_desc.severity,
"message": f"LLM evaluation failed: {str(e)}",
"details": {"error_type": type(e).__name__, "error_message": str(e)},
"how_to_fix": "Review the rule configuration and try again",
"docs_url": "",
"validation_strategy": ValidationStrategy.LLM_REASONING,
"execution_time_ms": execution_time,
},
}


async def _generate_dynamic_how_to_fix(
Expand All @@ -310,7 +375,7 @@ async def _generate_dynamic_how_to_fix(
"""Generate dynamic 'how to fix' message using LLM."""

try:
llm = ChatOpenAI(api_key=config.ai.api_key, model=config.ai.model, max_tokens=1000, temperature=0.1)
llm = get_chat_model(agent="engine_agent", max_tokens=1000)

# Create prompt for how to fix generation
how_to_fix_prompt = create_how_to_fix_prompt(rule_desc, event_data, validator_name)
@@ -325,7 +390,20 @@
structured_llm = llm.with_structured_output(HowToFixResponse)
how_to_fix_result = await structured_llm.ainvoke(messages)

return how_to_fix_result.how_to_fix
# Handle both structured response and BaseMessage cases
if hasattr(how_to_fix_result, "how_to_fix"):
return how_to_fix_result.how_to_fix
else:
# It's a BaseMessage, try to parse the content
import json

try:
content = json.loads(how_to_fix_result.content)
return content.get(
"how_to_fix", f"Review and address the requirements for rule: {rule_desc.description}"
)
except (json.JSONDecodeError, ValueError):
return f"Review and address the requirements for rule: {rule_desc.description}"

except Exception as e:
logger.error(f"❌ Error generating how to fix message: {e}")
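
The structured-response-versus-BaseMessage branch now appears three times in nodes.py (strategy selection, rule evaluation, how-to-fix generation), because with_structured_output can hand back a raw message on some providers, whose content then has to be JSON-decoded. A follow-up could fold that into one helper; a rough sketch, where the helper name and the Pydantic-schema assumption are mine rather than the PR's:

import json
import logging
from typing import Any, TypeVar

from pydantic import BaseModel

logger = logging.getLogger(__name__)

T = TypeVar("T", bound=BaseModel)


def coerce_structured_result(result: Any, schema: type[T]) -> T | None:
    """Normalize a with_structured_output result into `schema`, or None on failure."""
    if isinstance(result, schema):
        # The provider returned the structured object directly.
        return result
    try:
        # Otherwise treat it as a BaseMessage whose content should be JSON.
        return schema.model_validate(json.loads(result.content))
    except (json.JSONDecodeError, ValueError, AttributeError) as exc:
        logger.warning("Could not coerce LLM response into %s: %s", schema.__name__, exc)
        return None

Call sites would then reduce to something like coerce_structured_result(strategy_result, StrategySelectionResponse), with the existing defaults applied when None comes back.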