diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..8b73604 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,42 @@ +name: Tests + +on: + push: + branches: [ main ] + pull_request: + branches: [ main ] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.12"] + + steps: + - uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v3 + with: + version: "latest" + + - name: Set up Python ${{ matrix.python-version }} + run: uv python install ${{ matrix.python-version }} + + - name: Install dependencies + run: uv sync --all-extras + + - name: Run all tests + run: | + echo "Running unit tests..." + uv run pytest tests/unit/ -v --tb=short + echo "Running integration tests (mocked - no real API calls)..." + uv run pytest tests/integration/ -v --tb=short + + - name: Upload coverage reports + uses: codecov/codecov-action@v4 + if: matrix.python-version == '3.12' + with: + file: ./coverage.xml + fail_ci_if_error: false diff --git a/.gitignore b/.gitignore index eef4aab..acc6a5d 100644 --- a/.gitignore +++ b/.gitignore @@ -174,6 +174,7 @@ PLANNING.md .pdm-build/ .ruff_cache/ .vscode/ +.kiro # Copilot .github/instructions/ diff --git a/README.md b/README.md index e17d4d4..26ea2fa 100644 --- a/README.md +++ b/README.md @@ -113,6 +113,48 @@ rules: 2. **Try acknowledgment workflow**: Comment `@watchflow acknowledge` when rules are violated 3. **Verify rule enforcement**: Check that blocking rules prevent merging +## ๐Ÿงช Testing + +The project includes comprehensive tests that run **without making real API calls** by default: + +### Running Tests + +```bash +# Run all tests (mocked - no API costs) +pytest + +# Run only unit tests (very fast) +pytest tests/unit/ + +# Run only integration tests (mocked) +pytest tests/integration/ +``` + +### Test Structure + +``` +tests/ +โ”œโ”€โ”€ unit/ # โšก Fast unit tests (mocked OpenAI) +โ”‚ โ””โ”€โ”€ test_feasibility_agent.py +โ””โ”€โ”€ integration/ # ๐ŸŒ Full HTTP stack tests (mocked OpenAI) + โ””โ”€โ”€ test_rules_api.py +``` + +### Real API Testing (Local Development Only) + +If you want to test with **real OpenAI API calls** locally: + +```bash +# Set environment variables +export OPENAI_API_KEY="your-api-key" +export INTEGRATION_TEST_REAL_API=true + +# Run integration tests with real API calls (costs money!) +pytest tests/integration/ -m integration +``` + +**โš ๏ธ Warning:** Real API tests make actual OpenAI calls and will cost money. They're disabled by default in CI/CD. + ## Configuration For advanced configuration options, see the [Configuration Guide](docs/getting-started/configuration.md). 
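The README's real-API invocation `pytest tests/integration/ -m integration` presumes an `integration` marker is registered; since `pyproject.toml` passes `--strict-markers`, an unregistered marker would abort the run. If that marker is not already declared elsewhere in the repository, a minimal `tests/conftest.py` sketch along the following lines (the file name and marker description are assumptions, not part of this diff) would register it and apply it to everything collected from `tests/integration/`:

```python
# tests/conftest.py - illustrative sketch, not part of this diff.
# Assumes the "integration" marker referenced in the README is not already
# registered elsewhere (e.g. under [tool.pytest.ini_options] markers).
import pytest


def pytest_configure(config):
    # Register the marker so it is accepted under --strict-markers.
    config.addinivalue_line(
        "markers", "integration: tests that exercise the full HTTP stack"
    )


def pytest_collection_modifyitems(config, items):
    # Tag every test collected from tests/integration/ so "-m integration"
    # (and "-m 'not integration'") can select or exclude them.
    for item in items:
        if "tests/integration/" in item.nodeid:
            item.add_marker(pytest.mark.integration)
```

With something like this in place, `pytest -m "not integration"` also gives a quick way to run only the unit suite locally.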
diff --git a/pyproject.toml b/pyproject.toml index 50cabe7..a80a335 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -200,13 +200,14 @@ python_functions = ["test_*"] addopts = [ "--strict-markers", "--strict-config", - "--cov=backend", + "--cov=src", "--cov-report=term-missing", "--cov-report=html", "--cov-report=xml", ] asyncio_mode = "auto" + [tool.coverage.run] source = ["backend"] omit = [ diff --git a/src/agents/feasibility_agent/__init__.py b/src/agents/feasibility_agent/__init__.py index 9b8fd20..701b27f 100644 --- a/src/agents/feasibility_agent/__init__.py +++ b/src/agents/feasibility_agent/__init__.py @@ -6,6 +6,5 @@ """ from .agent import RuleFeasibilityAgent -from .models import FeasibilityResult -__all__ = ["RuleFeasibilityAgent", "FeasibilityResult"] +__all__ = ["RuleFeasibilityAgent"] diff --git a/src/agents/feasibility_agent/agent.py b/src/agents/feasibility_agent/agent.py index 20606d0..ad7d079 100644 --- a/src/agents/feasibility_agent/agent.py +++ b/src/agents/feasibility_agent/agent.py @@ -8,7 +8,7 @@ from src.agents.base import AgentResult, BaseAgent -from .models import FeasibilityResult, FeasibilityState +from .models import FeasibilityState from .nodes import analyze_rule_feasibility, generate_yaml_config logger = logging.getLogger(__name__) @@ -27,11 +27,19 @@ def _build_graph(self) -> StateGraph: workflow.add_node("analyze_feasibility", analyze_rule_feasibility) workflow.add_node("generate_yaml", generate_yaml_config) - # Add edges + # Add edges with conditional logic workflow.add_edge(START, "analyze_feasibility") - workflow.add_edge("analyze_feasibility", "generate_yaml") + + # Conditional edge: only generate YAML if feasible + workflow.add_conditional_edges( + "analyze_feasibility", + lambda state: "generate_yaml" if state.is_feasible else END, + {"generate_yaml": "generate_yaml", END: END}, + ) + workflow.add_edge("generate_yaml", END) + logger.info("๐Ÿ”ง FeasibilityAgent graph built with conditional structured output workflow") return workflow.compile() async def execute(self, rule_description: str) -> AgentResult: @@ -39,39 +47,33 @@ async def execute(self, rule_description: str) -> AgentResult: Check if a rule description is feasible and return YAML or feedback. 
""" try: + logger.info(f"๐Ÿš€ Starting feasibility analysis for rule: {rule_description[:100]}...") + # Prepare initial state initial_state = FeasibilityState(rule_description=rule_description) # Run the graph result = await self.graph.ainvoke(initial_state) + # Convert dict result back to FeasibilityState if needed + if isinstance(result, dict): + result = FeasibilityState(**result) + + logger.info(f"โœ… Feasibility analysis completed: feasible={result.is_feasible}, type={result.rule_type}") + # Convert to AgentResult return AgentResult( - success=result.get("is_feasible", False), - message=result.get("feedback", ""), + success=result.is_feasible, + message=result.feedback, data={ - "is_feasible": result.get("is_feasible", False), - "yaml_content": result.get("yaml_content", ""), - "confidence_score": result.get("confidence_score", 0.0), - "rule_type": result.get("rule_type", ""), - "analysis_steps": result.get("analysis_steps", []), + "is_feasible": result.is_feasible, + "yaml_content": result.yaml_content, + "confidence_score": result.confidence_score, + "rule_type": result.rule_type, + "analysis_steps": result.analysis_steps, }, ) except Exception as e: - logger.error(f"Error in rule feasibility check: {e}") + logger.error(f"โŒ Error in rule feasibility check: {e}") return AgentResult(success=False, message=f"Feasibility check failed: {str(e)}", data={}) - - async def check_feasibility(self, rule_description: str) -> FeasibilityResult: - """ - Legacy method for backwards compatibility. - """ - result = await self.execute(rule_description) - - return FeasibilityResult( - is_feasible=result.data.get("is_feasible", False), - yaml_content=result.data.get("yaml_content", ""), - feedback=result.message, - confidence_score=result.data.get("confidence_score"), - rule_type=result.data.get("rule_type"), - ) diff --git a/src/agents/feasibility_agent/models.py b/src/agents/feasibility_agent/models.py index 8e720e7..d28c452 100644 --- a/src/agents/feasibility_agent/models.py +++ b/src/agents/feasibility_agent/models.py @@ -5,14 +5,20 @@ from pydantic import BaseModel, Field -class FeasibilityResult(BaseModel): - """Result of checking if a rule is feasible.""" - - is_feasible: bool - yaml_content: str - feedback: str - confidence_score: float | None = None - rule_type: str | None = None +class FeasibilityAnalysis(BaseModel): + """Structured output model for rule feasibility analysis.""" + + is_feasible: bool = Field(description="Whether the rule is feasible to implement with Watchflow") + rule_type: str = Field(description="Type of rule (time_restriction, branch_pattern, title_pattern, etc.)") + confidence_score: float = Field(description="Confidence score from 0.0 to 1.0", ge=0.0, le=1.0) + feedback: str = Field(description="Detailed feedback on implementation considerations") + analysis_steps: list[str] = Field(description="Step-by-step analysis breakdown", default_factory=list) + + +class YamlGeneration(BaseModel): + """Structured output model for YAML configuration generation.""" + + yaml_content: str = Field(description="Generated Watchflow YAML rule configuration") class FeasibilityState(BaseModel): diff --git a/src/agents/feasibility_agent/nodes.py b/src/agents/feasibility_agent/nodes.py index ba40154..3aaf4cd 100644 --- a/src/agents/feasibility_agent/nodes.py +++ b/src/agents/feasibility_agent/nodes.py @@ -2,25 +2,24 @@ LangGraph nodes for the Rule Feasibility Agent. 
""" -import json import logging from langchain_openai import ChatOpenAI from src.core.config import config -from .models import FeasibilityState +from .models import FeasibilityAnalysis, FeasibilityState, YamlGeneration from .prompts import RULE_FEASIBILITY_PROMPT, YAML_GENERATION_PROMPT logger = logging.getLogger(__name__) -def analyze_rule_feasibility(state: FeasibilityState) -> FeasibilityState: +async def analyze_rule_feasibility(state: FeasibilityState) -> FeasibilityState: """ - Analyze whether a rule description is feasible to implement. + Analyze whether a rule description is feasible to implement using structured output. """ try: - # Create LLM client directly using centralized config + # Create LLM client with structured output llm = ChatOpenAI( api_key=config.ai.api_key, model=config.ai.model, @@ -28,76 +27,46 @@ def analyze_rule_feasibility(state: FeasibilityState) -> FeasibilityState: temperature=config.ai.temperature, ) + # Use structured output instead of manual JSON parsing + structured_llm = llm.with_structured_output(FeasibilityAnalysis) + # Analyze rule feasibility prompt = RULE_FEASIBILITY_PROMPT.format(rule_description=state.rule_description) - response = llm.invoke(prompt) - - # Log the raw response for debugging - logger.info(f"Raw LLM response: {response.content}") - - # Check if response is empty - if not response.content or response.content.strip() == "": - logger.error("LLM returned empty response") - state.is_feasible = False - state.feedback = "Analysis failed: LLM returned empty response" - return state - - # Try to parse JSON with better error handling - try: - result = json.loads(response.content.strip()) - except json.JSONDecodeError as json_error: - logger.error(f"Failed to parse JSON response: {json_error}") - logger.error(f"Response content: {response.content}") - - # Try to extract JSON from markdown code blocks if present - content = response.content.strip() - if content.startswith("```json"): - content = content[7:] # Remove ```json - elif content.startswith("```"): - content = content[3:] # Remove ``` - if content.endswith("```"): - content = content[:-3] # Remove trailing ``` - - try: - result = json.loads(content.strip()) - logger.info("Successfully extracted JSON from markdown code blocks") - except json.JSONDecodeError: - # If all parsing attempts fail, set default values - logger.error("All JSON parsing attempts failed") - state.is_feasible = False - state.feedback = ( - f"Analysis failed: Could not parse LLM response as JSON. Raw response: {response.content[:200]}..." - ) - return state - - # Update state with analysis results - state.is_feasible = result.get("is_feasible", False) - state.rule_type = result.get("rule_type", "") - state.confidence_score = result.get("confidence_score", 0.0) - state.yaml_content = result.get("yaml_content", "") - state.feedback = result.get("feedback", "") - state.analysis_steps = result.get("analysis_steps", []) - - logger.info(f"Rule feasibility analysis completed: {state.is_feasible}") + # Get structured response - no more JSON parsing needed! + result = await structured_llm.ainvoke(prompt) + + # Update state with analysis results - now type-safe! 
+ state.is_feasible = result.is_feasible + state.rule_type = result.rule_type + state.confidence_score = result.confidence_score + state.feedback = result.feedback + state.analysis_steps = result.analysis_steps + + logger.info(f"๐Ÿ” Rule feasibility analysis completed: {state.is_feasible}") + logger.info(f"๐Ÿ” Rule type identified: {state.rule_type}") + logger.info(f"๐Ÿ” Confidence score: {state.confidence_score}") except Exception as e: - logger.error(f"Error in rule feasibility analysis: {e}") + logger.error(f"โŒ Error in rule feasibility analysis: {e}") state.is_feasible = False state.feedback = f"Analysis failed: {str(e)}" + state.confidence_score = 0.0 return state -def generate_yaml_config(state: FeasibilityState) -> FeasibilityState: +async def generate_yaml_config(state: FeasibilityState) -> FeasibilityState: """ - Generate YAML configuration for feasible rules. + Generate YAML configuration for feasible rules using structured output. + This node only runs if the rule is feasible. """ if not state.is_feasible or not state.rule_type: + logger.info("๐Ÿ”ง Skipping YAML generation - rule not feasible or no rule type") return state try: - # Create LLM client directly using centralized config + # Create LLM client with structured output llm = ChatOpenAI( api_key=config.ai.api_key, model=config.ai.model, @@ -105,15 +74,22 @@ def generate_yaml_config(state: FeasibilityState) -> FeasibilityState: temperature=config.ai.temperature, ) + # Use structured output for YAML generation + structured_llm = llm.with_structured_output(YamlGeneration) + prompt = YAML_GENERATION_PROMPT.format(rule_type=state.rule_type, rule_description=state.rule_description) - response = llm.invoke(prompt) - state.yaml_content = response.content.strip() + # Get structured response + result = await structured_llm.ainvoke(prompt) + + # Update state with generated YAML + state.yaml_content = result.yaml_content.strip() - logger.info(f"YAML configuration generated for rule type: {state.rule_type}") + logger.info(f"๐Ÿ”ง YAML configuration generated for rule type: {state.rule_type}") + logger.info(f"๐Ÿ”ง Generated YAML length: {len(state.yaml_content)} characters") except Exception as e: - logger.error(f"Error generating YAML configuration: {e}") + logger.error(f"โŒ Error generating YAML configuration: {e}") state.feedback += f"\nYAML generation failed: {str(e)}" return state diff --git a/src/agents/feasibility_agent/prompts.py b/src/agents/feasibility_agent/prompts.py index e84bd91..e769449 100644 --- a/src/agents/feasibility_agent/prompts.py +++ b/src/agents/feasibility_agent/prompts.py @@ -10,28 +10,17 @@ Please analyze this rule and determine: 1. Is it feasible to implement with Watchflow's rule system? 2. What type of rule is it (time restriction, branch pattern, approval requirement, etc.)? -3. Generate appropriate Watchflow YAML configuration if feasible -4. Provide feedback on implementation considerations +3. Provide feedback on implementation considerations Consider the following rule types: -- Time restrictions (weekends, holidays, specific hours) -- Branch naming conventions and patterns -- PR title patterns and requirements -- Label requirements -- File size limits -- Approval requirements -- Commit message conventions -- Branch protection rules - -Respond in the following JSON format: -{{ - "is_feasible": boolean, - "rule_type": "string", - "confidence_score": float (0.0-1.0), - "yaml_content": "string (if feasible)", - "feedback": "string", - "analysis_steps": ["step1", "step2", ...] 
-}} +- time_restriction: Rules about when actions can occur (weekends, hours, days) +- branch_pattern: Rules about branch naming conventions +- title_pattern: Rules about PR title formatting +- label_requirement: Rules requiring specific labels +- file_size: Rules about file size limits +- approval_requirement: Rules about required approvals +- commit_message: Rules about commit message format +- branch_protection: Rules about protected branches FEEDBACK GUIDELINES: Keep feedback concise and practical. Focus on: @@ -42,6 +31,8 @@ - Severity and enforcement level recommendations Keep feedback under 200 words and avoid technical jargon. + +Provide your analysis with step-by-step reasoning in the analysis_steps field. """ RULE_TYPE_ANALYSIS_PROMPT = """ @@ -99,5 +90,5 @@ - commit_message: use "pattern" parameter with regex pattern - branch_protection: use "protected_branches" parameter with array of branch names -Generate ONLY the YAML rule configuration, no explanations or additional text. +Return only the YAML rule configuration content. """ diff --git a/src/api/rules.py b/src/api/rules.py index 78857a8..653cfba 100644 --- a/src/api/rules.py +++ b/src/api/rules.py @@ -16,8 +16,12 @@ async def evaluate_rule(request: RuleEvaluationRequest): # Create agent instance (uses centralized config) agent = RuleFeasibilityAgent() - # Use the new method signature - result = await agent.check_feasibility(rule_description=request.rule_text) + # Use the execute method + result = await agent.execute(rule_description=request.rule_text) # Return the result in the expected format - return {"supported": result.is_feasible, "snippet": result.yaml_content, "feedback": result.feedback} + return { + "supported": result.data.get("is_feasible", False), + "snippet": result.data.get("yaml_content", ""), + "feedback": result.message, + } diff --git a/src/api/scheduler.py b/src/api/scheduler.py index eb17a5c..5cdfaa8 100644 --- a/src/api/scheduler.py +++ b/src/api/scheduler.py @@ -2,7 +2,7 @@ from fastapi import APIRouter, BackgroundTasks -from src.tasks.scheduler.deployment_scheduler import deployment_scheduler +from src.tasks.scheduler.deployment_scheduler import get_deployment_scheduler router = APIRouter() @@ -10,18 +10,18 @@ @router.get("/status") async def get_scheduler_status() -> dict[str, Any]: """Get scheduler status and pending deployments.""" - return deployment_scheduler.get_status() + return get_deployment_scheduler().get_status() @router.post("/check-deployments") async def check_pending_deployments(background_tasks: BackgroundTasks): """Manually re-evaluate the status of pending deployments.""" - background_tasks.add_task(deployment_scheduler._check_pending_deployments) + background_tasks.add_task(get_deployment_scheduler()._check_pending_deployments) return {"status": "scheduled", "message": "Deployment statuses will be updated on GitHub accordingly."} @router.get("/pending-deployments") async def get_pending_deployments(): """Get list of pending deployments.""" - status = deployment_scheduler.get_status() + status = get_deployment_scheduler().get_status() return {"pending_count": status["pending_count"], "deployments": status["pending_deployments"]} diff --git a/src/core/config.py b/src/core/config.py index 2580c17..0cced6b 100644 --- a/src/core/config.py +++ b/src/core/config.py @@ -97,7 +97,10 @@ def __init__(self): # CORS configuration cors_headers = os.getenv("CORS_HEADERS", '["*"]') - cors_origins = os.getenv("CORS_ORIGINS", '["http://localhost:3000", "http://127.0.0.1:3000"]') + cors_origins = 
os.getenv( + "CORS_ORIGINS", + '["http://localhost:3000", "http://127.0.0.1:3000", "http://localhost:5500", "https://warestack.github.io"]', + ) try: self.cors = CORSConfig( @@ -108,7 +111,12 @@ def __init__(self): # Fallback to default values if JSON parsing fails self.cors = CORSConfig( headers=["*"], - origins=["http://localhost:3000", "http://127.0.0.1:3000", "http://localhost:5500"], + origins=[ + "http://localhost:3000", + "http://127.0.0.1:3000", + "http://localhost:5500", + "https://warestack.github.io", + ], ) self.repo_config = RepoConfig( diff --git a/src/event_processors/deployment_protection_rule.py b/src/event_processors/deployment_protection_rule.py index 5dbeb92..4bad3d3 100644 --- a/src/event_processors/deployment_protection_rule.py +++ b/src/event_processors/deployment_protection_rule.py @@ -4,7 +4,7 @@ from src.agents.engine_agent.agent import RuleEngineAgent from src.event_processors.base import BaseEventProcessor, ProcessingResult -from src.tasks.scheduler.deployment_scheduler import deployment_scheduler +from src.tasks.scheduler.deployment_scheduler import get_deployment_scheduler from src.tasks.task_queue import Task logger = logging.getLogger(__name__) @@ -115,7 +115,7 @@ async def process(self, task: Task) -> ProcessingResult: else: time_based_violations = self._check_time_based_violations(violations) if time_based_violations: - await deployment_scheduler.add_pending_deployment( + await get_deployment_scheduler().add_pending_deployment( { "deployment_id": deployment_id, "repo": task.repo_full_name, diff --git a/src/main.py b/src/main.py index 7b7df7d..d3f96f3 100644 --- a/src/main.py +++ b/src/main.py @@ -1,5 +1,6 @@ import asyncio import logging +from contextlib import asynccontextmanager from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware @@ -8,7 +9,7 @@ from src.api.scheduler import router as scheduler_api_router from src.core.config import config from src.core.models import EventType -from src.tasks.scheduler.deployment_scheduler import deployment_scheduler +from src.tasks.scheduler.deployment_scheduler import get_deployment_scheduler from src.tasks.task_queue import task_queue from src.webhooks.dispatcher import dispatcher from src.webhooks.handlers.check_run import CheckRunEventHandler @@ -28,50 +29,18 @@ format="%(asctime)s %(levelname)8s %(message)s", ) -app = FastAPI( - title="Watchflow", - description="Agentic GitHub Guardrails.", - version="0.1.0", -) - -# --- CORS Configuration --- - -app.add_middleware( - CORSMiddleware, - allow_origins=config.cors.origins, - allow_credentials=True, - allow_methods=["*"], - allow_headers=config.cors.headers, -) - -# --- Include Routers --- - -app.include_router(webhook_router, prefix="/webhooks", tags=["GitHub Webhooks"]) -app.include_router(rules_api_router, prefix="/api/v1", tags=["Public API"]) -app.include_router(scheduler_api_router, prefix="/api/v1/scheduler", tags=["Scheduler API"]) - -# --- Root Endpoint --- - - -@app.get("/", tags=["Health Check"]) -async def read_root(): - """A simple health check endpoint to confirm the service is running.""" - return {"status": "ok", "message": "Watchflow agents are running."} - - -# --- Application Lifecycle --- - -@app.on_event("startup") -async def startup_event(): - """Application startup logic.""" +@asynccontextmanager +async def lifespan(_app: FastAPI): + """Application lifespan manager for startup and shutdown logic.""" + # Startup logic print("Watchflow application starting up...") # Start background task workers await 
task_queue.start_workers(num_workers=5) # Start deployment scheduler - await deployment_scheduler.start() + await get_deployment_scheduler().start() # Register event handlers pull_request_handler = PullRequestEventHandler() @@ -95,17 +64,16 @@ async def startup_event(): print("Event handlers registered, background workers started, and deployment scheduler started.") # Start the deployment scheduler - asyncio.create_task(deployment_scheduler.start_background_scheduler()) + asyncio.create_task(get_deployment_scheduler().start_background_scheduler()) logging.info("๐Ÿš€ Deployment scheduler started") + yield -@app.on_event("shutdown") -async def shutdown_event(): - """Application shutdown logic.""" + # Shutdown logic print("Watchflow application shutting down...") # Stop deployment scheduler - await deployment_scheduler.stop() + await get_deployment_scheduler().stop() # Stop background workers await task_queue.stop_workers() @@ -113,6 +81,38 @@ async def shutdown_event(): print("Background workers and deployment scheduler stopped.") +app = FastAPI( + title="Watchflow", + description="Agentic GitHub Guardrails.", + version="0.1.0", + lifespan=lifespan, +) + +# --- CORS Configuration --- + +app.add_middleware( + CORSMiddleware, + allow_origins=config.cors.origins, + allow_credentials=True, + allow_methods=["*"], + allow_headers=config.cors.headers, +) + +# --- Include Routers --- + +app.include_router(webhook_router, prefix="/webhooks", tags=["GitHub Webhooks"]) +app.include_router(rules_api_router, prefix="/api/v1", tags=["Public API"]) +app.include_router(scheduler_api_router, prefix="/api/v1/scheduler", tags=["Scheduler API"]) + +# --- Root Endpoint --- + + +@app.get("/", tags=["Health Check"]) +async def read_root(): + """A simple health check endpoint to confirm the service is running.""" + return {"status": "ok", "message": "Watchflow agents are running."} + + # --- Health Check Endpoints --- @@ -140,4 +140,4 @@ async def health_tasks(): @app.get("/health/scheduler", tags=["Health Check"]) async def health_scheduler(): """Check the status of the deployment scheduler.""" - return deployment_scheduler.get_status() + return get_deployment_scheduler().get_status() diff --git a/src/tasks/scheduler/deployment_scheduler.py b/src/tasks/scheduler/deployment_scheduler.py index 203e6fe..fd3ba02 100644 --- a/src/tasks/scheduler/deployment_scheduler.py +++ b/src/tasks/scheduler/deployment_scheduler.py @@ -16,8 +16,15 @@ def __init__(self): self.running = False self.pending_deployments: list[dict[str, Any]] = [] self.scheduler_task = None - # Create instance of RuleAnalysisAgent - self.engine_agent = RuleEngineAgent() + # Lazy-load engine agent to avoid API key validation at import time + self._engine_agent = None + + @property + def engine_agent(self) -> RuleEngineAgent: + """Lazy-load the engine agent to avoid API key validation at import time.""" + if self._engine_agent is None: + self._engine_agent = RuleEngineAgent() + return self._engine_agent async def start(self): """Start the scheduler.""" @@ -351,5 +358,13 @@ async def stop_background_scheduler(self): await self.stop() -# Global instance -deployment_scheduler = DeploymentScheduler() +# Global instance - lazy loaded to avoid API key validation at import time +deployment_scheduler = None + + +def get_deployment_scheduler() -> DeploymentScheduler: + """Get the global deployment scheduler instance, creating it if needed.""" + global deployment_scheduler + if deployment_scheduler is None: + deployment_scheduler = DeploymentScheduler() + return 
deployment_scheduler diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..d4839a6 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ +# Tests package diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 0000000..a265048 --- /dev/null +++ b/tests/integration/__init__.py @@ -0,0 +1 @@ +# Integration tests package diff --git a/tests/integration/test_rules_api.py b/tests/integration/test_rules_api.py new file mode 100644 index 0000000..0b23cbb --- /dev/null +++ b/tests/integration/test_rules_api.py @@ -0,0 +1,117 @@ +""" +Integration tests for the rules API endpoint. +These tests verify the complete HTTP stack but mock OpenAI calls by default. +Set INTEGRATION_TEST_REAL_API=true to make real OpenAI calls. +""" + +import os +from unittest.mock import patch + +import pytest +from fastapi.testclient import TestClient + +from src.agents.base import AgentResult +from src.main import app + + +class TestRulesAPIIntegration: + """Integration test suite for the rules API with mocked external calls (safe for CI).""" + + @pytest.fixture + def client(self): + """Create test client.""" + return TestClient(app) + + def test_evaluate_feasible_rule_integration(self, client): + """Test successful rule evaluation through the complete stack (mocked OpenAI).""" + # Mock OpenAI unless real API testing is explicitly enabled + if not os.getenv("INTEGRATION_TEST_REAL_API", "false").lower() == "true": + with ( + patch("src.agents.base.BaseAgent._validate_config"), + patch("src.agents.base.BaseAgent._create_llm_client"), + patch("src.agents.feasibility_agent.agent.RuleFeasibilityAgent.execute") as mock_execute, + ): + # Mock the agent result directly + mock_result = AgentResult( + success=True, + message="Rule is feasible and can be implemented.", + data={ + "is_feasible": True, + "rule_type": "time_restriction", + "confidence_score": 0.9, + "yaml_content": """- id: "no-deployments-weekends" + name: "No Weekend Deployments" + description: "Prevent deployments on weekends" + enabled: true + severity: "high" + event_types: ["deployment"] + parameters: + days: ["saturday", "sunday"]""", + "analysis_steps": ["Analyzed rule feasibility", "Generated YAML configuration"], + }, + ) + mock_execute.return_value = mock_result + + response = client.post("/api/v1/rules/evaluate", json={"rule_text": "No deployments on weekends"}) + else: + # Real API call - requires OPENAI_API_KEY + if not os.getenv("OPENAI_API_KEY"): + pytest.skip("Real API testing enabled but OPENAI_API_KEY not set") + + response = client.post("/api/v1/rules/evaluate", json={"rule_text": "No deployments on weekends"}) + + assert response.status_code == 200 + data = response.json() + assert data["supported"] is True + assert len(data["snippet"]) > 0 + assert "weekend" in data["snippet"].lower() or "saturday" in data["snippet"].lower() + assert len(data["feedback"]) > 0 + + def test_evaluate_unfeasible_rule_integration(self, client): + """Test unfeasible rule evaluation through the complete stack (mocked OpenAI).""" + # Mock OpenAI unless real API testing is explicitly enabled + if not os.getenv("INTEGRATION_TEST_REAL_API", "false").lower() == "true": + with ( + patch("src.agents.base.BaseAgent._validate_config"), + patch("src.agents.base.BaseAgent._create_llm_client"), + patch("src.agents.feasibility_agent.agent.RuleFeasibilityAgent.execute") as mock_execute, + ): + # Mock the agent result directly + mock_result = AgentResult( + success=False, + message="Rule is not feasible.", + 
data={ + "is_feasible": False, + "rule_type": "undefined", + "confidence_score": 0.1, + "yaml_content": "", + "analysis_steps": ["Analyzed rule feasibility", "Determined rule is not implementable"], + }, + ) + mock_execute.return_value = mock_result + + response = client.post( + "/api/v1/rules/evaluate", json={"rule_text": "This rule is completely impossible to implement"} + ) + else: + # Real API call - requires OPENAI_API_KEY + if not os.getenv("OPENAI_API_KEY"): + pytest.skip("Real API testing enabled but OPENAI_API_KEY not set") + + response = client.post( + "/api/v1/rules/evaluate", json={"rule_text": "This rule is completely impossible to implement"} + ) + + assert response.status_code == 200 + data = response.json() + # Note: For mocked tests, we control the response, for real API this might vary + if not os.getenv("INTEGRATION_TEST_REAL_API", "false").lower() == "true": + assert data["supported"] is False + assert data["snippet"] == "" + assert len(data["feedback"]) > 0 + + def test_evaluate_rule_missing_text_integration(self, client): + """Test API validation for missing rule text (no external API calls needed).""" + response = client.post("/api/v1/rules/evaluate", json={}) + + assert response.status_code == 422 # Validation error diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/unit/test_feasibility_agent.py b/tests/unit/test_feasibility_agent.py new file mode 100644 index 0000000..cd14ed4 --- /dev/null +++ b/tests/unit/test_feasibility_agent.py @@ -0,0 +1,249 @@ +""" +Unit tests for the Rule Feasibility Agent with structured output. +These tests mock external dependencies (OpenAI API) for fast, isolated testing. +""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from src.agents.feasibility_agent.agent import RuleFeasibilityAgent +from src.agents.feasibility_agent.models import FeasibilityAnalysis, YamlGeneration + + +class TestRuleFeasibilityAgent: + """Test suite for RuleFeasibilityAgent with structured output.""" + + @pytest.fixture + def agent(self): + """Create agent instance for testing.""" + # Mock both config validation and LLM client creation to avoid requiring API key + with ( + patch("src.agents.base.BaseAgent._validate_config"), + patch("src.agents.base.BaseAgent._create_llm_client", return_value=MagicMock()), + ): + return RuleFeasibilityAgent() + + @pytest.fixture + def mock_feasible_analysis(self): + """Mock successful feasibility analysis.""" + return FeasibilityAnalysis( + is_feasible=True, + rule_type="time_restriction", + confidence_score=0.95, + feedback="This rule can be implemented using Watchflow's time restriction feature.", + analysis_steps=[ + "Identified rule as time-based restriction", + "Confirmed Watchflow supports time restrictions", + "Mapped to deployment event with weekend exclusion", + ], + ) + + @pytest.fixture + def mock_unfeasible_analysis(self): + """Mock unsuccessful feasibility analysis.""" + return FeasibilityAnalysis( + is_feasible=False, + rule_type="undefined", + confidence_score=1.0, + feedback="This rule cannot be implemented as it lacks actionable criteria.", + analysis_steps=[ + "Analyzed rule description", + "Found no actionable conditions", + "Determined rule is not implementable", + ], + ) + + @pytest.fixture + def mock_yaml_generation(self): + """Mock YAML generation result.""" + return YamlGeneration( + yaml_content="""- id: "no-deployments-weekends" + name: "No Weekend Deployments" + description: "Prevent deployments on 
weekends" + enabled: true + severity: "high" + event_types: ["deployment"] + parameters: + days: ["saturday", "sunday"]""" + ) + + @pytest.mark.asyncio + async def test_feasible_rule_execution(self, agent, mock_feasible_analysis, mock_yaml_generation): + """Test successful execution of a feasible rule.""" + with patch("src.agents.feasibility_agent.nodes.ChatOpenAI") as mock_openai: + # Mock the structured LLM calls + mock_analysis_llm = AsyncMock() + mock_analysis_llm.ainvoke.return_value = mock_feasible_analysis + + mock_yaml_llm = AsyncMock() + mock_yaml_llm.ainvoke.return_value = mock_yaml_generation + + mock_openai.return_value.with_structured_output.side_effect = [ + mock_analysis_llm, # First call for analysis + mock_yaml_llm, # Second call for YAML + ] + + # Execute the agent + result = await agent.execute("No deployments on weekends") + + # Assertions + assert result.success is True + assert result.data["is_feasible"] is True + assert result.data["rule_type"] == "time_restriction" + assert result.data["confidence_score"] == 0.95 + assert "weekend" in result.data["yaml_content"].lower() + assert len(result.data["analysis_steps"]) == 3 + + # Verify both LLM calls were made (analysis + YAML) + assert mock_analysis_llm.ainvoke.call_count == 1 + assert mock_yaml_llm.ainvoke.call_count == 1 + + @pytest.mark.asyncio + async def test_unfeasible_rule_execution(self, agent, mock_unfeasible_analysis): + """Test execution of an unfeasible rule (should skip YAML generation).""" + with patch("src.agents.feasibility_agent.nodes.ChatOpenAI") as mock_openai: + # Mock only the analysis LLM call + mock_analysis_llm = AsyncMock() + mock_analysis_llm.ainvoke.return_value = mock_unfeasible_analysis + + mock_openai.return_value.with_structured_output.return_value = mock_analysis_llm + + # Execute the agent + result = await agent.execute("This is impossible to implement") + + # Assertions + assert result.success is False # Success should be False for unfeasible rules + assert result.data["is_feasible"] is False + assert result.data["rule_type"] == "undefined" + assert result.data["confidence_score"] == 1.0 + assert result.data["yaml_content"] == "" # No YAML should be generated + + # Verify only analysis LLM call was made (no YAML generation) + assert mock_analysis_llm.ainvoke.call_count == 1 + + @pytest.mark.asyncio + async def test_error_handling_in_analysis(self, agent): + """Test error handling when analysis fails.""" + with patch("src.agents.feasibility_agent.nodes.ChatOpenAI") as mock_openai: + # Mock LLM to raise an exception + mock_analysis_llm = AsyncMock() + mock_analysis_llm.ainvoke.side_effect = Exception("OpenAI API error") + + mock_openai.return_value.with_structured_output.return_value = mock_analysis_llm + + # Execute the agent + result = await agent.execute("Test rule") + + # Assertions + assert result.success is False + assert "Analysis failed" in result.message + assert result.data["is_feasible"] is False + assert result.data["confidence_score"] == 0.0 + + @pytest.mark.asyncio + async def test_error_handling_in_yaml_generation(self, agent, mock_feasible_analysis): + """Test error handling when YAML generation fails.""" + with patch("src.agents.feasibility_agent.nodes.ChatOpenAI") as mock_openai: + # Mock analysis to succeed, YAML generation to fail + mock_analysis_llm = AsyncMock() + mock_analysis_llm.ainvoke.return_value = mock_feasible_analysis + + mock_yaml_llm = AsyncMock() + mock_yaml_llm.ainvoke.side_effect = Exception("YAML generation failed") + + 
mock_openai.return_value.with_structured_output.side_effect = [mock_analysis_llm, mock_yaml_llm] + + # Execute the agent + result = await agent.execute("No deployments on weekends") + + # Assertions + assert result.success is True # Analysis succeeded + assert result.data["is_feasible"] is True + assert "YAML generation failed" in result.message # Error should be in feedback + + def test_agent_initialization(self, agent): + """Test that the agent initializes correctly.""" + assert agent is not None + assert agent.graph is not None + assert agent.llm is not None + + @pytest.mark.asyncio + async def test_various_rule_types(self, agent): + """Test different types of rules to ensure proper classification.""" + test_cases = [ + {"rule": "All PRs need 2 approvals", "expected_type": "approval_requirement", "should_be_feasible": True}, + {"rule": "PR titles must start with JIRA-", "expected_type": "title_pattern", "should_be_feasible": True}, + {"rule": "Files over 10MB not allowed", "expected_type": "file_size", "should_be_feasible": True}, + ] + + for case in test_cases: + with patch("src.agents.feasibility_agent.nodes.ChatOpenAI") as mock_openai: + # Mock analysis response + mock_analysis = FeasibilityAnalysis( + is_feasible=case["should_be_feasible"], + rule_type=case["expected_type"], + confidence_score=0.9, + feedback=f"Rule can be implemented as {case['expected_type']}", + analysis_steps=["Analysis step"], + ) + + mock_yaml = YamlGeneration(yaml_content="mock yaml content") + + mock_analysis_llm = AsyncMock() + mock_analysis_llm.ainvoke.return_value = mock_analysis + + mock_yaml_llm = AsyncMock() + mock_yaml_llm.ainvoke.return_value = mock_yaml + + mock_openai.return_value.with_structured_output.side_effect = [mock_analysis_llm, mock_yaml_llm] + + # Execute + result = await agent.execute(case["rule"]) + + # Verify + assert result.data["rule_type"] == case["expected_type"] + assert result.data["is_feasible"] == case["should_be_feasible"] + + +class TestFeasibilityModels: + """Test the Pydantic models for structured output.""" + + def test_feasibility_analysis_model(self): + """Test FeasibilityAnalysis model validation.""" + # Valid model + analysis = FeasibilityAnalysis( + is_feasible=True, + rule_type="time_restriction", + confidence_score=0.95, + feedback="Test feedback", + analysis_steps=["step1", "step2"], + ) + + assert analysis.is_feasible is True + assert analysis.rule_type == "time_restriction" + assert analysis.confidence_score == 0.95 + + def test_feasibility_analysis_validation(self): + """Test FeasibilityAnalysis model validation constraints.""" + # Test confidence score validation + with pytest.raises(ValueError): + FeasibilityAnalysis( + is_feasible=True, + rule_type="test", + confidence_score=1.5, # Invalid: > 1.0 + feedback="test", + ) + + with pytest.raises(ValueError): + FeasibilityAnalysis( + is_feasible=True, + rule_type="test", + confidence_score=-0.1, # Invalid: < 0.0 + feedback="test", + ) + + def test_yaml_generation_model(self): + """Test YamlGeneration model.""" + yaml_gen = YamlGeneration(yaml_content="test: yaml") + assert yaml_gen.yaml_content == "test: yaml"
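Beyond the mocked suites above, the `execute()` entry point that `src/api/rules.py` now calls can also be exercised directly for local spot checks against the real API. A minimal driver sketch follows, assuming `OPENAI_API_KEY` (and the rest of the AI config) is exported; the script path and printed labels are illustrative, and the run incurs real OpenAI costs:

```python
# scripts/check_rule.py - illustrative sketch, not part of this diff.
# Requires a real OPENAI_API_KEY in the environment, so it makes billable calls.
import asyncio

from src.agents.feasibility_agent.agent import RuleFeasibilityAgent


async def main() -> None:
    agent = RuleFeasibilityAgent()
    result = await agent.execute("No deployments on weekends")

    # Same AgentResult fields that src/api/rules.py maps into its response.
    print(f"supported:  {result.data.get('is_feasible', False)}")
    print(f"rule type:  {result.data.get('rule_type', '')}")
    print(f"confidence: {result.data.get('confidence_score', 0.0)}")
    print(result.data.get("yaml_content", "") or result.message)


if __name__ == "__main__":
    asyncio.run(main())
```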