42 changes: 42 additions & 0 deletions .github/workflows/tests.yml
@@ -0,0 +1,42 @@
name: Tests

on:
push:
branches: [ main ]
pull_request:
branches: [ main ]

jobs:
test:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.12"]

steps:
- uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v3
with:
version: "latest"

- name: Set up Python ${{ matrix.python-version }}
run: uv python install ${{ matrix.python-version }}

- name: Install dependencies
run: uv sync --all-extras

- name: Run all tests
run: |
echo "Running unit tests..."
uv run pytest tests/unit/ -v --tb=short
echo "Running integration tests (mocked - no real API calls)..."
uv run pytest tests/integration/ -v --tb=short

- name: Upload coverage reports
uses: codecov/codecov-action@v4
if: matrix.python-version == '3.12'
with:
file: ./coverage.xml
fail_ci_if_error: false
1 change: 1 addition & 0 deletions .gitignore
@@ -174,6 +174,7 @@ PLANNING.md
.pdm-build/
.ruff_cache/
.vscode/
.kiro

# Copilot
.github/instructions/
42 changes: 42 additions & 0 deletions README.md
@@ -113,6 +113,48 @@ rules:
2. **Try acknowledgment workflow**: Comment `@watchflow acknowledge` when rules are violated
3. **Verify rule enforcement**: Check that blocking rules prevent merging

## πŸ§ͺ Testing

The project includes comprehensive tests that run **without making real API calls** by default:

### Running Tests

```bash
# Run all tests (mocked - no API costs)
pytest

# Run only unit tests (very fast)
pytest tests/unit/

# Run only integration tests (mocked)
pytest tests/integration/
```

### Test Structure

```
tests/
β”œβ”€β”€ unit/                     # ⚑ Fast unit tests (mocked OpenAI)
β”‚   └── test_feasibility_agent.py
└── integration/              # 🌐 Full HTTP stack tests (mocked OpenAI)
    └── test_rules_api.py
```
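
For orientation, here is a minimal sketch of the mocked pattern such a unit test can follow. The constructor call, patch target, and field values are illustrative assumptions, not the project's actual test code:

```python
# Hypothetical sketch of a mocked unit test; assumes RuleFeasibilityAgent()
# can be constructed without arguments and exposes a compiled `graph`.
from unittest.mock import AsyncMock, patch

import pytest

from src.agents.feasibility_agent.agent import RuleFeasibilityAgent
from src.agents.feasibility_agent.models import FeasibilityState


@pytest.mark.asyncio
async def test_execute_without_real_api():
    agent = RuleFeasibilityAgent()
    fake_state = FeasibilityState(
        rule_description="Require two approvals before merge",
        is_feasible=True,
        rule_type="approval_requirement",
        confidence_score=0.9,
        feedback="Feasible with the approvals validator",
    )
    # Patch the compiled graph so no OpenAI call is ever made.
    with patch.object(agent, "graph") as fake_graph:
        fake_graph.ainvoke = AsyncMock(return_value=fake_state)
        result = await agent.execute("Require two approvals before merge")

    assert result.success is True
    assert result.data["rule_type"] == "approval_requirement"
```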

### Real API Testing (Local Development Only)

If you want to test with **real OpenAI API calls** locally:

```bash
# Set environment variables
export OPENAI_API_KEY="your-api-key"
export INTEGRATION_TEST_REAL_API=true

# Run integration tests with real API calls (costs money!)
pytest tests/integration/ -m integration
```

**⚠️ Warning:** Real API tests make actual OpenAI calls and will cost money. They're disabled by default in CI/CD.
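
The default-off behaviour can be enforced with a small guard. The sketch below assumes a `conftest.py` with a `skipif` marker keyed on the same environment variable; it is an illustration rather than the project's actual wiring:

```python
# conftest.py (hypothetical sketch): gate real-API tests behind an env var.
import os

import pytest

REAL_API_ENABLED = os.getenv("INTEGRATION_TEST_REAL_API", "").lower() == "true"

# Apply to any test that must hit the real OpenAI API; skipped otherwise.
requires_real_api = pytest.mark.skipif(
    not REAL_API_ENABLED,
    reason="Set INTEGRATION_TEST_REAL_API=true to run real OpenAI tests.",
)
```

In CI the variable is unset, so any test carrying such a marker is skipped automatically.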

## Configuration

For advanced configuration options, see the [Configuration Guide](docs/getting-started/configuration.md).
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -200,13 +200,14 @@ python_functions = ["test_*"]
addopts = [
"--strict-markers",
"--strict-config",
"--cov=backend",
"--cov=src",
"--cov-report=term-missing",
"--cov-report=html",
"--cov-report=xml",
]
asyncio_mode = "auto"


[tool.coverage.run]
source = ["backend"]
omit = [
3 changes: 1 addition & 2 deletions src/agents/feasibility_agent/__init__.py
@@ -6,6 +6,5 @@
"""

from .agent import RuleFeasibilityAgent
from .models import FeasibilityResult

__all__ = ["RuleFeasibilityAgent", "FeasibilityResult"]
__all__ = ["RuleFeasibilityAgent"]
52 changes: 27 additions & 25 deletions src/agents/feasibility_agent/agent.py
@@ -8,7 +8,7 @@

from src.agents.base import AgentResult, BaseAgent

from .models import FeasibilityResult, FeasibilityState
from .models import FeasibilityState
from .nodes import analyze_rule_feasibility, generate_yaml_config

logger = logging.getLogger(__name__)
@@ -27,51 +27,53 @@ def _build_graph(self) -> StateGraph:
workflow.add_node("analyze_feasibility", analyze_rule_feasibility)
workflow.add_node("generate_yaml", generate_yaml_config)

# Add edges
# Add edges with conditional logic
workflow.add_edge(START, "analyze_feasibility")
workflow.add_edge("analyze_feasibility", "generate_yaml")

# Conditional edge: only generate YAML if feasible
workflow.add_conditional_edges(
"analyze_feasibility",
lambda state: "generate_yaml" if state.is_feasible else END,
{"generate_yaml": "generate_yaml", END: END},
)

workflow.add_edge("generate_yaml", END)

logger.info("πŸ”§ FeasibilityAgent graph built with conditional structured output workflow")
return workflow.compile()

async def execute(self, rule_description: str) -> AgentResult:
"""
Check if a rule description is feasible and return YAML or feedback.
"""
try:
logger.info(f"πŸš€ Starting feasibility analysis for rule: {rule_description[:100]}...")

# Prepare initial state
initial_state = FeasibilityState(rule_description=rule_description)

# Run the graph
result = await self.graph.ainvoke(initial_state)

# Convert dict result back to FeasibilityState if needed
if isinstance(result, dict):
result = FeasibilityState(**result)

logger.info(f"βœ… Feasibility analysis completed: feasible={result.is_feasible}, type={result.rule_type}")

# Convert to AgentResult
return AgentResult(
success=result.get("is_feasible", False),
message=result.get("feedback", ""),
success=result.is_feasible,
message=result.feedback,
data={
"is_feasible": result.get("is_feasible", False),
"yaml_content": result.get("yaml_content", ""),
"confidence_score": result.get("confidence_score", 0.0),
"rule_type": result.get("rule_type", ""),
"analysis_steps": result.get("analysis_steps", []),
"is_feasible": result.is_feasible,
"yaml_content": result.yaml_content,
"confidence_score": result.confidence_score,
"rule_type": result.rule_type,
"analysis_steps": result.analysis_steps,
},
)

except Exception as e:
logger.error(f"Error in rule feasibility check: {e}")
logger.error(f"❌ Error in rule feasibility check: {e}")
return AgentResult(success=False, message=f"Feasibility check failed: {str(e)}", data={})

async def check_feasibility(self, rule_description: str) -> FeasibilityResult:
"""
Legacy method for backwards compatibility.
"""
result = await self.execute(rule_description)

return FeasibilityResult(
is_feasible=result.data.get("is_feasible", False),
yaml_content=result.data.get("yaml_content", ""),
feedback=result.message,
confidence_score=result.data.get("confidence_score"),
rule_type=result.data.get("rule_type"),
)
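
As a quick reference for reviewers, a minimal usage sketch of the reworked `execute` method follows; the rule text is illustrative and a configured OpenAI key is assumed at runtime:

```python
# Hypothetical quick check of the agent; requires OPENAI_API_KEY to be set
# via the centralized config before running.
import asyncio

from src.agents.feasibility_agent.agent import RuleFeasibilityAgent


async def main() -> None:
    agent = RuleFeasibilityAgent()
    result = await agent.execute("Require at least two approvals on release branches")
    print("feasible:", result.data["is_feasible"])
    print("rule type:", result.data["rule_type"])
    print(result.data["yaml_content"])


if __name__ == "__main__":
    asyncio.run(main())
```
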
22 changes: 14 additions & 8 deletions src/agents/feasibility_agent/models.py
@@ -5,14 +5,20 @@
from pydantic import BaseModel, Field


class FeasibilityResult(BaseModel):
"""Result of checking if a rule is feasible."""

is_feasible: bool
yaml_content: str
feedback: str
confidence_score: float | None = None
rule_type: str | None = None
class FeasibilityAnalysis(BaseModel):
"""Structured output model for rule feasibility analysis."""

is_feasible: bool = Field(description="Whether the rule is feasible to implement with Watchflow")
rule_type: str = Field(description="Type of rule (time_restriction, branch_pattern, title_pattern, etc.)")
confidence_score: float = Field(description="Confidence score from 0.0 to 1.0", ge=0.0, le=1.0)
feedback: str = Field(description="Detailed feedback on implementation considerations")
analysis_steps: list[str] = Field(description="Step-by-step analysis breakdown", default_factory=list)


class YamlGeneration(BaseModel):
"""Structured output model for YAML configuration generation."""

yaml_content: str = Field(description="Generated Watchflow YAML rule configuration")


class FeasibilityState(BaseModel):
100 changes: 38 additions & 62 deletions src/agents/feasibility_agent/nodes.py
@@ -2,118 +2,94 @@
LangGraph nodes for the Rule Feasibility Agent.
"""

import json
import logging

from langchain_openai import ChatOpenAI

from src.core.config import config

from .models import FeasibilityState
from .models import FeasibilityAnalysis, FeasibilityState, YamlGeneration
from .prompts import RULE_FEASIBILITY_PROMPT, YAML_GENERATION_PROMPT

logger = logging.getLogger(__name__)


def analyze_rule_feasibility(state: FeasibilityState) -> FeasibilityState:
async def analyze_rule_feasibility(state: FeasibilityState) -> FeasibilityState:
"""
Analyze whether a rule description is feasible to implement.
Analyze whether a rule description is feasible to implement using structured output.
"""
try:
# Create LLM client directly using centralized config
# Create LLM client with structured output
llm = ChatOpenAI(
api_key=config.ai.api_key,
model=config.ai.model,
max_tokens=config.ai.max_tokens,
temperature=config.ai.temperature,
)

# Use structured output instead of manual JSON parsing
structured_llm = llm.with_structured_output(FeasibilityAnalysis)

# Analyze rule feasibility
prompt = RULE_FEASIBILITY_PROMPT.format(rule_description=state.rule_description)

response = llm.invoke(prompt)

# Log the raw response for debugging
logger.info(f"Raw LLM response: {response.content}")

# Check if response is empty
if not response.content or response.content.strip() == "":
logger.error("LLM returned empty response")
state.is_feasible = False
state.feedback = "Analysis failed: LLM returned empty response"
return state

# Try to parse JSON with better error handling
try:
result = json.loads(response.content.strip())
except json.JSONDecodeError as json_error:
logger.error(f"Failed to parse JSON response: {json_error}")
logger.error(f"Response content: {response.content}")

# Try to extract JSON from markdown code blocks if present
content = response.content.strip()
if content.startswith("```json"):
content = content[7:] # Remove ```json
elif content.startswith("```"):
content = content[3:] # Remove ```
if content.endswith("```"):
content = content[:-3] # Remove trailing ```

try:
result = json.loads(content.strip())
logger.info("Successfully extracted JSON from markdown code blocks")
except json.JSONDecodeError:
# If all parsing attempts fail, set default values
logger.error("All JSON parsing attempts failed")
state.is_feasible = False
state.feedback = (
f"Analysis failed: Could not parse LLM response as JSON. Raw response: {response.content[:200]}..."
)
return state

# Update state with analysis results
state.is_feasible = result.get("is_feasible", False)
state.rule_type = result.get("rule_type", "")
state.confidence_score = result.get("confidence_score", 0.0)
state.yaml_content = result.get("yaml_content", "")
state.feedback = result.get("feedback", "")
state.analysis_steps = result.get("analysis_steps", [])

logger.info(f"Rule feasibility analysis completed: {state.is_feasible}")
# Get structured response - no more JSON parsing needed!
result = await structured_llm.ainvoke(prompt)

# Update state with analysis results - now type-safe!
state.is_feasible = result.is_feasible
state.rule_type = result.rule_type
state.confidence_score = result.confidence_score
state.feedback = result.feedback
state.analysis_steps = result.analysis_steps

logger.info(f"πŸ” Rule feasibility analysis completed: {state.is_feasible}")
logger.info(f"πŸ” Rule type identified: {state.rule_type}")
logger.info(f"πŸ” Confidence score: {state.confidence_score}")

except Exception as e:
logger.error(f"Error in rule feasibility analysis: {e}")
logger.error(f"❌ Error in rule feasibility analysis: {e}")
state.is_feasible = False
state.feedback = f"Analysis failed: {str(e)}"
state.confidence_score = 0.0

return state


def generate_yaml_config(state: FeasibilityState) -> FeasibilityState:
async def generate_yaml_config(state: FeasibilityState) -> FeasibilityState:
"""
Generate YAML configuration for feasible rules.
Generate YAML configuration for feasible rules using structured output.
This node only runs if the rule is feasible.
"""
if not state.is_feasible or not state.rule_type:
logger.info("πŸ”§ Skipping YAML generation - rule not feasible or no rule type")
return state

try:
# Create LLM client directly using centralized config
# Create LLM client with structured output
llm = ChatOpenAI(
api_key=config.ai.api_key,
model=config.ai.model,
max_tokens=config.ai.max_tokens,
temperature=config.ai.temperature,
)

# Use structured output for YAML generation
structured_llm = llm.with_structured_output(YamlGeneration)

prompt = YAML_GENERATION_PROMPT.format(rule_type=state.rule_type, rule_description=state.rule_description)

response = llm.invoke(prompt)
state.yaml_content = response.content.strip()
# Get structured response
result = await structured_llm.ainvoke(prompt)

# Update state with generated YAML
state.yaml_content = result.yaml_content.strip()

logger.info(f"YAML configuration generated for rule type: {state.rule_type}")
logger.info(f"πŸ”§ YAML configuration generated for rule type: {state.rule_type}")
logger.info(f"πŸ”§ Generated YAML length: {len(state.yaml_content)} characters")

except Exception as e:
logger.error(f"Error generating YAML configuration: {e}")
logger.error(f"❌ Error generating YAML configuration: {e}")
state.feedback += f"\nYAML generation failed: {str(e)}"

return state