Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 16 additions & 41 deletions backend/mainService/app.py
Original file line number Diff line number Diff line change
@@ -1,59 +1,34 @@
import os
from fastapi import FastAPI
from contextlib import asynccontextmanager
from src.config.playwright_driver import PlaywrightDriver as ASD
from src.config.async_http_session import AsyncHTTPClient
from fastapi.middleware.cors import CORSMiddleware
from src.config.startup import startup_event
from src.controllers.citation_controller import router as citation_router
from src.controllers.health_controller import router as health_router
from src.llm.Pinecone import PineconeOperations
from src.llm.chat_llm.Groq_llm import Summarize_llm
from src.llm.chat_llm.Azure_llm import Citation
from src.utils.index_operation import start
from dotenv import load_dotenv
from src.scraper.async_content_scraper import AsyncContentScraper
from fastapi.middleware.cors import CORSMiddleware
import nltk
from src.utils.concurrent_resources import cleanup_resources


# Detect if running in Azure Functions (serverless).
# os.getenv returns None when SERVERLESS is unset; defaulting to "" keeps the
# import from crashing on None.lower() and treats "unset" as "not serverless".
IS_SERVERLESS = os.getenv("SERVERLESS", "").lower() == "true"

origins = [
"http://localhost:5173", # Frontend running on localhost (React, Vue, etc.)
"https://cite-me.vercel.app"
]

# Conditionally assign lifespan
lifespan = startup_event if not IS_SERVERLESS else None

@asynccontextmanager
async def startup_event(app: FastAPI):
    """Lifespan context manager: create shared resources, then release them.

    Before yielding control to the application it loads the .env file,
    fetches the NLTK 'punkt' / 'punkt_tab' data, and stores the Playwright
    driver, Pinecone client, both LLM wrappers and the content scraper on
    ``app.state``. After the application stops it releases them again.

    Args:
        app: The FastAPI application whose ``state`` receives the resources.
    """
    load_dotenv()
    nltk.download('punkt')
    nltk.download('punkt_tab')

    app.state.playwright_driver = await ASD.create()
    app.state.pc = await PineconeOperations.create()
    app.state.summarize_llm = Summarize_llm()
    app.state.citation_llm = Citation()
    try:
        # NOTE(review): the reviewed text had lost its indentation; this assumes
        # `start()` and `yield` sit inside the scraper's `async with` - confirm.
        async with AsyncContentScraper(playwright_driver=app.state.playwright_driver) as content_scraper:
            app.state.async_content_scraper = content_scraper
            start()
            yield
            # Exiting the async with block automatically calls __aexit__
    finally:
        # Run teardown even when an exception is thrown in at `yield` or the
        # scraper context fails, so the resources created above are not leaked.
        await app.state.playwright_driver.quit()
        await app.state.pc.cleanup()
        await AsyncHTTPClient.close_session()
        cleanup_resources()  # Clean up thread pool and other concurrent resources


app = FastAPI(lifespan=startup_event)
# Create FastAPI instance
app = FastAPI(title="Citation API", version="1.0.0", lifespan=lifespan)

# Middleware configuration
app.add_middleware(
CORSMiddleware,
allow_origins=["*"], # Allow specific origins. modify this to allow only the your desired origins
allow_credentials=True, # Allow cookies & authentication headers
allow_methods=["POST", "GET", "OPTIONS", "HEAD"], # Allow all HTTP methods (GET, POST, PUT, DELETE, etc.)
allow_headers=["*"], # Allow all headers
allow_origins=["*"],
allow_credentials=True,
allow_methods=["POST", "GET", "OPTIONS", "HEAD"],
allow_headers=["*"],
)

# Include routers with prefixes
# Include routers
app.include_router(health_router, tags=["Health"])
app.include_router(citation_router, prefix="/citation", tags=["Citation"])

8 changes: 4 additions & 4 deletions backend/mainService/requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pytest
pytest-asyncio
pytest-cov
pytest-mock
pytest==8.3.5
pytest-asyncio==0.26.0
pytest-cov==4.1.0
pytest-mock==3.12.0
44 changes: 37 additions & 7 deletions backend/mainService/src/config/log_config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,24 @@
"""
Logging Configuration Module

This module handles the configuration of the application's logging system.
It sets up a stream handler and an optional file handler with a standardized format for
consistent logging throughout the application.

Key Functions:
- get_logger: Returns a configured logger instance

Configuration:
- Log level: INFO
- Log format: Timestamp - Logger Name - Level - Message
- Handlers: Stream handler (always); file handler (only when logToFile is True)

Features:
- Centralized logging configuration
- Easy logger instance creation
- Stream output, with optional file output
- Standardized log format
"""
import os
import logging
from datetime import datetime
Expand All @@ -8,28 +29,37 @@
def setup_logging(
    log_level=logging.INFO,
    log_dir: str = 'logs',
    filename: Optional[str] = 'log',
    logToFile: Optional[bool] = False,
) -> Logger:
    """
    Set up a standardized logging configuration for the entire project.

    Console output is always configured on the root logger. When
    ``logToFile`` is true, a file handler using the same format is attached
    to the returned logger as well.

    Args:
        log_level (int): Logging level (default: logging.INFO)
        log_dir (str): Directory to store log files (default: 'logs')
        filename (str): Logger name and base filename for log files (default: 'log')
        logToFile (bool): Whether to log to file (default: False)

    Returns:
        Logger: The logger named ``filename``.
    """
    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

    # basicConfig only takes effect while the root logger has no handlers, so
    # repeated calls from several modules do not stack console handlers.
    logging.basicConfig(
        level=log_level,
        format=log_format,
        handlers=[logging.StreamHandler()],  # Log to console
    )
    logger = logging.getLogger(filename)

    if logToFile:
        # Ensure logs directory exists
        os.makedirs(log_dir, exist_ok=True)
        # Year + month + week-of-year suffix: the same file is reused within a week.
        timestamp = datetime.now().strftime("%Y%m%U")
        log_filename = os.path.join(log_dir, f'{filename}_{timestamp}.log')

        # FileHandler stores the absolute path in baseFilename; compare against
        # it so a second call does not attach a duplicate handler (which would
        # write every record twice).
        target_path = os.path.abspath(log_filename)
        already_attached = any(
            isinstance(handler, logging.FileHandler)
            and handler.baseFilename == target_path
            for handler in logger.handlers
        )
        if not already_attached:
            file_handler = logging.FileHandler(log_filename)
            # A handler added directly to a logger has no formatter by default;
            # give it the shared format so file lines match console lines.
            file_handler.setFormatter(logging.Formatter(log_format))
            logger.addHandler(file_handler)

    return logger
31 changes: 31 additions & 0 deletions backend/mainService/src/config/startup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from src.llm.Pinecone import PineconeOperations
from src.llm.chat_llm.Groq_llm import Summarize_llm
from src.llm.chat_llm.Azure_llm import Citation
from dotenv import load_dotenv
from src.scraper.async_content_scraper import AsyncContentScraper
import nltk
from src.utils.concurrent_resources import cleanup_resources
from contextlib import asynccontextmanager
from src.config.playwright_driver import PlaywrightDriver as ASD
from src.config.async_http_session import AsyncHTTPClient
from fastapi import FastAPI

@asynccontextmanager
async def startup_event(app: FastAPI):
    """Lifespan context manager: create shared resources, then release them.

    Before yielding control to the application it loads the .env file,
    fetches the NLTK 'punkt' / 'punkt_tab' data, and stores the Playwright
    driver, Pinecone client, both LLM wrappers and the content scraper on
    ``app.state``. After the application stops it releases them again.

    Args:
        app: The FastAPI application whose ``state`` receives the resources.
    """
    load_dotenv()
    nltk.download('punkt')
    nltk.download('punkt_tab')

    app.state.playwright_driver = await ASD.create()
    app.state.pc = await PineconeOperations.create()
    app.state.summarize_llm = Summarize_llm()
    app.state.citation_llm = Citation()
    try:
        # NOTE(review): the reviewed text had lost its indentation; this assumes
        # `yield` sits inside the scraper's `async with` - confirm.
        async with AsyncContentScraper(playwright_driver=app.state.playwright_driver) as content_scraper:
            app.state.async_content_scraper = content_scraper
            yield
            # Exiting the async with block automatically calls __aexit__
    finally:
        # Run teardown even when an exception is thrown in at `yield` or the
        # scraper context fails, so the resources created above are not leaked.
        await app.state.playwright_driver.quit()
        await app.state.pc.cleanup()
        await AsyncHTTPClient.close_session()
        cleanup_resources()  # Clean up thread pool and other concurrent resources
1 change: 0 additions & 1 deletion backend/mainService/src/scraper/async_content_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ async def __aenter__(self):
async def __aexit__(self, exc_type, exc_val, exc_tb):
try:
if self._context:
await self.scraper_driver.quit()
await self._context.close()
except Exception as e:
# Log the exception even if it occurred during cleanup
Expand Down
11 changes: 2 additions & 9 deletions backend/mainService/src/services/citation_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,9 @@
from src.config.log_config import setup_logging
from src.llm.chat_llm.Azure_llm import Citation
from src.config.config import LlmConfig as LLMEC
from src.config.config import concurrency_config, search_config
from src.config.config import search_config,scraper_config
from src.custom_exceptions.llm_exceptions import CitationGenerationError
from src.llm.embedding_utils.reranker import rerank, format_for_rerank
from src.utils.index_operation import add_index_to_memory
from concurrent.futures import ThreadPoolExecutor
from langchain_core.documents import Document
from src.services.source_credibility_metric_service import get_credibility_metrics, calculate_overall_score
from src.models.schema import Source
Expand All @@ -23,9 +21,6 @@
log_filename = os.path.basename(__file__)
logger = setup_logging(filename=log_filename)

_index_executor = ThreadPoolExecutor(
max_workers=concurrency_config.HANDLE_INDEX_DELETE_WORKERS)


class CitationService:
"""
Expand Down Expand Up @@ -229,7 +224,7 @@ async def _process_documents(

try:
cleaned_result = search_results["cleaned_result"]
async with asyncio.timeout(15): # 15 second timeout
async with asyncio.timeout((scraper_config.TIMEOUT_DURATION*2)/1000): # 20 second timeout
download_results = await self.scraper.get_pdfs(
target_urls=cleaned_result.get("links"),
storage_path=search_results["search_key"]
Expand Down Expand Up @@ -301,8 +296,6 @@ async def _create_and_populate_index(
if not index:
logger.exception("Index creation failed")
return False
# Add index to memory
_index_executor.submit(add_index_to_memory, index_name)

# Populate index
return await self._populate_index(processed_docs["batches"])
Expand Down
60 changes: 0 additions & 60 deletions backend/mainService/src/utils/index_operation.py

This file was deleted.

File renamed without changes.
4 changes: 2 additions & 2 deletions backend/metricsService/requirements-test.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
pytest==7.4.3
pytest-asyncio==0.21.1
pytest==8.3.5
pytest-asyncio==0.26.0
pytest-cov==4.1.0
httpx==0.25.2
pytest-mock==3.12.0
67 changes: 56 additions & 11 deletions backend/metricsService/src/utils/logging_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Logging Configuration Module

This module handles the configuration of the application's logging system.
It sets up both file and stream handlers with a standardized format for
It sets up a stream handler and an optional file handler with a standardized format for
consistent logging throughout the application.

Key Functions:
Expand All @@ -11,7 +11,7 @@
Configuration:
- Log level: INFO
- Log format: Timestamp - Logger Name - Level - Message
- Handlers: File handler (app.log)
- Handlers: Stream handler (always); file handler (only when logToFile is True)

Features:
- Centralized logging configuration
Expand All @@ -20,16 +20,61 @@
- Standardized log format
"""

import os
import logging
from datetime import datetime
from typing import Optional
from logging import Logger

file_handler = logging.FileHandler('app.log')
stream_handler = logging.StreamHandler()
logger = None # Global logger instance

logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[file_handler, stream_handler]
)
def setup_logging(
    log_level=logging.INFO,
    log_dir: str = 'logs',
    filename: Optional[str] = 'log',
    logToFile: Optional[bool] = False,
) -> Logger:
    """
    Set up a standardized logging configuration for the entire project.

    Console output is always configured on the root logger. When
    ``logToFile`` is true, a file handler using the same format is attached
    to the configured logger as well. The logger is also stored in the
    module-level ``logger`` variable.

    Args:
        log_level (int): Logging level (default: logging.INFO)
        log_dir (str): Directory to store log files (default: 'logs')
        filename (str): Logger name and base filename for log files (default: 'log')
        logToFile (bool): Whether to log to file (default: False)

    Returns:
        Logger: The logger named ``filename``.
    """
    global logger

    log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

    # basicConfig only takes effect while the root logger has no handlers, so
    # repeated calls do not stack console handlers.
    logging.basicConfig(
        level=log_level,
        format=log_format,
        handlers=[logging.StreamHandler()],  # Log to console
    )
    logger = logging.getLogger(filename)

    if logToFile:
        # Ensure logs directory exists
        os.makedirs(log_dir, exist_ok=True)
        # Year + month + week-of-year suffix: the same file is reused within a week.
        timestamp = datetime.now().strftime("%Y%m%U")
        log_filename = os.path.join(log_dir, f'{filename}_{timestamp}.log')

        # FileHandler stores the absolute path in baseFilename; compare against
        # it so a second call does not attach a duplicate handler (which would
        # write every record twice).
        target_path = os.path.abspath(log_filename)
        already_attached = any(
            isinstance(handler, logging.FileHandler)
            and handler.baseFilename == target_path
            for handler in logger.handlers
        )
        if not already_attached:
            file_handler = logging.FileHandler(log_filename)
            # A handler added directly to a logger has no formatter by default;
            # give it the shared format so file lines match console lines.
            file_handler.setFormatter(logging.Formatter(log_format))
            logger.addHandler(file_handler)

    # The signature promises a Logger; return it instead of falling off the end.
    return logger


def get_logger(filename: str) -> Logger:
    """
    Return the module-wide logger, configuring it on first use.

    Args:
        filename (str): Passed to setup_logging when no logger exists yet.

    Returns:
        Logger: The value of the module-level ``logger`` variable.
    """
    # NOTE(review): once the module-level logger is set, `filename` is ignored
    # and every caller receives that same instance - confirm this is intended.
    if logger is not None:
        return logger
    setup_logging(filename=filename)
    return logger

def get_logger(name):
    """Return the logger registered under ``name`` in the logging module."""
    named_logger = logging.getLogger(name)
    return named_logger
Loading
Loading