2025-02-11 23:14:01 +05:30

240 lines
8.5 KiB
Python

from typing import List, Any, Dict, Literal, Optional
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
from langgraph.graph import END
from agents.base import BaseAgent, AgentState
from tools.web.duck_duck_go_web_search import duck_duck_go_web_search
import json
import re
import logging
from datetime import datetime
# Root logging setup: INFO and above, rendered as "time - logger - level - message".
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
# Logger named after this module; used by everything defined below.
logger = logging.getLogger(__name__)
class ScoutAgent(BaseAgent):
    """Single-pass web research agent.

    For the most recent message in the state it runs one web search through
    ``duck_duck_go_web_search``, condenses the returned snippets, asks the
    LLM for a structured answer and appends the outcome to the conversation
    as a JSON-encoded message. ``_should_continue`` always returns ``END``,
    so the agent performs exactly one step.
    """

    def __init__(self):
        """Register the agent as ``scout`` with its prompt and search tool."""
        system_prompt = """# Scout Agent
## Role & Objective
You are a professional web researcher focused on delivering accurate, comprehensive, and well-structured information for any query.
## Core Principles
1. Research Quality
- Thorough search across credible sources
- Fact verification and cross-referencing
- Focus on recent, reliable information
2. Response Structure
- Clear, logical organization
- Key points with bullet points
- Supporting evidence and examples
- Source citations when relevant
3. Content Balance
- Accuracy over speculation
- Clarity over complexity
- Concise yet comprehensive
- Neutral and objective tone
## Process Flow
1. Analyze query intent
2. Gather relevant information
3. Synthesize findings
4. Present structured response with:
- Main concept explanation
- Key facts and details
- Practical implications
- Related context
Your responses should be informative, clear, and well-organized, focusing on providing maximum value with optimal efficiency."""
        super().__init__(
            name="scout",
            system_prompt=system_prompt,
            tools=[duck_duck_go_web_search],
            max_iterations=1,
        )

    def _clean_text(self, text: str) -> str:
        """Strip markup and noise from a search snippet.

        Removes HTML tags, deletes every character that is not a word
        character, whitespace or one of ``.,!?-``, then collapses each run
        of whitespace to a single space and trims the ends.

        Args:
            text: Raw snippet text.

        Returns:
            The cleaned text, or ``""`` when ``text`` is not a string.
        """
        if not isinstance(text, str):
            # A missing or non-text snippet is treated as empty so callers
            # can skip it with a plain truthiness check.
            logger.warning(
                "Text cleaning error: expected str, got %s", type(text).__name__
            )
            return ""
        # Remove HTML tags
        text = re.sub(r'<[^>]+>', '', text)
        # Remove special characters
        text = re.sub(r'[^\w\s.,!?-]', '', text)
        # Normalize whitespace last: deleting a symbol that sat between two
        # spaces ("Tom & Jerry") would otherwise leave a double space behind.
        return re.sub(r'\s+', ' ', text).strip()

    def _format_query(self, query: str) -> str:
        """Turn a user question into a keyword-style search query.

        The query is lower-cased and trimmed, the leading question phrases
        ``what is``, ``who is``, ``how does`` and ``why`` are removed (each
        pattern is tried once, in that order), and a fixed set of
        descriptive terms is appended.

        Args:
            query: The user's question.

        Returns:
            The rewritten query followed by the descriptive terms.
        """
        query = query.lower().strip()
        # Common query transformations
        patterns = [
            (r'^what\s+is\s+', ''),
            (r'^who\s+is\s+', ''),
            (r'^how\s+does\s+', ''),
            (r'^why\s+', ''),
        ]
        for pattern, repl in patterns:
            query = re.sub(pattern, repl, query).strip()
        # Enhance query with descriptive terms
        enhance_terms = [
            "definition", "explanation", "overview",
            "key concepts", "main features", "important aspects",
        ]
        return f"{query} {' '.join(enhance_terms)}"

    def _process_search_results(self, search_results: List[Dict[str, Any]], query: str) -> Dict[str, Any]:
        """Condense raw search hits into a status payload.

        Reads the ``snippet`` and ``link`` keys of every result, keeps the
        first five distinct non-empty cleaned snippets and the first three
        distinct non-empty links, preserving their original order.

        Args:
            search_results: Iterable of result mappings from the search tool.
            query: The original user query, echoed back in the payload.

        Returns:
            A dict whose ``status`` is ``"success"`` (with ``compiled_info``
            and ``sources``), ``"no_results"`` or ``"error"`` (both with
            ``error`` and ``suggestions``). This method does not raise; any
            failure while reading the results becomes an ``"error"`` payload.
        """
        try:
            compiled_info: List[str] = []
            sources: List[str] = []
            for result in search_results:
                # Only add non-empty, unique snippets
                snippet = self._clean_text(result.get('snippet', ''))
                if snippet and snippet not in compiled_info:
                    compiled_info.append(snippet)
                # Collect unique sources
                link = result.get('link', '')
                if link and link not in sources:
                    sources.append(link)
            # Limit sources and info
            sources = sources[:3]
            compiled_info = compiled_info[:5]
            if not compiled_info:
                return {
                    "status": "no_results",
                    "query": query,
                    "timestamp": datetime.now().isoformat(),
                    "error": "No information found",
                    "suggestions": [
                        f"Ask about specific aspects of {query}",
                        "Use more specific terms",
                        "Rephrase your question",
                    ],
                }
            return {
                "status": "success",
                "query": query,
                "timestamp": datetime.now().isoformat(),
                "compiled_info": compiled_info,
                "sources": sources,
            }
        except Exception as e:
            # Deliberate catch-all: malformed tool output must turn into an
            # error payload instead of propagating out of this method.
            logger.exception("Search result processing error: %s", e)
            return {
                "status": "error",
                "query": query,
                "timestamp": datetime.now().isoformat(),
                "error": str(e),
                "suggestions": [
                    "Try a different search approach",
                    "Check your internet connection",
                    "Simplify your query",
                ],
            }

    def _error_state(self, messages: List[Any], query: str, error: str) -> AgentState:
        """Build the state returned when research fails.

        Args:
            messages: Conversation so far; it is not mutated.
            query: The query that was being researched.
            error: Human-readable description of the failure.

        Returns:
            A state dict whose ``messages`` is the conversation plus a
            ``SystemMessage`` carrying the JSON error payload, with
            ``iterations`` set to 1.
        """
        error_response = {
            "status": "error",
            "query": query,
            "timestamp": datetime.now().isoformat(),
            "error": error,
            "message": "Error occurred during research",
            "suggestions": [
                "Try being more specific",
                "Rephrase your question",
                "Check your internet connection",
            ],
        }
        return {
            "messages": messages + [SystemMessage(content=json.dumps(error_response))],
            "iterations": 1,
        }

    def _process_step(self, state: AgentState) -> AgentState:
        """Research the latest message and append the result to the state.

        The content of the last message (or ``"No query provided"`` when
        there is none) is rewritten by ``_format_query``, searched with
        ``duck_duck_go_web_search`` (``max_results`` 3) and condensed by
        ``_process_search_results``. On success ``self.llm`` (presumably
        supplied by ``BaseAgent``) is asked to write the answer.

        Args:
            state: Agent state; only its ``messages`` entry is read.

        Returns:
            A state dict with ``iterations`` set to 1 and ``messages``
            extended by one JSON-encoded message: an ``AIMessage`` for a
            successful or ``no_results`` outcome, a ``SystemMessage`` when
            the search, result processing or LLM call failed.
        """
        logger.info("%s is researching...", self.name)
        messages = state.get('messages', [])
        # Bind the query before entering the try block so the error handler
        # below can always reference it.
        last_message = messages[-1] if messages else None
        query = last_message.content if hasattr(last_message, 'content') else "No query provided"
        try:
            formatted_query = self._format_query(query)
            # Perform web search
            search_results = duck_duck_go_web_search.invoke({
                "query": formatted_query,
                "max_results": 3,
            })
            # Process search results
            processed_results = self._process_search_results(search_results, query)
            status = processed_results["status"]
            # Handle no results scenario
            if status == "no_results":
                return {
                    "messages": messages + [AIMessage(content=json.dumps(processed_results))],
                    "iterations": 1,
                }
            if status == "error":
                # Surface the real processing error; falling through would
                # fail on the missing 'compiled_info' key and hide it.
                return self._error_state(messages, query, processed_results["error"])
            # Joined outside the f-string so no backslash is needed inside it.
            information = "\n".join(processed_results['compiled_info'])
            # Generate comprehensive response
            response_prompt = f"""Based on the following information, provide a comprehensive response about: {query}
Information:
{information}
Please structure your response as:
1. Direct, concise explanation and example (2-3 sentences)
2. Key characteristics or facts (3-4 bullet points)
3. Additional contextual information
4. Practical applications or implications (if relevant)
and dont show these points as heading instead directly show your response in place of these points.
Focus on clarity, accuracy, and providing meaningful insights."""
            # Generate LLM response
            llm_response = self.llm.invoke(response_prompt)
            answer = llm_response.content
            # Prepare final response
            response_data = {
                "status": "success",
                "query": query,
                "timestamp": datetime.now().isoformat(),
                "message": answer,
                "sources": processed_results.get("sources", []),
                # Non-empty answer lines, minus those numbered "1." or "2.".
                "key_points": [
                    line.strip() for line in answer.split('\n')
                    if line.strip() and not line.strip().startswith(('1.', '2.'))
                ],
            }
            return {
                "messages": messages + [AIMessage(content=json.dumps(response_data))],
                "iterations": 1,
            }
        except Exception as e:
            # Top-level boundary of the step: report the failure in-band
            # rather than letting it escape into the graph.
            logger.exception("Research process error: %s", e)
            return self._error_state(messages, query, str(e))

    def _should_continue(self, state: AgentState) -> Literal["continue", END]:
        """Always end after one iteration."""
        return END
def scout(task: str) -> str:
    """Run ``task`` through a freshly constructed ``ScoutAgent``.

    Args:
        task: The research request, passed unchanged to the agent's
            ``process`` method.

    Returns:
        Whatever ``ScoutAgent.process`` returns for the task.
    """
    return ScoutAgent().process(task)