"""Scout agent: runs one web search and asks the LLM to summarise the results."""

from typing import List, Any, Dict, Literal, Optional
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
from langgraph.graph import END
from agents.base import BaseAgent, AgentState
from tools.web.duck_duck_go_web_search import duck_duck_go_web_search
import json
import re
import logging
from datetime import datetime

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Query used when the state carries no message with a ``content`` attribute.
_NO_QUERY = "No query provided"

# Human-readable message attached to every error payload.
_RESEARCH_ERROR_MESSAGE = "Error occurred during research"

# Limits applied to the search request and to the processed results.
_MAX_SEARCH_RESULTS = 3
_MAX_SOURCES = 3
_MAX_SNIPPETS = 5

# Leading question phrases stripped from a query before searching.
# They are applied one after another, in this order.
_QUESTION_PREFIXES = (
    re.compile(r'^what\s+is\s+'),
    re.compile(r'^who\s+is\s+'),
    re.compile(r'^how\s+does\s+'),
    re.compile(r'^why\s+'),
)

# Descriptive terms appended to every search query.
_ENHANCE_TERMS = (
    "definition",
    "explanation",
    "overview",
    "key concepts",
    "main features",
    "important aspects",
)

_HTML_TAG_RE = re.compile(r'<[^>]+>')
_WHITESPACE_RE = re.compile(r'\s+')
_DISALLOWED_CHARS_RE = re.compile(r'[^\w\s.,!?-]')

_SYSTEM_PROMPT = """# Scout Agent

## Role & Objective
You are a professional web researcher focused on delivering accurate, comprehensive, and well-structured information for any query.

## Core Principles
1. Research Quality
   - Thorough search across credible sources
   - Fact verification and cross-referencing
   - Focus on recent, reliable information

2. Response Structure
   - Clear, logical organization
   - Key points with bullet points
   - Supporting evidence and examples
   - Source citations when relevant

3. Content Balance
   - Accuracy over speculation
   - Clarity over complexity
   - Concise yet comprehensive
   - Neutral and objective tone

## Process Flow
1. Analyze query intent
2. Gather relevant information
3. Synthesize findings
4. Present structured response with:
   - Main concept explanation
   - Key facts and details
   - Practical implications
   - Related context

Your responses should be informative, clear, and well-organized, focusing on providing maximum value with optimal efficiency."""

# ``{query}`` and ``{information}`` are filled in by ``str.format``.
_RESPONSE_PROMPT_TEMPLATE = """Based on the following information, provide a comprehensive response about: {query}

Information:
{information}

Please structure your response as:
1. Direct, concise explanation and example (2-3 sentences)
2. Key characteristics or facts (3-4 bullet points)
3. Additional contextual information
4. Practical applications or implications (if relevant)
and dont show these points as heading instead directly show your response in place of these points.

Focus on clarity, accuracy, and providing meaningful insights."""


class ScoutAgent(BaseAgent):
    """Single-pass research agent backed by the DuckDuckGo search tool.

    Each step takes the content of the last message as the query, searches
    the web, and asks the LLM to turn the returned snippets into a structured
    answer. The result is appended to the message list as a JSON-encoded
    message.
    """

    def __init__(self):
        """Register the agent as ``scout`` with the search tool and one iteration."""
        super().__init__(
            name="scout",
            system_prompt=_SYSTEM_PROMPT,
            tools=[duck_duck_go_web_search],
            max_iterations=1
        )

    def _clean_text(self, text: str) -> str:
        """Strip HTML tags, collapse whitespace and drop unusual punctuation.

        Only word characters, whitespace and ``.,!?-`` survive. If cleaning
        raises (for example because ``text`` is not a string), a warning is
        logged and the text is returned as far as it had been processed.
        """
        try:
            text = _HTML_TAG_RE.sub('', text)
            text = _WHITESPACE_RE.sub(' ', text).strip()
            text = _DISALLOWED_CHARS_RE.sub('', text)
            return text
        except Exception as e:
            logger.warning("Text cleaning error: %s", e)
            return text

    def _format_query(self, query: str) -> str:
        """Turn a user question into a search query.

        The query is lower-cased, leading question phrases ("what is",
        "who is", "how does", "why") are removed in that order, and a fixed
        set of descriptive terms is appended.
        """
        query = query.lower().strip()
        for prefix in _QUESTION_PREFIXES:
            query = prefix.sub('', query).strip()
        return f"{query} {' '.join(_ENHANCE_TERMS)}"

    def _process_search_results(self, search_results: List[Dict[str, Any]], query: str) -> Dict[str, Any]:
        """Condense raw search results into a status payload.

        Args:
            search_results: Iterable of result mappings; the ``snippet`` and
                ``link`` keys are read from each one.
            query: The original user query, echoed back in the payload.

        Returns:
            A dict whose ``status`` is one of:

            * ``"success"``: carries ``compiled_info`` (up to 5 unique cleaned
              snippets) and ``sources`` (up to 3 unique links).
            * ``"no_results"``: no usable snippet was found; carries ``error``
              and ``suggestions``.
            * ``"error"``: processing raised; carries ``error`` and
              ``suggestions``.
        """
        try:
            compiled_info = []
            sources = []

            for result in search_results:
                # Only keep non-empty snippets that have not been seen yet.
                snippet = self._clean_text(result.get('snippet', ''))
                if snippet and snippet not in compiled_info:
                    compiled_info.append(snippet)

                # Same for the source links.
                link = result.get('link', '')
                if link and link not in sources:
                    sources.append(link)

            sources = sources[:_MAX_SOURCES]
            compiled_info = compiled_info[:_MAX_SNIPPETS]

            if not compiled_info:
                return {
                    "status": "no_results",
                    "query": query,
                    "timestamp": datetime.now().isoformat(),
                    "error": "No information found",
                    "suggestions": [
                        f"Ask about specific aspects of {query}",
                        "Use more specific terms",
                        "Rephrase your question"
                    ]
                }

            return {
                "status": "success",
                "query": query,
                "timestamp": datetime.now().isoformat(),
                "compiled_info": compiled_info,
                "sources": sources
            }

        except Exception as e:
            logger.error("Search result processing error: %s", e)
            return {
                "status": "error",
                "query": query,
                "timestamp": datetime.now().isoformat(),
                "error": str(e),
                "suggestions": [
                    "Try a different search approach",
                    "Check your internet connection",
                    "Simplify your query"
                ]
            }

    @staticmethod
    def _reply(messages: List[Any], message: Any) -> Dict[str, Any]:
        """Build the state update: prior messages plus ``message``, one iteration."""
        return {
            "messages": messages + [message],
            "iterations": 1
        }

    def _process_step(self, state: AgentState) -> AgentState:
        """Run one research pass and append the JSON-encoded outcome.

        The last message's ``content`` is used as the query. Successful and
        "no results" outcomes are appended as an ``AIMessage``; failures are
        appended as a ``SystemMessage``. The method does not raise: any
        exception is converted into an error payload.
        """
        logger.info("%s is researching...", self.name)
        messages = state.get('messages', [])

        # Bound before the try block so the error handler can always report it.
        query = _NO_QUERY

        try:
            if messages and hasattr(messages[-1], 'content'):
                query = messages[-1].content
            formatted_query = self._format_query(query)

            search_results = duck_duck_go_web_search.invoke({
                "query": formatted_query,
                "max_results": _MAX_SEARCH_RESULTS
            })
            processed_results = self._process_search_results(search_results, query)

            status = processed_results["status"]
            if status == "no_results":
                return self._reply(messages, AIMessage(content=json.dumps(processed_results)))

            if status != "success":
                # The processing step already produced an error payload. Surface
                # it as-is instead of falling through to a KeyError on the
                # missing 'compiled_info' key, which would hide the real cause.
                failure = {**processed_results, "message": _RESEARCH_ERROR_MESSAGE}
                return self._reply(messages, SystemMessage(content=json.dumps(failure)))

            response_prompt = _RESPONSE_PROMPT_TEMPLATE.format(
                query=query,
                information="\n".join(processed_results['compiled_info'])
            )

            # NOTE(review): self.llm is not defined in this file; presumably
            # it is provided by BaseAgent. Confirm.
            llm_response = self.llm.invoke(response_prompt)

            # Every non-empty answer line, except those that begin with the
            # "1." / "2." list markers, is reported as a key point.
            answer_lines = (line.strip() for line in llm_response.content.split('\n'))
            key_points = [
                line for line in answer_lines
                if line and not line.startswith(('1.', '2.'))
            ]

            response_data = {
                "status": "success",
                "query": query,
                "timestamp": datetime.now().isoformat(),
                "message": llm_response.content,
                "sources": processed_results.get("sources", []),
                "key_points": key_points
            }
            return self._reply(messages, AIMessage(content=json.dumps(response_data)))

        except Exception as e:
            logger.error("Research process error: %s", e)
            error_response = {
                "status": "error",
                "query": query,
                "timestamp": datetime.now().isoformat(),
                "error": str(e),
                "message": _RESEARCH_ERROR_MESSAGE,
                "suggestions": [
                    "Try being more specific",
                    "Rephrase your question",
                    "Check your internet connection"
                ]
            }
            return self._reply(messages, SystemMessage(content=json.dumps(error_response)))

    def _should_continue(self, state: AgentState) -> Literal["continue", END]:
        """Always end after one iteration."""
        return END


def scout(task: str) -> str:
    """Execute research task and return findings.

    Creates a fresh ``ScoutAgent`` and returns whatever its ``process``
    method returns for ``task``.
    """
    # NOTE(review): process() is not defined in this file; presumably it is
    # inherited from BaseAgent. Confirm that it returns a str.
    agent = ScoutAgent()
    return agent.process(task)