import requests
from bs4 import BeautifulSoup
from langchain_core.tools import tool


@tool
def fetch_web_page_content(url: str) -> str:
    """Fetch and process the content of a web page."""
    # NOTE: the docstring above is intentionally left short and unchanged:
    # per the LangChain docs, @tool uses it as the tool description.
    #
    # Parameters:
    #   url: address of the page to download.
    # Returns:
    #   The page's text with script/style/header/footer/nav elements removed,
    #   whitespace trimmed and blank lines dropped. On any failure the
    #   function does not raise; it returns "Error fetching web page: <reason>".
    try:
        # Send a browser-like User-Agent; some sites block the default one.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # When the server declares no charset, Requests decodes text/* bodies
        # as ISO-8859-1, which garbles UTF-8 pages. Fall back to the encoding
        # detected from the body in that case.
        content_type = response.headers.get('Content-Type', '')
        if 'charset' not in content_type.lower():
            response.encoding = response.apparent_encoding

        soup = BeautifulSoup(response.text, 'html.parser')

        # Drop elements that carry no readable page content.
        for element in soup(['script', 'style', 'header', 'footer', 'nav']):
            element.decompose()

        text = soup.get_text(separator='\n', strip=True)

        # Trim every line, then break a line only where fragments are
        # separated by a double space. Splitting on a single space would put
        # every word on its own line.
        lines = (line.strip() for line in text.splitlines())
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        return '\n'.join(chunk for chunk in chunks if chunk)
    except Exception as e:
        # Deliberate best-effort boundary: the tool reports failures as text
        # to its caller instead of raising.
        return f"Error fetching web page: {str(e)}"