import os

import requests
from dotenv import load_dotenv
from smolagents import Tool

# Load environment variables from .env if present
load_dotenv()


class BrightDataScraperTool(Tool):
    """smolagents tool that fetches a webpage as Markdown via Bright Data.

    Configuration is read from the environment on every call:

    * ``BRIGHT_DATA_API_TOKEN`` (required): bearer token sent in the
      ``Authorization`` header.
    * ``BRIGHT_DATA_UNLOCKER_ZONE`` (optional): zone name sent in the
      request payload; defaults to ``"web_unlocker_1"``.
    """

    name = "brightdata_web_scraper"
    description = """
    Scrape any webpage and return content in Markdown format.
    This tool can bypass bot detection and CAPTCHAs.
    Use this when you need to extract content from websites.
    """
    inputs = {
        "url": {
            "type": "string",
            "description": "The URL of the webpage to scrape",
        }
    }
    output_type = "string"

    # Seconds passed to ``requests`` as the timeout. Without one, a stalled
    # connection would block the calling agent indefinitely.
    request_timeout = 120

    def forward(self, url: str) -> str:
        """
        Scrape a webpage using Bright Data's API.

        Args:
            url: The URL to scrape

        Returns:
            The response body returned by the API (requested in Markdown
            format), or a string starting with ``"Error scraping URL: "``
            when the HTTP request fails, times out, or returns an error
            status.

        Raises:
            ValueError: If ``BRIGHT_DATA_API_TOKEN`` is not set or is empty.
        """
        api_token = os.getenv("BRIGHT_DATA_API_TOKEN")
        unlocker_zone = os.getenv("BRIGHT_DATA_UNLOCKER_ZONE", "web_unlocker_1")

        if not api_token:
            raise ValueError("BRIGHT_DATA_API_TOKEN not found in environment variables")

        api_url = "https://api.brightdata.com/request"
        headers = {
            "Authorization": f"Bearer {api_token}",
            "Content-Type": "application/json",
        }
        payload = {
            "url": url,
            "zone": unlocker_zone,
            "format": "raw",
            "data_format": "markdown",
        }

        try:
            response = requests.post(
                api_url,
                json=payload,
                headers=headers,
                timeout=self.request_timeout,
            )
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            # Failures are reported as text rather than raised so the calling
            # agent receives them as an ordinary tool result. Timeouts are
            # RequestException subclasses and land here too.
            return f"Error scraping URL: {e}"
        return response.text