import json
import os

import requests
from dotenv import load_dotenv
from smolagents import Tool

# Load environment variables from .env if present
load_dotenv()


class BrightDataSearchTool(Tool):
    """Tool that runs a web search by posting to the Bright Data request API.

    Google searches are requested with ``brd_json=1`` and returned as a JSON
    string containing the ``organic``, ``images``, ``related`` and
    ``ai_overview`` fields of the response. Bing and Yandex searches are
    requested with ``data_format="markdown"`` and the response body is
    returned as-is.

    Configuration is read from the environment on every call:

    * ``BRIGHT_DATA_API_TOKEN`` (required) - bearer token sent in the
      ``Authorization`` header.
    * ``BRIGHT_DATA_UNLOCKER_ZONE`` (optional) - value sent as ``zone``;
      defaults to ``"web_unlocker1"``.
    """

    name = "brightdata_search_engine"
    description = (
        " Search Google, Bing, or Yandex and get structured results."
        " Returns search results with URLs, titles, and descriptions."
        " Ideal for gathering current information and news. "
    )
    inputs = {
        "query": {
            "type": "string",
            "description": "The search query",
        },
        "engine": {
            "type": "string",
            "description": "Search engine to use: 'google', 'bing', or 'yandex'. Default is 'google'",
            "nullable": True,
            "default": "google",
        },
    }
    output_type = "string"

    def forward(self, query: str, engine: str = "google") -> str:
        """Run a search through the Bright Data request API.

        Args:
            query: The search query. It is URL-quoted before being embedded
                in the search URL.
            engine: Search engine to use ("google", "bing" or "yandex"),
                matched case-insensitively. ``None`` (the input is declared
                nullable) and any unsupported value are treated as "google".

        Returns:
            For Google, a JSON string with the keys ``organic``, ``images``
            (list of image links), ``related`` and ``ai_overview``. For Bing
            and Yandex, the response body requested in markdown format. If
            the HTTP request fails or the Google response is not valid JSON,
            a JSON string of the form ``{"error": "<message>"}``.

        Raises:
            ValueError: If ``BRIGHT_DATA_API_TOKEN`` is not set.
        """
        api_token = os.getenv("BRIGHT_DATA_API_TOKEN")
        if not api_token:
            raise ValueError("BRIGHT_DATA_API_TOKEN not found in environment variables")
        unlocker_zone = os.getenv("BRIGHT_DATA_UNLOCKER_ZONE", "web_unlocker1")

        # Quote once and reuse for every engine's URL template.
        encoded_query = requests.utils.quote(query)
        search_urls = {
            "google": f"https://www.google.com/search?q={encoded_query}&brd_json=1",
            "bing": f"https://www.bing.com/search?q={encoded_query}",
            "yandex": f"https://yandex.com/search/?text={encoded_query}",
        }

        # `engine` is a nullable input, so None must not reach `.lower()`.
        engine_key = (engine or "google").strip().lower()
        if engine_key not in search_urls:
            # Unsupported engines fall back to Google. The fallback has to be
            # applied *before* deriving `is_google`; otherwise the Google URL
            # would be fetched in markdown mode and returned unparsed.
            engine_key = "google"
        is_google = engine_key == "google"

        api_url = "https://api.brightdata.com/request"
        headers = {
            "Authorization": f"Bearer {api_token}",
            "Content-Type": "application/json",
        }
        payload = {
            "url": search_urls[engine_key],
            "zone": unlocker_zone,
            "format": "raw",
        }
        if not is_google:
            payload["data_format"] = "markdown"

        try:
            # Without a timeout a stalled connection blocks the caller forever.
            response = requests.post(api_url, json=payload, headers=headers, timeout=60)
            response.raise_for_status()

            if not is_google:
                # Return markdown for Bing/Yandex
                return response.text

            data = response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers JSON decoding failures on `requests` versions
            # where that error is not a RequestException subclass.
            return json.dumps({"error": str(e)})

        if not isinstance(data, dict):
            return json.dumps(
                {"error": "Unexpected response format: expected a JSON object"}
            )

        results = {
            "organic": data.get("organic", []),
            # `or []` also covers an explicit `"images": null` in the payload.
            "images": [
                img.get("link")
                for img in (data.get("images") or [])
                if isinstance(img, dict)
            ],
            "related": data.get("related", []),
            "ai_overview": data.get("ai_overview"),
        }
        return json.dumps(results, indent=2)