Spaces:
Sleeping
Sleeping
| from smolagents import Tool | |
| import requests | |
| import json | |
| import os | |
| from dotenv import load_dotenv | |
| # Load environment variables from .env if present | |
| load_dotenv() | |
class BrightDataSearchTool(Tool):
    """Tool that runs a web search by posting a search-page URL to Bright Data.

    The search URL for the chosen engine is sent to
    ``https://api.brightdata.com/request`` together with the configured zone.
    Google requests carry ``brd_json=1`` and the response is parsed as JSON;
    Bing and Yandex requests ask for ``data_format="markdown"`` and the
    response body is returned as-is.

    Environment variables read on every call:
        BRIGHT_DATA_API_TOKEN: bearer token for the API (required).
        BRIGHT_DATA_UNLOCKER_ZONE: zone name, defaults to ``web_unlocker1``.
    """

    name = "brightdata_search_engine"
    description = """
    Search Google, Bing, or Yandex and get structured results.
    Returns search results with URLs, titles, and descriptions.
    Ideal for gathering current information and news.
    """
    inputs = {
        "query": {
            "type": "string",
            "description": "The search query",
        },
        "engine": {
            "type": "string",
            "description": "Search engine to use: 'google', 'bing', or 'yandex'. Default is 'google'",
            "nullable": True,
            "default": "google",
        },
    }
    output_type = "string"

    def forward(self, query: str, engine: str = "google") -> str:
        """
        Search using Bright Data's search API.

        Args:
            query: The search query.
            engine: Search engine to use (google, bing, or yandex). Matching is
                case-insensitive; ``None``, an empty string, or an unrecognized
                name falls back to Google.

        Returns:
            For Google: a JSON string with the keys ``organic``, ``images``
            (list of image links), ``related`` and ``ai_overview``.
            For Bing/Yandex: the response body (requested as markdown).
            On a request failure or an unusable Google payload: a JSON string
            of the form ``{"error": "<message>"}``.

        Raises:
            ValueError: If BRIGHT_DATA_API_TOKEN is not set.
        """
        api_token = os.getenv("BRIGHT_DATA_API_TOKEN")
        if not api_token:
            raise ValueError("BRIGHT_DATA_API_TOKEN not found in environment variables")
        unlocker_zone = os.getenv("BRIGHT_DATA_UNLOCKER_ZONE", "web_unlocker1")

        url_templates = {
            "google": "https://www.google.com/search?q={query}&brd_json=1",
            "bing": "https://www.bing.com/search?q={query}",
            "yandex": "https://yandex.com/search/?text={query}",
        }

        # The input schema marks `engine` as nullable, so None must be accepted.
        engine_key = (engine or "google").strip().lower()
        if engine_key not in url_templates:
            # Resolve the fallback once so the URL, the payload and the
            # response parsing below all agree on which engine is in use.
            engine_key = "google"
        is_google = engine_key == "google"
        search_url = url_templates[engine_key].format(query=requests.utils.quote(query))

        api_url = "https://api.brightdata.com/request"
        headers = {
            "Authorization": f"Bearer {api_token}",
            "Content-Type": "application/json",
        }
        payload = {
            "url": search_url,
            "zone": unlocker_zone,
            "format": "raw",
        }
        if not is_google:
            payload["data_format"] = "markdown"

        # requests never times out by default; bound the wait so a stalled
        # connection cannot block the caller forever.
        timeout_seconds = 60

        try:
            response = requests.post(
                api_url, json=payload, headers=headers, timeout=timeout_seconds
            )
            response.raise_for_status()
        except requests.exceptions.RequestException as e:
            return json.dumps({"error": str(e)})

        if not is_google:
            # Return markdown for Bing/Yandex
            return response.text

        try:
            data = response.json()
        except ValueError as e:
            # Covers json.JSONDecodeError and requests' own JSONDecodeError,
            # both of which are ValueError subclasses.
            return json.dumps({"error": f"Invalid JSON in response: {e}"})
        if not isinstance(data, dict):
            return json.dumps(
                {"error": "Unexpected response format: expected a JSON object"}
            )

        results = {
            "organic": data.get("organic", []),
            # Tolerate a null "images" value and skip non-dict entries.
            "images": [
                img.get("link")
                for img in (data.get("images") or [])
                if isinstance(img, dict)
            ],
            "related": data.get("related", []),
            "ai_overview": data.get("ai_overview"),
        }
        return json.dumps(results, indent=2)