chiara5122 committed on
Commit
12d1dd4
·
1 Parent(s): ffc2937

added custom tools

Browse files
tools.py CHANGED
@@ -1,4 +1,9 @@
1
  from typing import List
 
 
 
 
 
2
 
3
  from smolagents import (
4
  DuckDuckGoSearchTool,
@@ -20,5 +25,10 @@ def get_tools() -> List[Tool]:
20
  PythonInterpreterTool(),
21
  WikipediaSearchTool(),
22
  VisitWebpageTool(),
 
 
 
 
 
23
  ]
24
  return tools
 
1
  from typing import List
2
+ from tools.describe_image_tool import DescribeImageTool
3
+ from tools.openai_speech_to_text_tool import OpenAISpeechToTextTool
4
+ from tools.read_file_tool import ReadFileTool
5
+ from tools.youtube_transcription_tool import YouTubeTranscriptionTool
6
+ from tools.table_extractor_tool import TableExtractorTool
7
 
8
  from smolagents import (
9
  DuckDuckGoSearchTool,
 
25
  PythonInterpreterTool(),
26
  WikipediaSearchTool(),
27
  VisitWebpageTool(),
28
+ DescribeImageTool(),
29
+ OpenAISpeechToTextTool(),
30
+ ReadFileTool(),
31
+ YouTubeTranscriptionTool(),
32
+ TableExtractorTool(),
33
  ]
34
  return tools
tools/describe_image_tool.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import os
3
+ from openai import OpenAI
4
+ from smolagents import Tool
5
+
6
+ client = OpenAI()
7
+
8
+
9
class DescribeImageTool(Tool):
    """
    Tool to analyze and describe an image through the OpenAI chat completions API.

    The image is read from disk, base64-encoded and sent inline as a data URL
    together with a text prompt selected by ``description_type``.

    Args:
        image_path (str): Path to the image file.
        description_type (str): Type of description to generate. Options:
            - "general": General description of the image
            - "detailed": Detailed analysis of the image
            - "chess": Analysis of a chess position
            - "text": Extract and describe text from the image
            - "custom": Custom description based on user prompt
            Unknown or missing values fall back to "general".
        custom_prompt (str): Prompt to use when description_type is "custom".
            If it is missing or empty, the "general" prompt is used instead.

    Returns:
        str: Description of the image based on the requested type, or a
        message starting with "Error" when the file is missing or the API
        call fails.
    """

    name = "describe_image"
    description = "Analyzes and describes images using GPT-4 Vision API"
    inputs = {
        "image_path": {"type": "string", "description": "Path to the image file"},
        "description_type": {
            "type": "string",
            "description": "Type of description to generate (general, detailed, chess, text, custom)",
            "nullable": True,
        },
        "custom_prompt": {
            "type": "string",
            "description": "Custom prompt for description (only used when description_type is 'custom')",
            "nullable": True,
        },
    }
    output_type = "string"

    def encode_image(self, image_path: str) -> str:
        """Read the file at image_path and return its bytes as a base64 string."""
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode("utf-8")

    def get_prompt(self, description_type: str, custom_prompt: str = None) -> str:
        """Return the prompt text for description_type.

        A custom prompt is only honoured when description_type is "custom"
        AND the prompt is non-empty; otherwise a built-in prompt is returned
        so the API never receives a null prompt.
        """
        prompts = {
            "general": "Provide a general description of this image. Focus on the main subjects, colors, and overall scene.",
            "detailed": (
                "Analyze this image in detail. Include:\n"
                "1. Main subjects and their relationships\n"
                "2. Colors, lighting, and composition\n"
                "3. Any text or symbols present\n"
                "4. Context or possible meaning\n"
                "5. Notable details or interesting elements"
            ),
            "chess": (
                "Analyze this chess position and provide a detailed description including:\n"
                "1. List of pieces on the board for both white and black\n"
                "2. Whose turn it is to move\n"
                "3. Basic evaluation of the position\n"
                "4. Any immediate tactical opportunities or threats\n"
                "5. Suggested next moves with brief explanations"
            ),
            "text": "Extract and describe any text present in this image. If there are multiple pieces of text, organize them clearly.",
        }
        if description_type == "custom" and custom_prompt:
            return custom_prompt
        # Unknown types, None, and "custom" without a prompt all use "general".
        return prompts.get(description_type, prompts["general"])

    def forward(
        self,
        image_path: str,
        description_type: str = "general",
        custom_prompt: str = None,
    ) -> str:
        """Describe the image at image_path and return the model's answer.

        Any failure (missing file, unreadable file, API error) is reported as
        an "Error ..." string rather than raised.
        """
        try:
            # Local import keeps the module's top-level import block untouched.
            import mimetypes

            if not os.path.exists(image_path):
                return f"Error: Image file not found at {image_path}"

            # Encode the image
            base64_image = self.encode_image(image_path)

            # Get appropriate prompt
            prompt = self.get_prompt(description_type, custom_prompt)

            # Declare the real image type in the data URL; fall back to JPEG
            # when the extension is unknown or does not map to an image type.
            mime_type = mimetypes.guess_type(image_path)[0]
            if not mime_type or not mime_type.startswith("image/"):
                mime_type = "image/jpeg"

            # Make the API call
            response = client.chat.completions.create(
                model="gpt-4.1",
                messages=[
                    {
                        "role": "user",
                        "content": [
                            {"type": "text", "text": prompt},
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": f"data:{mime_type};base64,{base64_image}"
                                },
                            },
                        ],
                    }
                ],
                max_tokens=1000,
            )

            return response.choices[0].message.content

        except Exception as e:
            return f"Error analyzing image: {str(e)}"
tools/openai_speech_to_text_tool.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import whisper
3
+ from smolagents import Tool
4
+
5
+
6
class OpenAISpeechToTextTool(Tool):
    """
    Tool to convert speech to text using OpenAI's Whisper model.

    The "small" Whisper model is loaded lazily on the first successful call
    and then reused by this tool instance, so repeated transcriptions do not
    reload the model each time.

    Args:
        audio_path (str): Path to the audio file.

    Returns:
        str: Transcribed text from the audio file, or a message starting with
        "Error" when the file is missing or transcription fails.
    """

    name = "transcribe_audio"
    description = "Transcribes audio to text and returns the text"
    inputs = {
        "audio_path": {"type": "string", "description": "Path to the audio file"},
    }
    output_type = "string"

    def forward(self, audio_path: str) -> str:
        """Transcribe the audio file at audio_path and return its text."""
        try:
            # Validate the path first so a bad path never pays for a model load.
            if not os.path.exists(audio_path):
                return f"Error: Audio file not found at {audio_path}"

            # Reuse the model cached on this instance, loading it only once.
            model = getattr(self, "_whisper_model", None)
            if model is None:
                model = whisper.load_model("small")
                self._whisper_model = model

            result = model.transcribe(audio_path)
            return result["text"]
        except Exception as e:
            return f"Error transcribing audio: {str(e)}"
tools/read_file_tool.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+
3
class ReadFileTool(Tool):
    """
    Tool to read a text file and return its content.

    Args:
        file_path (str): Path to the file to read.

    Returns:
        str: Content of the file, or a message starting with
        "Error reading file:" when the file cannot be opened or decoded.
    """

    name = "read_file"
    description = "Reads a file and returns its content"
    inputs = {
        "file_path": {"type": "string", "description": "Path to the file to read"},
    }
    output_type = "string"

    def forward(self, file_path: str) -> str:
        """Return the full text of file_path, decoded as UTF-8."""
        try:
            # Explicit encoding: the default depends on the platform locale,
            # which makes the same file readable on one machine and not another.
            with open(file_path, "r", encoding="utf-8") as file:
                return file.read()
        except Exception as e:
            return f"Error reading file: {str(e)}"
tools/table_extractor_tool.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ from tabula import read_pdf
3
+ import pandas as pd
4
+ from typing import Optional, Dict, Any
5
+
6
class TableExtractorTool(Tool):
    """
    Tool to extract tables from PDFs/webpages and answer queries about them.

    PDF input is read with tabula's ``read_pdf`` and all extracted tables are
    concatenated; webpage input is read with ``pandas.read_html`` and only the
    first table on the page is used.

    Args:
        file_path (str): Path to PDF file (optional)
        url (str): URL of webpage containing tables (optional)
        query (str): Natural language question about the table data (optional)

    Returns:
        str: Extracted table data or answer to query, or a message starting
        with "Error" when the input is invalid or extraction fails.
    """

    name = "extract_table"
    description = "Extracts tables from PDFs or webpages and answers questions about the data"

    # Optional arguments are marked "nullable", matching the defaulted
    # parameters of forward() and the convention used by the other tools.
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Path to PDF file (either file_path or url required)",
            "nullable": True,
        },
        "url": {
            "type": "string",
            "description": "URL of webpage containing tables (either file_path or url required)",
            "nullable": True,
        },
        "query": {
            "type": "string",
            "description": "Natural language question about the table data",
            "nullable": True,
        },
    }

    output_type = "string"

    def forward(
        self,
        file_path: Optional[str] = None,
        url: Optional[str] = None,
        query: Optional[str] = None,
    ) -> str:
        """Extract a table from file_path or url and optionally answer query."""
        # Validate input
        if not file_path and not url:
            return "Error: Either file_path or url must be provided"

        try:
            # Case 1: Extract from PDF (extension check is case-insensitive)
            if file_path and file_path.lower().endswith(".pdf"):
                tables = read_pdf(file_path, pages="all", multiple_tables=True)
                df = pd.concat(tables) if tables else None

            # Case 2: Extract from HTML (webpage)
            elif url:
                dfs = pd.read_html(url)
                df = dfs[0] if dfs else None

            # A non-PDF file with no url has no extractor; say so explicitly
            # instead of failing later on an unbound variable.
            else:
                return f"Error: Unsupported file type for {file_path}; only PDF files are supported"

            if df is None:
                return "No tables found in the input source"

            # Answer query if provided
            if query:
                return self._answer_query(df, query)
            return df.to_string()

        except Exception as e:
            return f"Error processing table data: {str(e)}"

    def _find_column(self, df: pd.DataFrame, query: str) -> Any:
        """Return the first column whose lower-cased name occurs in query, else None."""
        for col in df.columns:
            # Column labels may be ints or other non-strings (e.g. headerless
            # HTML tables), so normalise through str() before comparing.
            label = str(col).lower()
            if label and label in query:
                return col
        return None

    def _answer_query(self, df: pd.DataFrame, query: str) -> str:
        """Helper method to answer questions about the table data.

        Supports simple "total"/"sum" and "average"/"mean" questions. The
        column is the one named in the query, or the first numeric column
        when none is named. Anything else returns the full table.
        """
        try:
            import re

            query = query.lower()
            numeric_cols = df.select_dtypes(include=["number"]).columns
            first_numeric = numeric_cols[0] if len(numeric_cols) > 0 else None

            # Example simple queries - you could expand this or integrate an LLM
            # Word-boundary match so e.g. "summary" is not treated as "sum".
            if re.search(r"\b(total|sum)\b", query):
                col = self._find_column(df, query)
                if col is None:
                    col = first_numeric
                # "is not None" rather than truthiness: a column may be labelled 0.
                if col is not None:
                    return f"Total {col}: {df[col].sum()}"

            elif "average" in query or "mean" in query:
                # Find the most likely column referenced in query,
                # defaulting to the first numeric column
                col = self._find_column(df, query)
                if col is None:
                    col = first_numeric
                if col is not None:
                    return f"Average {col}: {df[col].mean():.2f}"

            # Fallback: return the table
            return f"Here's the table data:\n{df.to_string()}\n\nQuery '{query}' not fully understood."

        except Exception as e:
            return f"Error answering query: {str(e)}\nTable data:\n{df.to_string()}"
tools/youtube_transcription_tool.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ from youtube_transcript_api import YouTubeTranscriptApi
3
+
4
+
5
class YouTubeTranscriptionTool(Tool):
    """
    Tool to fetch the transcript of a YouTube video given its URL.

    Accepts standard watch URLs (with or without extra query parameters),
    youtu.be short links, /embed/, /shorts/ and /live/ URLs, and bare video IDs.

    Args:
        video_url (str): YouTube video URL.

    Returns:
        str: Transcript of the video as a single string, or a message starting
        with "Error fetching transcript:" when the transcript cannot be fetched.
    """

    name = "youtube_transcription"
    description = "Fetches the transcript of a YouTube video given its URL"
    inputs = {
        "video_url": {"type": "string", "description": "YouTube video URL"},
    }
    output_type = "string"

    def _extract_video_id(self, video_url: str) -> str:
        """Return the video ID contained in video_url."""
        from urllib.parse import parse_qs, urlparse

        url = video_url.strip()
        # urlparse only recognises the host when a scheme is present, so add
        # one for inputs like "youtu.be/<id>" or "www.youtube.com/watch?v=<id>".
        if "://" not in url and "." in url.split("/")[0]:
            url = "https://" + url

        parsed = urlparse(url)
        host = (parsed.hostname or "").lower()
        segments = [part for part in parsed.path.split("/") if part]

        # Short links carry the ID as the first path segment.
        if host.endswith("youtu.be") and segments:
            return segments[0]

        # Standard watch URLs: take only the "v" parameter, ignoring e.g. "&t=30s".
        ids = parse_qs(parsed.query).get("v")
        if ids:
            return ids[0]

        # Path-based forms such as /embed/<id> or /shorts/<id>.
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]

        # Bare ID or unrecognised form: keep the original best-effort parsing.
        return video_url.strip().split("v=")[-1]

    def forward(self, video_url: str) -> str:
        """Fetch the transcript for video_url and join its entries with spaces."""
        # Report failures as a string, consistent with the other tools.
        try:
            video_id = self._extract_video_id(video_url)
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
            return " ".join(entry["text"] for entry in transcript)
        except Exception as e:
            return f"Error fetching transcript: {str(e)}"