jlgaralc committed on
Commit
c9e0cf1
·
1 Parent(s): 9e25975

Modified files

Browse files
Files changed (1) hide show
  1. agent.py +320 -115
agent.py CHANGED
@@ -4,14 +4,16 @@ import logging
4
  import urllib.parse as urlparse
5
  import io
6
  import contextlib
 
7
  from functools import lru_cache, wraps
 
8
 
9
  from dotenv import load_dotenv
10
  from requests.exceptions import RequestException
11
  import serpapi
12
  from llama_index.core import VectorStoreIndex, download_loader
13
  from llama_index.core.schema import Document
14
- from youtube_transcript_api import YouTubeTranscriptApi
15
 
16
  from smolagents import (CodeAgent, InferenceClientModel, ToolCallingAgent,
17
  WebSearchTool, WikipediaTool, tool)
@@ -37,6 +39,13 @@ def load_api_keys():
37
  raise ValueError("One or more API keys are missing. Please check your .env file.")
38
  return keys
39
 
 
 
 
 
 
 
 
40
  # --- Decorators ---
41
 
42
  def retry(max_retries=3, initial_delay=1, backoff=2):
@@ -45,8 +54,7 @@ def retry(max_retries=3, initial_delay=1, backoff=2):
45
  @wraps(func)
46
  def wrapper(*args, **kwargs):
47
  delay = initial_delay
48
- # Define specific, retry-able exceptions
49
- retryable_exceptions = (RequestException, SerpApiClientException, YouTubeTranscriptApiError)
50
  for attempt in range(1, max_retries + 1):
51
  try:
52
  return func(*args, **kwargs)
@@ -63,113 +71,280 @@ def retry(max_retries=3, initial_delay=1, backoff=2):
63
  return wrapper
64
  return decorator
65
 
66
- # --- Main Agent Initialization (as called by app.py) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  def initialize_agent():
69
  """
70
- Initializes a multi-disciplinary agent with a toolset and reasoning framework
71
- designed for the benchmark's question categories.
72
  """
 
73
  api_keys = load_api_keys()
74
 
75
  # --- Caching Layer for LlamaIndex ---
76
  @lru_cache(maxsize=32)
77
  @retry()
78
  def get_webpage_index(url: str) -> VectorStoreIndex:
 
79
  logging.info(f"Indexing webpage: {url}")
80
- loader_cls = download_loader("BeautifulSoupWebReader")
81
- loader = loader_cls()
82
- docs = loader.load_data(urls=[url])
83
- return VectorStoreIndex.from_documents(docs)
 
 
 
 
 
 
84
 
85
  @lru_cache(maxsize=32)
86
  @retry()
87
  def get_youtube_index(video_id: str) -> VectorStoreIndex:
 
88
  logging.info(f"Indexing YouTube video: {video_id}")
89
- transcript = YouTubeTranscriptApi.get_transcript(video_id)
90
- text = ' '.join([t['text'] for t in transcript])
91
- doc = Document(text=text, doc_id=f"youtube_{video_id}")
92
- return VectorStoreIndex.from_documents([doc])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
- # --- Specialized Tool Definitions ---
95
 
96
- # 1. Web Search Tools
97
  @tool
98
  @retry()
99
  def google_search(query: str) -> str:
100
- """Use for general knowledge questions, finding facts, or when you don't have a specific URL.
 
 
101
 
102
  Args:
103
- query (str): The search query.
104
  """
105
- client = serpapi.Client(api_key=api_keys['serpapi'])
106
- results = client.search(q=query, engine="google")
107
- if organic_results := results.get('organic_results'):
108
- md = ["### Top Search Results"]
109
- for res in organic_results[:5]:
110
- md.append(f"- **{res.get('title', 'N/A')}**: {res.get('snippet', 'No snippet available.')}\n [Source]({res.get('link', '#')})")
111
- return "\n\n".join(md)
112
- return "No results found."
113
 
114
  @tool
115
  def query_webpage(url: str, query: str) -> str:
116
- """Use when you need to answer a specific question about the content of a single webpage URL.
 
 
117
 
118
  Args:
119
- url (str): The URL of the webpage to query.
120
- query (str): The specific question to ask about the webpage's content.
121
  """
122
  try:
 
 
 
123
  index = get_webpage_index(url)
124
- return str(index.as_query_engine().query(query))
 
 
 
 
 
125
  except Exception as e:
126
- return f"Error querying webpage {url}: {e}"
 
 
127
 
128
- # 2. YouTube Tool
129
  @tool
130
  def query_youtube_video(video_url_or_id: str, query: str) -> str:
131
- """Use for questions about the content of a YouTube video. Accepts a full URL or a video ID.
 
 
132
 
133
  Args:
134
- video_url_or_id (str): The full URL or just the video ID of the YouTube video.
135
- query (str): The specific question to ask about the video's content.
136
  """
137
  try:
138
- video_id = video_url_or_id
139
- if "youtube.com" in video_url_or_id or "youtu.be" in video_url_or_id:
140
- parsed_url = urlparse.urlparse(video_url_or_id)
141
- video_id = urlparse.parse_qs(parsed_url.query).get('v', [None])[0]
142
- if not video_id:
143
- video_id = parsed_url.path.lstrip('/')
144
  if not video_id:
145
- return "Error: Could not extract a valid YouTube video ID."
 
146
  index = get_youtube_index(video_id)
147
- return str(index.as_query_engine().query(query))
 
 
 
 
 
148
  except YouTubeTranscriptApiError as e:
149
- return f"Error fetching transcript for video {video_id}: {e}"
150
  except Exception as e:
151
- return f"Error querying YouTube video {video_id}: {e}"
 
 
152
 
153
- # 3. Coding Tool
154
  @tool
155
  def run_python_code(code: str) -> str:
156
  """
157
- Executes a string of Python code and returns its standard output.
158
- Use this for coding challenges, calculations, or data manipulation.
159
- The code is executed in a restricted environment; it cannot access external files.
160
 
161
  Args:
162
- code (str): The Python code to execute as a single string.
163
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  output = io.StringIO()
165
  try:
166
  with contextlib.redirect_stdout(output):
167
- exec(code, {})
168
- return output.getvalue()
 
169
  except Exception as e:
170
- return f"Error executing code: {e}"
171
 
172
- # --- Model and Agent Initialization ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
  try:
175
  model = InferenceClientModel(
@@ -182,94 +357,124 @@ def initialize_agent():
182
  logging.error(f"Failed to load model: {e}")
183
  raise
184
 
185
- # A single, powerful worker agent with a diverse toolset
186
  worker_agent = ToolCallingAgent(
187
  tools=[
188
  google_search,
 
189
  query_webpage,
190
  query_youtube_video,
191
  run_python_code,
192
  WikipediaTool(),
193
  ],
194
  model=model,
195
- max_steps=5, # Sub-tasks should be short and focused
196
- name="multi_tool_worker",
197
- description="A specialized worker agent that can search the web, query Wikipedia, analyze videos, and execute code."
198
  )
199
 
200
- # The manager agent acts as a strategic dispatcher.
201
  manager = CodeAgent(
202
  model=model,
203
  managed_agents=[worker_agent],
204
  tools=[WebSearchTool()],
205
- additional_authorized_imports=["time", "numpy", "pandas", "requests", "serpapi", "llama_index", "beautifulsoup4", "markdownify", "lxml", "json", "urllib.parse", "youtube_transcript_api", "together"],
206
- instructions="""You are a master AI assistant responsible for answering a user's question. Your goal is to provide a single, precise, and final answer by generating and executing Python code.
207
-
208
- **Your Strategic Thought Process:**
209
-
210
- 1. **ANALYZE THE QUESTION:**
211
- * Deeply understand the user's question, identifying all constraints, keywords, and the required format for the final answer.
212
- * Determine the core task: Is it a factual lookup, data extraction, code execution, or video analysis?
213
-
214
- 2. **PLAN AND EXECUTE USING YOUR CAPABILITIES:**
215
- * You will write Python code to find the answer. You have two ways to get information: direct tool use or delegation.
216
-
217
- * **A) Direct Tool Use (For simple web search):**
218
- * You can directly call `WebSearchTool(query: str)` in your code for quick, general web searches.
219
-
220
- * **B) Delegation to a Specialized Worker (For all other tasks):**
221
- * For more complex tasks, you MUST delegate to the `multi_tool_worker` agent.
222
- * To do this, write code that calls `multi_tool_worker.run(task_description: str)`.
223
- * The `multi_tool_worker` is a `ToolCallingAgent` that can use the following tools based on your `task_description`:
224
- * `google_search`: For detailed web searches.
225
- * `wikipedia_search`: For encyclopedic facts (people, places, topics).
226
- * `query_webpage`: To ask questions about a specific webpage URL.
227
- * `query_youtube_video`: To ask questions about a specific YouTube video.
228
- * `run_python_code`: For complex calculations or data manipulation.
229
-
230
- * **Example Thought Process & Code:**
231
- * **User Question:** "What is the discography of Mercedes Sosa according to Wikipedia?"
232
- * **Your Plan:** This is a specific factual query best suited for Wikipedia. I must delegate this to the `multi_tool_worker`.
233
- * **Your Code:**
234
- ```python
235
- discography = multi_tool_worker.run("Search Wikipedia for 'Mercedes Sosa discography'")
236
- print(discography)
237
- ```
238
-
239
- 3. **FORMULATE THE FINAL ANSWER (Precision & Format):**
240
- * Once you have definitively found the answer, format it *exactly* as requested in the original question.
241
- * **Your final output must be ONLY the answer itself.** Do not include any extra text, explanations, conversational filler, or prefixes like "FINAL ANSWER:".
242
- * Example: If the question asks for a number and the answer is "123", your final output must be `123`.
243
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  )
245
- logging.info("Multi-task agent initialized successfully.")
 
246
  return manager
247
 
248
- # --- Main Execution Block for Local Testing ---
249
 
250
  def main():
251
- """Main function for local testing of the agent."""
252
  configure_logging()
253
  try:
254
  agent = initialize_agent()
255
  if agent:
256
- # Example prompts for each category
257
- prompts = {
258
- "Web Search": "Who is the current CEO of OpenAI?",
259
- "YouTube": "What is the main topic of the video https://www.youtube.com/watch?v=bZQun8Y4L2A regarding AI models?",
260
- "Coding": "Write a Python script that calculates and prints the factorial of 5.",
261
- }
 
262
 
263
- for category, prompt in prompts.items():
264
- logging.info(f"\n--- Testing Category: {category} ---")
265
- logging.info(f"Prompt: {prompt}")
266
- response = agent.run(prompt)
267
- logging.info(f"Agent's Final Answer: {response}")
268
- logging.info("-" * (30 + len(category)))
 
 
 
 
 
 
 
269
 
270
  except Exception as e:
271
- logging.critical(f"An unhandled error occurred during local testing: {e}", exc_info=True)
272
 
273
  if __name__ == "__main__":
274
- # This allows you to test the agent's logic by running `python agent.py` locally.
275
  main()
 
4
  import urllib.parse as urlparse
5
  import io
6
  import contextlib
7
+ import re
8
  from functools import lru_cache, wraps
9
+ from typing import Optional, Dict, Any
10
 
11
  from dotenv import load_dotenv
12
  from requests.exceptions import RequestException
13
  import serpapi
14
  from llama_index.core import VectorStoreIndex, download_loader
15
  from llama_index.core.schema import Document
16
+ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
17
 
18
  from smolagents import (CodeAgent, InferenceClientModel, ToolCallingAgent,
19
  WebSearchTool, WikipediaTool, tool)
 
39
  raise ValueError("One or more API keys are missing. Please check your .env file.")
40
  return keys
41
 
42
+ # --- Custom Exceptions ---
43
class SerpApiClientException(Exception):
    """Raised when a SerpAPI call fails; listed among the retry-able exceptions."""
45
+
46
class YouTubeTranscriptApiError(Exception):
    """Raised when a YouTube transcript cannot be fetched or processed."""
48
+
49
  # --- Decorators ---
50
 
51
  def retry(max_retries=3, initial_delay=1, backoff=2):
 
54
  @wraps(func)
55
  def wrapper(*args, **kwargs):
56
  delay = initial_delay
57
+ retryable_exceptions = (RequestException, SerpApiClientException, YouTubeTranscriptApiError, TranscriptsDisabled, NoTranscriptFound)
 
58
  for attempt in range(1, max_retries + 1):
59
  try:
60
  return func(*args, **kwargs)
 
71
  return wrapper
72
  return decorator
73
 
74
+ # --- Helper Functions ---
75
+
76
def extract_video_id(url_or_id: str) -> Optional[str]:
    """Extract a YouTube video ID from a URL, or return it unchanged if already an ID.

    Supports watch, youtu.be, embed, shorts, and live URL formats, plus any
    youtube.com URL carrying the ID in a ``v=`` query parameter.

    Args:
        url_or_id (str): A YouTube URL in any common format, or a bare
            11-character video ID.

    Returns:
        The 11-character video ID, or ``None`` when no ID can be found.
    """
    if not url_or_id:
        return None

    # A bare video ID: exactly 11 characters from the YouTube ID alphabet.
    if re.match(r'^[a-zA-Z0-9_-]{11}$', url_or_id):
        return url_or_id

    # Known URL shapes, tried in order; shorts/ and live/ paths are accepted
    # in addition to the classic watch/embed/youtu.be forms. The final
    # pattern catches any youtube.com URL with a v= query parameter.
    patterns = [
        r'(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/|youtube\.com/shorts/|youtube\.com/live/)([a-zA-Z0-9_-]{11})',
        r'youtube\.com/.*[?&]v=([a-zA-Z0-9_-]{11})',
    ]

    for pattern in patterns:
        match = re.search(pattern, url_or_id)
        if match:
            return match.group(1)

    return None
97
+
98
def clean_search_results(results: Dict[str, Any]) -> str:
    """Format a raw SerpAPI response into readable markdown sections.

    Emits, in order: the top organic web results (at most five), the
    knowledge-graph card, and any direct "answer box" content found in
    the payload.

    Args:
        results (Dict[str, Any]): Raw response dictionary from SerpAPI.

    Returns:
        A markdown string, or a "no results" message when the payload is
        empty or contains none of the recognized sections.
    """
    if not results:
        return "No results found."

    sections = []

    # Top organic hits, capped at five entries.
    organic = results.get('organic_results', [])
    if organic:
        sections.append("### Web Results")
        for rank, hit in enumerate(organic[:5], 1):
            name = hit.get('title', 'N/A')
            blurb = hit.get('snippet', 'No description available.')
            href = hit.get('link', '#')
            sections.append(f"{rank}. **{name}**\n {blurb}\n Source: {href}")

    # Knowledge-graph card, when present.
    kg = results.get('knowledge_graph')
    if kg:
        sections.append("\n### Knowledge Graph")
        kg_title = kg.get('title')
        if kg_title:
            sections.append(f"**{kg_title}**")
        kg_description = kg.get('description')
        if kg_description:
            sections.append(f"{kg_description}")

    # Direct answer box: prefer the explicit answer over its snippet.
    box = results.get('answer_box')
    if box:
        sections.append("\n### Direct Answer")
        direct = box.get('answer')
        if direct:
            sections.append(f"{direct}")
        else:
            fallback = box.get('snippet')
            if fallback:
                sections.append(f"{fallback}")

    return "\n\n".join(sections) if sections else "No relevant results found."
131
+
132
+ # --- Main Agent Initialization ---
133
 
134
  def initialize_agent():
135
  """
136
+ Initializes a multi-disciplinary agent optimized for GAIA benchmark questions.
 
137
  """
138
+ configure_logging()
139
  api_keys = load_api_keys()
140
 
141
  # --- Caching Layer for LlamaIndex ---
142
  @lru_cache(maxsize=32)
143
  @retry()
144
  def get_webpage_index(url: str) -> VectorStoreIndex:
145
+ """Create a searchable index from a webpage."""
146
  logging.info(f"Indexing webpage: {url}")
147
+ try:
148
+ loader_cls = download_loader("BeautifulSoupWebReader")
149
+ loader = loader_cls()
150
+ docs = loader.load_data(urls=[url])
151
+ if not docs:
152
+ raise ValueError(f"No content could be extracted from {url}")
153
+ return VectorStoreIndex.from_documents(docs)
154
+ except Exception as e:
155
+ logging.error(f"Failed to index webpage {url}: {e}")
156
+ raise
157
 
158
  @lru_cache(maxsize=32)
159
  @retry()
160
  def get_youtube_index(video_id: str) -> VectorStoreIndex:
161
+ """Create a searchable index from a YouTube video transcript."""
162
  logging.info(f"Indexing YouTube video: {video_id}")
163
+ try:
164
+ # Try to get transcript in English first, then any available language
165
+ try:
166
+ transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
167
+ except (TranscriptsDisabled, NoTranscriptFound):
168
+ # Try to get any available transcript
169
+ transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
170
+ transcript = transcript_list.find_transcript(['en']).fetch()
171
+
172
+ if not transcript:
173
+ raise YouTubeTranscriptApiError(f"No transcript available for video {video_id}")
174
+
175
+ text = ' '.join([entry['text'] for entry in transcript])
176
+ if not text.strip():
177
+ raise YouTubeTranscriptApiError(f"Empty transcript for video {video_id}")
178
+
179
+ doc = Document(text=text, doc_id=f"youtube_{video_id}")
180
+ return VectorStoreIndex.from_documents([doc])
181
+ except Exception as e:
182
+ logging.error(f"Failed to index YouTube video {video_id}: {e}")
183
+ raise YouTubeTranscriptApiError(f"Could not process YouTube video {video_id}: {e}")
184
 
185
+ # --- Enhanced Tool Definitions ---
186
 
 
187
  @tool
188
  @retry()
189
  def google_search(query: str) -> str:
190
+ """
191
+ Perform a comprehensive Google search with enhanced result formatting.
192
+ Use for general knowledge questions, current events, or when you need factual information.
193
 
194
  Args:
195
+ query (str): The search query
196
  """
197
+ try:
198
+ client = serpapi.Client(api_key=api_keys['serpapi'])
199
+ results = client.search(q=query, engine="google", num=10)
200
+ return clean_search_results(results)
201
+ except Exception as e:
202
+ logging.error(f"Google search failed for query '{query}': {e}")
203
+ return f"Search failed: {e}"
 
204
 
205
  @tool
206
  def query_webpage(url: str, query: str) -> str:
207
+ """
208
+ Extract specific information from a webpage by asking a targeted question.
209
+ Best for when you have a specific URL and need detailed information from it.
210
 
211
  Args:
212
+ url (str): The complete URL of the webpage
213
+ query (str): Specific question about the webpage content
214
  """
215
  try:
216
+ if not url.startswith(('http://', 'https://')):
217
+ url = 'https://' + url
218
+
219
  index = get_webpage_index(url)
220
+ query_engine = index.as_query_engine(
221
+ similarity_top_k=5,
222
+ response_mode="tree_summarize"
223
+ )
224
+ response = query_engine.query(query)
225
+ return str(response)
226
  except Exception as e:
227
+ error_msg = f"Error querying webpage {url}: {e}"
228
+ logging.error(error_msg)
229
+ return error_msg
230
 
 
231
  @tool
232
  def query_youtube_video(video_url_or_id: str, query: str) -> str:
233
+ """
234
+ Extract information from YouTube video transcripts by asking specific questions.
235
+ Handles various YouTube URL formats and video IDs.
236
 
237
  Args:
238
+ video_url_or_id (str): YouTube URL or video ID
239
+ query (str): Specific question about the video content
240
  """
241
  try:
242
+ video_id = extract_video_id(video_url_or_id)
 
 
 
 
 
243
  if not video_id:
244
+ return f"Error: Could not extract valid YouTube video ID from '{video_url_or_id}'"
245
+
246
  index = get_youtube_index(video_id)
247
+ query_engine = index.as_query_engine(
248
+ similarity_top_k=5,
249
+ response_mode="tree_summarize"
250
+ )
251
+ response = query_engine.query(query)
252
+ return str(response)
253
  except YouTubeTranscriptApiError as e:
254
+ return f"YouTube transcript error for {video_url_or_id}: {e}"
255
  except Exception as e:
256
+ error_msg = f"Error querying YouTube video {video_url_or_id}: {e}"
257
+ logging.error(error_msg)
258
+ return error_msg
259
 
 
260
  @tool
261
  def run_python_code(code: str) -> str:
262
  """
263
+ Execute Python code in a safe environment and return the output.
264
+ Perfect for calculations, data processing, and algorithmic problems.
265
+ Available modules: math, datetime, json, re, collections, itertools, numpy, pandas
266
 
267
  Args:
268
+ code (str): Python code to execute
269
  """
270
+ # Create a safe execution environment with useful modules
271
+ safe_globals = {
272
+ '__builtins__': {
273
+ 'print': print, 'len': len, 'range': range, 'enumerate': enumerate,
274
+ 'zip': zip, 'map': map, 'filter': filter, 'sum': sum, 'max': max, 'min': min,
275
+ 'abs': abs, 'round': round, 'sorted': sorted, 'reversed': reversed,
276
+ 'int': int, 'float': float, 'str': str, 'bool': bool, 'list': list,
277
+ 'dict': dict, 'set': set, 'tuple': tuple, 'type': type, 'isinstance': isinstance,
278
+ }
279
+ }
280
+
281
+ # Add safe imports
282
+ try:
283
+ import math
284
+ import datetime
285
+ import json
286
+ import re
287
+ import collections
288
+ import itertools
289
+ safe_globals.update({
290
+ 'math': math, 'datetime': datetime, 'json': json, 're': re,
291
+ 'collections': collections, 'itertools': itertools
292
+ })
293
+
294
+ # Try to import numpy and pandas if available
295
+ try:
296
+ import numpy as np
297
+ safe_globals['np'] = np
298
+ safe_globals['numpy'] = np
299
+ except ImportError:
300
+ pass
301
+
302
+ try:
303
+ import pandas as pd
304
+ safe_globals['pd'] = pd
305
+ safe_globals['pandas'] = pd
306
+ except ImportError:
307
+ pass
308
+
309
+ except ImportError as e:
310
+ logging.warning(f"Some modules not available for code execution: {e}")
311
+
312
  output = io.StringIO()
313
  try:
314
  with contextlib.redirect_stdout(output):
315
+ exec(code, safe_globals)
316
+ result = output.getvalue()
317
+ return result if result else "Code executed successfully (no output)"
318
  except Exception as e:
319
+ return f"Code execution error: {e}"
320
 
321
+ @tool
322
+ def advanced_search(query: str, search_type: str = "general") -> str:
323
+ """
324
+ Perform specialized searches for different types of information.
325
+
326
+ Args:
327
+ query (str): Search query
328
+ search_type (str): Type of search - "academic", "news", "images", "general"
329
+ """
330
+ try:
331
+ client = serpapi.Client(api_key=api_keys['serpapi'])
332
+
333
+ search_params = {"q": query, "num": 8}
334
+
335
+ if search_type == "academic":
336
+ results = client.search(engine="google_scholar", **search_params)
337
+ elif search_type == "news":
338
+ search_params["tbm"] = "nws"
339
+ results = client.search(engine="google", **search_params)
340
+ else: # general
341
+ results = client.search(engine="google", **search_params)
342
+
343
+ return clean_search_results(results)
344
+ except Exception as e:
345
+ return f"Advanced search failed: {e}"
346
+
347
+ # --- Model and Agent Setup ---
348
 
349
  try:
350
  model = InferenceClientModel(
 
357
  logging.error(f"Failed to load model: {e}")
358
  raise
359
 
360
+ # Specialized worker agent with comprehensive toolset
361
  worker_agent = ToolCallingAgent(
362
  tools=[
363
  google_search,
364
+ advanced_search,
365
  query_webpage,
366
  query_youtube_video,
367
  run_python_code,
368
  WikipediaTool(),
369
  ],
370
  model=model,
371
+ max_steps=6, # Allow more steps for complex tasks
372
+ name="gaia_specialist",
373
+ description="Expert agent for GAIA benchmark tasks: web research, document analysis, video processing, and code execution."
374
  )
375
 
376
+ # Strategic manager agent
377
  manager = CodeAgent(
378
  model=model,
379
  managed_agents=[worker_agent],
380
  tools=[WebSearchTool()],
381
+ additional_authorized_imports=[
382
+ "time", "numpy", "pandas", "requests", "serpapi", "llama_index",
383
+ "beautifulsoup4", "markdownify", "lxml", "json", "urllib.parse",
384
+ "youtube_transcript_api", "together", "math", "datetime", "re",
385
+ "collections", "itertools"
386
+ ],
387
+ instructions="""You are an expert AI system designed to excel at the GAIA benchmark. Your mission is to provide precise, accurate answers to complex questions spanning multiple domains.
388
+
389
+ **STRATEGIC APPROACH:**
390
+
391
+ 1. **QUESTION ANALYSIS:**
392
+ - Parse the question carefully to identify: required output format, key constraints, domain (science, history, current events, etc.)
393
+ - Determine the information sources needed: web search, specific websites, videos, calculations, or combinations
394
+
395
+ 2. **EXECUTION STRATEGY:**
396
+
397
+ **Direct Web Search (for simple lookups):**
398
+ ```python
399
+ results = WebSearchTool(query="your search query")
400
+ print(results)
401
+ ```
402
+
403
+ **Delegate to Specialist Agent (for complex tasks):**
404
+ ```python
405
+ answer = gaia_specialist.run("Detailed task description with specific requirements")
406
+ print(answer)
407
+ ```
408
+
409
+ The specialist can:
410
+ - `google_search`: Comprehensive web searches with rich formatting
411
+ - `advanced_search`: Academic papers, news, specialized searches
412
+ - `query_webpage`: Deep analysis of specific URLs
413
+ - `query_youtube_video`: Extract information from video transcripts
414
+ - `run_python_code`: Mathematical calculations, data processing, algorithms
415
+ - `wikipedia_search`: Encyclopedic information
416
+
417
+ 3. **ANSWER FORMATTING:**
418
+ - Provide ONLY the final answer in the exact format requested
419
+ - No explanations, prefixes, or extra text unless specifically asked
420
+ - For numerical answers: provide just the number
421
+ - For yes/no questions: provide just "Yes" or "No"
422
+ - For lists: follow the specified format exactly
423
+
424
+ **EXAMPLES:**
425
+
426
+ Question: "What is 15! (15 factorial)?"
427
+ Strategy: Mathematical calculation → delegate to specialist
428
+ ```python
429
+ result = gaia_specialist.run("Calculate 15 factorial using Python")
430
+ print(result)
431
+ ```
432
+
433
+ Question: "What is the capital of the country where Mount Everest is located?"
434
+ Strategy: Multi-step reasoning → delegate to specialist
435
+ ```python
436
+ answer = gaia_specialist.run("Find the country where Mount Everest is located, then identify its capital city")
437
+ print(answer)
438
+ ```
439
+
440
+ Remember: Your final output must be ONLY the answer itself, formatted exactly as requested."""
441
  )
442
+
443
+ logging.info("Enhanced GAIA agent initialized successfully.")
444
  return manager
445
 
446
+ # --- Testing and Main Execution ---
447
 
448
def main():
    """Test the agent with sample GAIA-style questions.

    Runs each sample question through the initialized agent, logging the
    answer (or error) per question, with a one-second pause between runs.
    """
    # `time` is needed for the pacing delay below but does not appear in the
    # visible module-level import block, so import it locally.
    # NOTE(review): drop this if `import time` already exists at file top.
    import time

    configure_logging()
    try:
        agent = initialize_agent()
        if agent:
            # Sample questions covering different GAIA categories
            test_questions = [
                "What is the square root of 144?",
                "In what year was the Python programming language first released?",
                "What is the chemical formula for caffeine?",
                "How many days are there between January 1, 2024 and March 15, 2024?",
            ]

            for i, question in enumerate(test_questions, 1):
                logging.info(f"\n{'='*50}")
                logging.info(f"Test Question {i}: {question}")
                logging.info(f"{'='*50}")

                # Per-question guard: one failing question must not abort the run.
                try:
                    response = agent.run(question)
                    logging.info(f"Agent Answer: {response}")
                except Exception as e:
                    logging.error(f"Error processing question {i}: {e}")

                # Small delay between questions
                time.sleep(1)

    except Exception as e:
        logging.critical(f"Critical error during testing: {e}", exc_info=True)

if __name__ == "__main__":
    main()