jlgaralc committed on
Commit
c9e0cf1
·
1 Parent(s): 9e25975

Modified files

Browse files
Files changed (1) hide show
  1. agent.py +320 -115
agent.py CHANGED
@@ -4,14 +4,16 @@ import logging
4
  import urllib.parse as urlparse
5
  import io
6
  import contextlib
 
7
  from functools import lru_cache, wraps
 
8
 
9
  from dotenv import load_dotenv
10
  from requests.exceptions import RequestException
11
  import serpapi
12
  from llama_index.core import VectorStoreIndex, download_loader
13
  from llama_index.core.schema import Document
14
- from youtube_transcript_api import YouTubeTranscriptApi
15
 
16
  from smolagents import (CodeAgent, InferenceClientModel, ToolCallingAgent,
17
  WebSearchTool, WikipediaTool, tool)
@@ -37,6 +39,13 @@ def load_api_keys():
37
  raise ValueError("One or more API keys are missing. Please check your .env file.")
38
  return keys
39
 
 
 
 
 
 
 
 
40
  # --- Decorators ---
41
 
42
  def retry(max_retries=3, initial_delay=1, backoff=2):
@@ -45,8 +54,7 @@ def retry(max_retries=3, initial_delay=1, backoff=2):
45
  @wraps(func)
46
  def wrapper(*args, **kwargs):
47
  delay = initial_delay
48
- # Define specific, retry-able exceptions
49
- retryable_exceptions = (RequestException, SerpApiClientException, YouTubeTranscriptApiError)
50
  for attempt in range(1, max_retries + 1):
51
  try:
52
  return func(*args, **kwargs)
@@ -63,113 +71,280 @@ def retry(max_retries=3, initial_delay=1, backoff=2):
63
  return wrapper
64
  return decorator
65
 
66
- # --- Main Agent Initialization (as called by app.py) ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  def initialize_agent():
69
  """
70
- Initializes a multi-disciplinary agent with a toolset and reasoning framework
71
- designed for the benchmark's question categories.
72
  """
 
73
  api_keys = load_api_keys()
74
 
75
  # --- Caching Layer for LlamaIndex ---
76
  @lru_cache(maxsize=32)
77
  @retry()
78
  def get_webpage_index(url: str) -> VectorStoreIndex:
 
79
  logging.info(f"Indexing webpage: {url}")
80
- loader_cls = download_loader("BeautifulSoupWebReader")
81
- loader = loader_cls()
82
- docs = loader.load_data(urls=[url])
83
- return VectorStoreIndex.from_documents(docs)
 
 
 
 
 
 
84
 
85
  @lru_cache(maxsize=32)
86
  @retry()
87
  def get_youtube_index(video_id: str) -> VectorStoreIndex:
 
88
  logging.info(f"Indexing YouTube video: {video_id}")
89
- transcript = YouTubeTranscriptApi.get_transcript(video_id)
90
- text = ' '.join([t['text'] for t in transcript])
91
- doc = Document(text=text, doc_id=f"youtube_{video_id}")
92
- return VectorStoreIndex.from_documents([doc])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
 
94
- # --- Specialized Tool Definitions ---
95
 
96
- # 1. Web Search Tools
97
  @tool
98
  @retry()
99
  def google_search(query: str) -> str:
100
- """Use for general knowledge questions, finding facts, or when you don't have a specific URL.
 
 
101
 
102
  Args:
103
- query (str): The search query.
104
  """
105
- client = serpapi.Client(api_key=api_keys['serpapi'])
106
- results = client.search(q=query, engine="google")
107
- if organic_results := results.get('organic_results'):
108
- md = ["### Top Search Results"]
109
- for res in organic_results[:5]:
110
- md.append(f"- **{res.get('title', 'N/A')}**: {res.get('snippet', 'No snippet available.')}\n [Source]({res.get('link', '#')})")
111
- return "\n\n".join(md)
112
- return "No results found."
113
 
114
  @tool
115
  def query_webpage(url: str, query: str) -> str:
116
- """Use when you need to answer a specific question about the content of a single webpage URL.
 
 
117
 
118
  Args:
119
- url (str): The URL of the webpage to query.
120
- query (str): The specific question to ask about the webpage's content.
121
  """
122
  try:
 
 
 
123
  index = get_webpage_index(url)
124
- return str(index.as_query_engine().query(query))
 
 
 
 
 
125
  except Exception as e:
126
- return f"Error querying webpage {url}: {e}"
 
 
127
 
128
- # 2. YouTube Tool
129
  @tool
130
  def query_youtube_video(video_url_or_id: str, query: str) -> str:
131
- """Use for questions about the content of a YouTube video. Accepts a full URL or a video ID.
 
 
132
 
133
  Args:
134
- video_url_or_id (str): The full URL or just the video ID of the YouTube video.
135
- query (str): The specific question to ask about the video's content.
136
  """
137
  try:
138
- video_id = video_url_or_id
139
- if "youtube.com" in video_url_or_id or "youtu.be" in video_url_or_id:
140
- parsed_url = urlparse.urlparse(video_url_or_id)
141
- video_id = urlparse.parse_qs(parsed_url.query).get('v', [None])[0]
142
- if not video_id:
143
- video_id = parsed_url.path.lstrip('/')
144
  if not video_id:
145
- return "Error: Could not extract a valid YouTube video ID."
 
146
  index = get_youtube_index(video_id)
147
- return str(index.as_query_engine().query(query))
 
 
 
 
 
148
  except YouTubeTranscriptApiError as e:
149
- return f"Error fetching transcript for video {video_id}: {e}"
150
  except Exception as e:
151
- return f"Error querying YouTube video {video_id}: {e}"
 
 
152
 
153
- # 3. Coding Tool
154
  @tool
155
  def run_python_code(code: str) -> str:
156
  """
157
- Executes a string of Python code and returns its standard output.
158
- Use this for coding challenges, calculations, or data manipulation.
159
- The code is executed in a restricted environment; it cannot access external files.
160
 
161
  Args:
162
- code (str): The Python code to execute as a single string.
163
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  output = io.StringIO()
165
  try:
166
  with contextlib.redirect_stdout(output):
167
- exec(code, {})
168
- return output.getvalue()
 
169
  except Exception as e:
170
- return f"Error executing code: {e}"
171
 
172
- # --- Model and Agent Initialization ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
  try:
175
  model = InferenceClientModel(
@@ -182,94 +357,124 @@ def initialize_agent():
182
  logging.error(f"Failed to load model: {e}")
183
  raise
184
 
185
- # A single, powerful worker agent with a diverse toolset
186
  worker_agent = ToolCallingAgent(
187
  tools=[
188
  google_search,
 
189
  query_webpage,
190
  query_youtube_video,
191
  run_python_code,
192
  WikipediaTool(),
193
  ],
194
  model=model,
195
- max_steps=5, # Sub-tasks should be short and focused
196
- name="multi_tool_worker",
197
- description="A specialized worker agent that can search the web, query Wikipedia, analyze videos, and execute code."
198
  )
199
 
200
- # The manager agent acts as a strategic dispatcher.
201
  manager = CodeAgent(
202
  model=model,
203
  managed_agents=[worker_agent],
204
  tools=[WebSearchTool()],
205
- additional_authorized_imports=["time", "numpy", "pandas", "requests", "serpapi", "llama_index", "beautifulsoup4", "markdownify", "lxml", "json", "urllib.parse", "youtube_transcript_api", "together"],
206
- instructions="""You are a master AI assistant responsible for answering a user's question. Your goal is to provide a single, precise, and final answer by generating and executing Python code.
207
-
208
- **Your Strategic Thought Process:**
209
-
210
- 1. **ANALYZE THE QUESTION:**
211
- * Deeply understand the user's question, identifying all constraints, keywords, and the required format for the final answer.
212
- * Determine the core task: Is it a factual lookup, data extraction, code execution, or video analysis?
213
-
214
- 2. **PLAN AND EXECUTE USING YOUR CAPABILITIES:**
215
- * You will write Python code to find the answer. You have two ways to get information: direct tool use or delegation.
216
-
217
- * **A) Direct Tool Use (For simple web search):**
218
- * You can directly call `WebSearchTool(query: str)` in your code for quick, general web searches.
219
-
220
- * **B) Delegation to a Specialized Worker (For all other tasks):**
221
- * For more complex tasks, you MUST delegate to the `multi_tool_worker` agent.
222
- * To do this, write code that calls `multi_tool_worker.run(task_description: str)`.
223
- * The `multi_tool_worker` is a `ToolCallingAgent` that can use the following tools based on your `task_description`:
224
- * `google_search`: For detailed web searches.
225
- * `wikipedia_search`: For encyclopedic facts (people, places, topics).
226
- * `query_webpage`: To ask questions about a specific webpage URL.
227
- * `query_youtube_video`: To ask questions about a specific YouTube video.
228
- * `run_python_code`: For complex calculations or data manipulation.
229
-
230
- * **Example Thought Process & Code:**
231
- * **User Question:** "What is the discography of Mercedes Sosa according to Wikipedia?"
232
- * **Your Plan:** This is a specific factual query best suited for Wikipedia. I must delegate this to the `multi_tool_worker`.
233
- * **Your Code:**
234
- ```python
235
- discography = multi_tool_worker.run("Search Wikipedia for 'Mercedes Sosa discography'")
236
- print(discography)
237
- ```
238
-
239
- 3. **FORMULATE THE FINAL ANSWER (Precision & Format):**
240
- * Once you have definitively found the answer, format it *exactly* as requested in the original question.
241
- * **Your final output must be ONLY the answer itself.** Do not include any extra text, explanations, conversational filler, or prefixes like "FINAL ANSWER:".
242
- * Example: If the question asks for a number and the answer is "123", your final output must be `123`.
243
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  )
245
- logging.info("Multi-task agent initialized successfully.")
 
246
  return manager
247
 
248
- # --- Main Execution Block for Local Testing ---
249
 
250
  def main():
251
- """Main function for local testing of the agent."""
252
  configure_logging()
253
  try:
254
  agent = initialize_agent()
255
  if agent:
256
- # Example prompts for each category
257
- prompts = {
258
- "Web Search": "Who is the current CEO of OpenAI?",
259
- "YouTube": "What is the main topic of the video https://www.youtube.com/watch?v=bZQun8Y4L2A regarding AI models?",
260
- "Coding": "Write a Python script that calculates and prints the factorial of 5.",
261
- }
 
262
 
263
- for category, prompt in prompts.items():
264
- logging.info(f"\n--- Testing Category: {category} ---")
265
- logging.info(f"Prompt: {prompt}")
266
- response = agent.run(prompt)
267
- logging.info(f"Agent's Final Answer: {response}")
268
- logging.info("-" * (30 + len(category)))
 
 
 
 
 
 
 
269
 
270
  except Exception as e:
271
- logging.critical(f"An unhandled error occurred during local testing: {e}", exc_info=True)
272
 
273
  if __name__ == "__main__":
274
- # This allows you to test the agent's logic by running `python agent.py` locally.
275
  main()
 
4
  import urllib.parse as urlparse
5
  import io
6
  import contextlib
7
+ import re
8
  from functools import lru_cache, wraps
9
+ from typing import Optional, Dict, Any
10
 
11
  from dotenv import load_dotenv
12
  from requests.exceptions import RequestException
13
  import serpapi
14
  from llama_index.core import VectorStoreIndex, download_loader
15
  from llama_index.core.schema import Document
16
+ from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
17
 
18
  from smolagents import (CodeAgent, InferenceClientModel, ToolCallingAgent,
19
  WebSearchTool, WikipediaTool, tool)
 
39
  raise ValueError("One or more API keys are missing. Please check your .env file.")
40
  return keys
41
 
42
+ # --- Custom Exceptions ---
43
class SerpApiClientException(Exception):
    """Raised when a SerpAPI call fails; listed among the retry-able exceptions."""
45
+
46
class YouTubeTranscriptApiError(Exception):
    """Raised when a YouTube transcript cannot be fetched or processed."""
48
+
49
  # --- Decorators ---
50
 
51
  def retry(max_retries=3, initial_delay=1, backoff=2):
 
54
  @wraps(func)
55
  def wrapper(*args, **kwargs):
56
  delay = initial_delay
57
+ retryable_exceptions = (RequestException, SerpApiClientException, YouTubeTranscriptApiError, TranscriptsDisabled, NoTranscriptFound)
 
58
  for attempt in range(1, max_retries + 1):
59
  try:
60
  return func(*args, **kwargs)
 
71
  return wrapper
72
  return decorator
73
 
74
+ # --- Helper Functions ---
75
+
76
def extract_video_id(url_or_id: str) -> Optional[str]:
    """Extract a YouTube video ID from a URL, or return it unchanged if already an ID.

    Supports watch, youtu.be, embed, shorts, and live URL formats, plus any
    youtube.com URL carrying the ID in a ``v=`` query parameter.

    Args:
        url_or_id (str): A YouTube URL in any common format, or a bare
            11-character video ID.

    Returns:
        The 11-character video ID, or ``None`` when no ID can be found.
    """
    if not url_or_id:
        return None

    # A bare video ID: exactly 11 characters from the YouTube ID alphabet.
    if re.match(r'^[a-zA-Z0-9_-]{11}$', url_or_id):
        return url_or_id

    # Known URL shapes, tried in order; shorts/ and live/ paths are accepted
    # in addition to the classic watch/embed/youtu.be forms. The final
    # pattern catches any youtube.com URL with a v= query parameter.
    patterns = [
        r'(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/|youtube\.com/shorts/|youtube\.com/live/)([a-zA-Z0-9_-]{11})',
        r'youtube\.com/.*[?&]v=([a-zA-Z0-9_-]{11})',
    ]

    for pattern in patterns:
        match = re.search(pattern, url_or_id)
        if match:
            return match.group(1)

    return None
97
+
98
def clean_search_results(results: Dict[str, Any]) -> str:
    """Format a raw SerpAPI response into readable markdown sections.

    Emits, in order: the top organic web results (at most five), the
    knowledge-graph card, and any direct "answer box" content found in
    the payload.

    Args:
        results (Dict[str, Any]): Raw response dictionary from SerpAPI.

    Returns:
        A markdown string, or a "no results" message when the payload is
        empty or contains none of the recognized sections.
    """
    if not results:
        return "No results found."

    sections = []

    # Top organic hits, capped at five entries.
    organic = results.get('organic_results', [])
    if organic:
        sections.append("### Web Results")
        for rank, hit in enumerate(organic[:5], 1):
            name = hit.get('title', 'N/A')
            blurb = hit.get('snippet', 'No description available.')
            href = hit.get('link', '#')
            sections.append(f"{rank}. **{name}**\n {blurb}\n Source: {href}")

    # Knowledge-graph card, when present.
    kg = results.get('knowledge_graph')
    if kg:
        sections.append("\n### Knowledge Graph")
        kg_title = kg.get('title')
        if kg_title:
            sections.append(f"**{kg_title}**")
        kg_description = kg.get('description')
        if kg_description:
            sections.append(f"{kg_description}")

    # Direct answer box: prefer the explicit answer over its snippet.
    box = results.get('answer_box')
    if box:
        sections.append("\n### Direct Answer")
        direct = box.get('answer')
        if direct:
            sections.append(f"{direct}")
        else:
            fallback = box.get('snippet')
            if fallback:
                sections.append(f"{fallback}")

    return "\n\n".join(sections) if sections else "No relevant results found."
131
+
132
+ # --- Main Agent Initialization ---
133
 
134
  def initialize_agent():
135
  """
136
+ Initializes a multi-disciplinary agent optimized for GAIA benchmark questions.
 
137
  """
138
+ configure_logging()
139
  api_keys = load_api_keys()
140
 
141
  # --- Caching Layer for LlamaIndex ---
142
  @lru_cache(maxsize=32)
143
  @retry()
144
  def get_webpage_index(url: str) -> VectorStoreIndex:
145
+ """Create a searchable index from a webpage."""
146
  logging.info(f"Indexing webpage: {url}")
147
+ try:
148
+ loader_cls = download_loader("BeautifulSoupWebReader")
149
+ loader = loader_cls()
150
+ docs = loader.load_data(urls=[url])
151
+ if not docs:
152
+ raise ValueError(f"No content could be extracted from {url}")
153
+ return VectorStoreIndex.from_documents(docs)
154
+ except Exception as e:
155
+ logging.error(f"Failed to index webpage {url}: {e}")
156
+ raise
157
 
158
  @lru_cache(maxsize=32)
159
  @retry()
160
  def get_youtube_index(video_id: str) -> VectorStoreIndex:
161
+ """Create a searchable index from a YouTube video transcript."""
162
  logging.info(f"Indexing YouTube video: {video_id}")
163
+ try:
164
+ # Try to get transcript in English first, then any available language
165
+ try:
166
+ transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
167
+ except (TranscriptsDisabled, NoTranscriptFound):
168
+ # Try to get any available transcript
169
+ transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
170
+ transcript = transcript_list.find_transcript(['en']).fetch()
171
+
172
+ if not transcript:
173
+ raise YouTubeTranscriptApiError(f"No transcript available for video {video_id}")
174
+
175
+ text = ' '.join([entry['text'] for entry in transcript])
176
+ if not text.strip():
177
+ raise YouTubeTranscriptApiError(f"Empty transcript for video {video_id}")
178
+
179
+ doc = Document(text=text, doc_id=f"youtube_{video_id}")
180
+ return VectorStoreIndex.from_documents([doc])
181
+ except Exception as e:
182
+ logging.error(f"Failed to index YouTube video {video_id}: {e}")
183
+ raise YouTubeTranscriptApiError(f"Could not process YouTube video {video_id}: {e}")
184
 
185
+ # --- Enhanced Tool Definitions ---
186
 
 
187
  @tool
188
  @retry()
189
  def google_search(query: str) -> str:
190
+ """
191
+ Perform a comprehensive Google search with enhanced result formatting.
192
+ Use for general knowledge questions, current events, or when you need factual information.
193
 
194
  Args:
195
+ query (str): The search query
196
  """
197
+ try:
198
+ client = serpapi.Client(api_key=api_keys['serpapi'])
199
+ results = client.search(q=query, engine="google", num=10)
200
+ return clean_search_results(results)
201
+ except Exception as e:
202
+ logging.error(f"Google search failed for query '{query}': {e}")
203
+ return f"Search failed: {e}"
 
204
 
205
  @tool
206
  def query_webpage(url: str, query: str) -> str:
207
+ """
208
+ Extract specific information from a webpage by asking a targeted question.
209
+ Best for when you have a specific URL and need detailed information from it.
210
 
211
  Args:
212
+ url (str): The complete URL of the webpage
213
+ query (str): Specific question about the webpage content
214
  """
215
  try:
216
+ if not url.startswith(('http://', 'https://')):
217
+ url = 'https://' + url
218
+
219
  index = get_webpage_index(url)
220
+ query_engine = index.as_query_engine(
221
+ similarity_top_k=5,
222
+ response_mode="tree_summarize"
223
+ )
224
+ response = query_engine.query(query)
225
+ return str(response)
226
  except Exception as e:
227
+ error_msg = f"Error querying webpage {url}: {e}"
228
+ logging.error(error_msg)
229
+ return error_msg
230
 
 
231
  @tool
232
  def query_youtube_video(video_url_or_id: str, query: str) -> str:
233
+ """
234
+ Extract information from YouTube video transcripts by asking specific questions.
235
+ Handles various YouTube URL formats and video IDs.
236
 
237
  Args:
238
+ video_url_or_id (str): YouTube URL or video ID
239
+ query (str): Specific question about the video content
240
  """
241
  try:
242
+ video_id = extract_video_id(video_url_or_id)
 
 
 
 
 
243
  if not video_id:
244
+ return f"Error: Could not extract valid YouTube video ID from '{video_url_or_id}'"
245
+
246
  index = get_youtube_index(video_id)
247
+ query_engine = index.as_query_engine(
248
+ similarity_top_k=5,
249
+ response_mode="tree_summarize"
250
+ )
251
+ response = query_engine.query(query)
252
+ return str(response)
253
  except YouTubeTranscriptApiError as e:
254
+ return f"YouTube transcript error for {video_url_or_id}: {e}"
255
  except Exception as e:
256
+ error_msg = f"Error querying YouTube video {video_url_or_id}: {e}"
257
+ logging.error(error_msg)
258
+ return error_msg
259
 
 
260
  @tool
261
  def run_python_code(code: str) -> str:
262
  """
263
+ Execute Python code in a safe environment and return the output.
264
+ Perfect for calculations, data processing, and algorithmic problems.
265
+ Available modules: math, datetime, json, re, collections, itertools, numpy, pandas
266
 
267
  Args:
268
+ code (str): Python code to execute
269
  """
270
+ # Create a safe execution environment with useful modules
271
+ safe_globals = {
272
+ '__builtins__': {
273
+ 'print': print, 'len': len, 'range': range, 'enumerate': enumerate,
274
+ 'zip': zip, 'map': map, 'filter': filter, 'sum': sum, 'max': max, 'min': min,
275
+ 'abs': abs, 'round': round, 'sorted': sorted, 'reversed': reversed,
276
+ 'int': int, 'float': float, 'str': str, 'bool': bool, 'list': list,
277
+ 'dict': dict, 'set': set, 'tuple': tuple, 'type': type, 'isinstance': isinstance,
278
+ }
279
+ }
280
+
281
+ # Add safe imports
282
+ try:
283
+ import math
284
+ import datetime
285
+ import json
286
+ import re
287
+ import collections
288
+ import itertools
289
+ safe_globals.update({
290
+ 'math': math, 'datetime': datetime, 'json': json, 're': re,
291
+ 'collections': collections, 'itertools': itertools
292
+ })
293
+
294
+ # Try to import numpy and pandas if available
295
+ try:
296
+ import numpy as np
297
+ safe_globals['np'] = np
298
+ safe_globals['numpy'] = np
299
+ except ImportError:
300
+ pass
301
+
302
+ try:
303
+ import pandas as pd
304
+ safe_globals['pd'] = pd
305
+ safe_globals['pandas'] = pd
306
+ except ImportError:
307
+ pass
308
+
309
+ except ImportError as e:
310
+ logging.warning(f"Some modules not available for code execution: {e}")
311
+
312
  output = io.StringIO()
313
  try:
314
  with contextlib.redirect_stdout(output):
315
+ exec(code, safe_globals)
316
+ result = output.getvalue()
317
+ return result if result else "Code executed successfully (no output)"
318
  except Exception as e:
319
+ return f"Code execution error: {e}"
320
 
321
+ @tool
322
+ def advanced_search(query: str, search_type: str = "general") -> str:
323
+ """
324
+ Perform specialized searches for different types of information.
325
+
326
+ Args:
327
+ query (str): Search query
328
+ search_type (str): Type of search - "academic", "news", "images", "general"
329
+ """
330
+ try:
331
+ client = serpapi.Client(api_key=api_keys['serpapi'])
332
+
333
+ search_params = {"q": query, "num": 8}
334
+
335
+ if search_type == "academic":
336
+ results = client.search(engine="google_scholar", **search_params)
337
+ elif search_type == "news":
338
+ search_params["tbm"] = "nws"
339
+ results = client.search(engine="google", **search_params)
340
+ else: # general
341
+ results = client.search(engine="google", **search_params)
342
+
343
+ return clean_search_results(results)
344
+ except Exception as e:
345
+ return f"Advanced search failed: {e}"
346
+
347
+ # --- Model and Agent Setup ---
348
 
349
  try:
350
  model = InferenceClientModel(
 
357
  logging.error(f"Failed to load model: {e}")
358
  raise
359
 
360
+ # Specialized worker agent with comprehensive toolset
361
  worker_agent = ToolCallingAgent(
362
  tools=[
363
  google_search,
364
+ advanced_search,
365
  query_webpage,
366
  query_youtube_video,
367
  run_python_code,
368
  WikipediaTool(),
369
  ],
370
  model=model,
371
+ max_steps=6, # Allow more steps for complex tasks
372
+ name="gaia_specialist",
373
+ description="Expert agent for GAIA benchmark tasks: web research, document analysis, video processing, and code execution."
374
  )
375
 
376
+ # Strategic manager agent
377
  manager = CodeAgent(
378
  model=model,
379
  managed_agents=[worker_agent],
380
  tools=[WebSearchTool()],
381
+ additional_authorized_imports=[
382
+ "time", "numpy", "pandas", "requests", "serpapi", "llama_index",
383
+ "beautifulsoup4", "markdownify", "lxml", "json", "urllib.parse",
384
+ "youtube_transcript_api", "together", "math", "datetime", "re",
385
+ "collections", "itertools"
386
+ ],
387
+ instructions="""You are an expert AI system designed to excel at the GAIA benchmark. Your mission is to provide precise, accurate answers to complex questions spanning multiple domains.
388
+
389
+ **STRATEGIC APPROACH:**
390
+
391
+ 1. **QUESTION ANALYSIS:**
392
+ - Parse the question carefully to identify: required output format, key constraints, domain (science, history, current events, etc.)
393
+ - Determine the information sources needed: web search, specific websites, videos, calculations, or combinations
394
+
395
+ 2. **EXECUTION STRATEGY:**
396
+
397
+ **Direct Web Search (for simple lookups):**
398
+ ```python
399
+ results = WebSearchTool(query="your search query")
400
+ print(results)
401
+ ```
402
+
403
+ **Delegate to Specialist Agent (for complex tasks):**
404
+ ```python
405
+ answer = gaia_specialist.run("Detailed task description with specific requirements")
406
+ print(answer)
407
+ ```
408
+
409
+ The specialist can:
410
+ - `google_search`: Comprehensive web searches with rich formatting
411
+ - `advanced_search`: Academic papers, news, specialized searches
412
+ - `query_webpage`: Deep analysis of specific URLs
413
+ - `query_youtube_video`: Extract information from video transcripts
414
+ - `run_python_code`: Mathematical calculations, data processing, algorithms
415
+ - `wikipedia_search`: Encyclopedic information
416
+
417
+ 3. **ANSWER FORMATTING:**
418
+ - Provide ONLY the final answer in the exact format requested
419
+ - No explanations, prefixes, or extra text unless specifically asked
420
+ - For numerical answers: provide just the number
421
+ - For yes/no questions: provide just "Yes" or "No"
422
+ - For lists: follow the specified format exactly
423
+
424
+ **EXAMPLES:**
425
+
426
+ Question: "What is 15! (15 factorial)?"
427
+ Strategy: Mathematical calculation → delegate to specialist
428
+ ```python
429
+ result = gaia_specialist.run("Calculate 15 factorial using Python")
430
+ print(result)
431
+ ```
432
+
433
+ Question: "What is the capital of the country where Mount Everest is located?"
434
+ Strategy: Multi-step reasoning → delegate to specialist
435
+ ```python
436
+ answer = gaia_specialist.run("Find the country where Mount Everest is located, then identify its capital city")
437
+ print(answer)
438
+ ```
439
+
440
+ Remember: Your final output must be ONLY the answer itself, formatted exactly as requested."""
441
  )
442
+
443
+ logging.info("Enhanced GAIA agent initialized successfully.")
444
  return manager
445
 
446
+ # --- Testing and Main Execution ---
447
 
448
def main():
    """Test the agent with sample GAIA-style questions.

    Runs each sample question through the initialized agent, logging the
    answer (or error) per question, with a one-second pause between runs.
    """
    # `time` is needed for the pacing delay below but does not appear in the
    # visible module-level import block, so import it locally.
    # NOTE(review): drop this if `import time` already exists at file top.
    import time

    configure_logging()
    try:
        agent = initialize_agent()
        if agent:
            # Sample questions covering different GAIA categories
            test_questions = [
                "What is the square root of 144?",
                "In what year was the Python programming language first released?",
                "What is the chemical formula for caffeine?",
                "How many days are there between January 1, 2024 and March 15, 2024?",
            ]

            for i, question in enumerate(test_questions, 1):
                logging.info(f"\n{'='*50}")
                logging.info(f"Test Question {i}: {question}")
                logging.info(f"{'='*50}")

                # Per-question guard: one failing question must not abort the run.
                try:
                    response = agent.run(question)
                    logging.info(f"Agent Answer: {response}")
                except Exception as e:
                    logging.error(f"Error processing question {i}: {e}")

                # Small delay between questions
                time.sleep(1)

    except Exception as e:
        logging.critical(f"Critical error during testing: {e}", exc_info=True)

if __name__ == "__main__":
    main()