File size: 4,045 Bytes
d9162ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e3c2163
 
 
 
b4f9ff5
d9162ac
 
 
e3c2163
d9162ac
e3c2163
d9162ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""Serper web search tool using Serper API for Google searches."""

import structlog
from tenacity import retry, stop_after_attempt, wait_exponential

from src.tools.query_utils import preprocess_query
from src.tools.rate_limiter import get_serper_limiter
from src.tools.vendored.serper_client import SerperClient
from src.tools.vendored.web_search_core import scrape_urls
from src.utils.config import settings
from src.utils.exceptions import ConfigurationError, RateLimitError, SearchError
from src.utils.models import Citation, Evidence

# Module-level structlog logger used by the search tool below.
logger = structlog.get_logger()


class SerperWebSearchTool:
    """Web search tool backed by the Serper API (Google search results).

    A search fetches result snippets through ``SerperClient``, scrapes the
    returned URLs for their content, and wraps each scraped page in an
    ``Evidence`` object.
    """

    def __init__(self, api_key: str | None = None) -> None:
        """Set up the Serper client and its rate limiter.

        Args:
            api_key: Serper API key. When falsy, ``settings.serper_api_key``
                is used instead.

        Raises:
            ConfigurationError: If neither the argument nor the settings
                provide an API key.
        """
        self.api_key = api_key or settings.serper_api_key
        if not self.api_key:
            raise ConfigurationError(
                "Serper API key required. "
                "Set SERPER_API_KEY environment variable or serper_api_key in settings."
            )

        # The client and the limiter are both built from the resolved key.
        self._client = SerperClient(api_key=self.api_key)
        self._limiter = get_serper_limiter(self.api_key)

    @property
    def name(self) -> str:
        """Identifier of this search tool (always ``"serper"``)."""
        return "serper"

    async def _rate_limit(self) -> None:
        """Wait on the Serper rate limiter before issuing a request."""
        await self._limiter.acquire()

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=1, max=10),
        reraise=True,
    )
    async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
        """Search the web through Serper and return scraped results as evidence.

        The call is wrapped in a tenacity retry: up to three attempts with
        exponential backoff, after which the last exception is re-raised.
        Every attempt passes through the rate limiter again.

        Args:
            query: The search query string.
            max_results: Maximum number of results requested from the client.

        Returns:
            One ``Evidence`` per scraped result, or an empty list when the
            client returns no search results.

        Raises:
            SearchError: If the search fails; unexpected exceptions are
                wrapped in this type.
            RateLimitError: If the rate limit is exceeded (propagated as-is).
        """
        await self._rate_limit()

        # Use the preprocessed query unless preprocessing left nothing behind.
        final_query = preprocess_query(query) or query

        try:
            # Step 1: fetch search results (snippets) from Serper.
            hits = await self._client.search(
                final_query, filter_for_relevance=False, max_results=max_results
            )
            if not hits:
                logger.info("No search results found", query=final_query)
                return []

            # Step 2: scrape the result URLs to obtain full content.
            pages = await scrape_urls(hits)

            # Step 3: wrap every scraped page in an Evidence object.
            evidence: list[Evidence] = []
            for page in pages:
                raw_title = page.title
                # Titles are capped at 500 characters to satisfy the Citation
                # model's validation; longer ones end with an ellipsis.
                short_title = raw_title if len(raw_title) <= 500 else raw_title[:497] + "..."

                body = page.text
                citation = Citation(
                    title=short_title,
                    url=page.url,
                    # "web" (not "serper") so the value matches the SourceName literal.
                    source="web",
                    date="Unknown",
                    authors=[],
                )
                evidence.append(Evidence(content=body, citation=citation, relevance=0.0))

            logger.info(
                "Serper search complete",
                query=final_query,
                results_found=len(evidence),
            )
            return evidence

        except (RateLimitError, SearchError):
            # Domain errors are already in the shape callers expect.
            raise
        except Exception as e:
            # Anything else is logged and converted into a SearchError.
            logger.error("Unexpected error in Serper search", error=str(e), query=final_query)
            raise SearchError(f"Serper search failed: {e}") from e