DeepCritical / tests /integration /test_rag_integration.py
Joseph Pollack
adds the initial iterative and deep research workflows
731a241 unverified
raw
history blame
11.8 kB
"""Integration tests for RAG integration.
These tests require OPENAI_API_KEY and may make real API calls.
Marked with @pytest.mark.integration to skip in unit test runs.
"""
import pytest
from src.services.llamaindex_rag import get_rag_service
from src.tools.rag_tool import create_rag_tool
from src.tools.search_handler import SearchHandler
from src.tools.tool_executor import execute_agent_task
from src.utils.config import settings
from src.utils.models import AgentTask, Citation, Evidence
@pytest.mark.integration
class TestRAGServiceIntegration:
"""Integration tests for LlamaIndexRAGService."""
@pytest.mark.asyncio
async def test_rag_service_ingest_and_retrieve(self):
"""RAG service should ingest and retrieve evidence."""
if not settings.openai_api_key:
pytest.skip("OPENAI_API_KEY required for RAG integration tests")
# Create RAG service
rag_service = get_rag_service(collection_name="test_integration")
# Create sample evidence
evidence_list = [
Evidence(
content="Metformin is a first-line treatment for type 2 diabetes. It works by reducing glucose production in the liver and improving insulin sensitivity.",
citation=Citation(
source="pubmed",
title="Metformin Mechanism of Action",
url="https://pubmed.ncbi.nlm.nih.gov/12345678/",
date="2024-01-15",
authors=["Smith J", "Johnson M"],
),
relevance=0.9,
),
Evidence(
content="Recent studies suggest metformin may have neuroprotective effects in Alzheimer's disease models.",
citation=Citation(
source="pubmed",
title="Metformin and Neuroprotection",
url="https://pubmed.ncbi.nlm.nih.gov/12345679/",
date="2024-02-20",
authors=["Brown K", "Davis L"],
),
relevance=0.85,
),
]
# Ingest evidence
rag_service.ingest_evidence(evidence_list)
# Retrieve evidence
results = rag_service.retrieve("metformin diabetes", top_k=2)
# Assert
assert len(results) > 0
assert any("metformin" in r["text"].lower() for r in results)
assert all("text" in r for r in results)
assert all("metadata" in r for r in results)
# Cleanup
rag_service.clear_collection()
@pytest.mark.asyncio
async def test_rag_service_query(self):
"""RAG service should synthesize responses from ingested evidence."""
if not settings.openai_api_key:
pytest.skip("OPENAI_API_KEY required for RAG integration tests")
rag_service = get_rag_service(collection_name="test_query")
# Ingest evidence
evidence_list = [
Evidence(
content="Python is a high-level programming language known for its simplicity and readability.",
citation=Citation(
source="pubmed",
title="Python Programming",
url="https://example.com/python",
date="2024",
authors=["Author"],
),
)
]
rag_service.ingest_evidence(evidence_list)
# Query
response = rag_service.query("What is Python?", top_k=1)
assert isinstance(response, str)
assert len(response) > 0
assert "python" in response.lower()
# Cleanup
rag_service.clear_collection()
@pytest.mark.integration
class TestRAGToolIntegration:
"""Integration tests for RAGTool."""
@pytest.mark.asyncio
async def test_rag_tool_search(self):
"""RAGTool should search RAG service and return Evidence objects."""
if not settings.openai_api_key:
pytest.skip("OPENAI_API_KEY required for RAG integration tests")
# Create RAG service and ingest evidence
rag_service = get_rag_service(collection_name="test_rag_tool")
evidence_list = [
Evidence(
content="Machine learning is a subset of artificial intelligence.",
citation=Citation(
source="pubmed",
title="ML Basics",
url="https://example.com/ml",
date="2024",
authors=["ML Expert"],
),
)
]
rag_service.ingest_evidence(evidence_list)
# Create RAG tool
tool = create_rag_tool(rag_service=rag_service)
# Search
results = await tool.search("machine learning", max_results=5)
# Assert
assert len(results) > 0
assert all(isinstance(e, Evidence) for e in results)
assert results[0].citation.source == "rag"
assert (
"machine learning" in results[0].content.lower()
or "artificial intelligence" in results[0].content.lower()
)
# Cleanup
rag_service.clear_collection()
@pytest.mark.asyncio
async def test_rag_tool_empty_collection(self):
"""RAGTool should return empty list when collection is empty."""
if not settings.openai_api_key:
pytest.skip("OPENAI_API_KEY required for RAG integration tests")
rag_service = get_rag_service(collection_name="test_empty")
rag_service.clear_collection() # Ensure empty
tool = create_rag_tool(rag_service=rag_service)
results = await tool.search("any query")
assert results == []
@pytest.mark.integration
class TestRAGAgentIntegration:
"""Integration tests for RAGAgent in tool executor."""
@pytest.mark.asyncio
async def test_rag_agent_execution(self):
"""RAGAgent should execute and return ToolAgentOutput."""
if not settings.openai_api_key:
pytest.skip("OPENAI_API_KEY required for RAG integration tests")
# Setup: Ingest evidence into RAG
rag_service = get_rag_service(collection_name="test_rag_agent")
evidence_list = [
Evidence(
content="Deep learning uses neural networks with multiple layers.",
citation=Citation(
source="pubmed",
title="Deep Learning",
url="https://example.com/dl",
date="2024",
authors=["DL Researcher"],
),
)
]
rag_service.ingest_evidence(evidence_list)
# Execute RAGAgent task
task = AgentTask(
agent="RAGAgent",
query="deep learning",
gap="Need information about deep learning",
)
result = await execute_agent_task(task)
# Assert
assert result.output
assert "deep learning" in result.output.lower() or "neural network" in result.output.lower()
assert len(result.sources) > 0
# Cleanup
rag_service.clear_collection()
@pytest.mark.integration
class TestRAGSearchHandlerIntegration:
"""Integration tests for RAG in SearchHandler."""
@pytest.mark.asyncio
async def test_search_handler_with_rag(self):
"""SearchHandler should work with RAG tool included."""
if not settings.openai_api_key:
pytest.skip("OPENAI_API_KEY required for RAG integration tests")
# Setup: Create RAG service and ingest some evidence
rag_service = get_rag_service(collection_name="test_search_handler")
evidence_list = [
Evidence(
content="Test evidence for search handler integration.",
citation=Citation(
source="pubmed",
title="Test Evidence",
url="https://example.com/test",
date="2024",
authors=["Tester"],
),
)
]
rag_service.ingest_evidence(evidence_list)
# Create SearchHandler with RAG
handler = SearchHandler(
tools=[], # No other tools
include_rag=True,
auto_ingest_to_rag=False, # Don't auto-ingest (already has data)
)
# Execute search
result = await handler.execute("test evidence", max_results_per_tool=5)
# Assert
assert result.total_found > 0
assert "rag" in result.sources_searched
assert any(e.citation.source == "rag" for e in result.evidence)
# Cleanup
rag_service.clear_collection()
@pytest.mark.asyncio
async def test_search_handler_auto_ingest(self):
"""SearchHandler should auto-ingest evidence into RAG."""
if not settings.openai_api_key:
pytest.skip("OPENAI_API_KEY required for RAG integration tests")
# Create empty RAG service
rag_service = get_rag_service(collection_name="test_auto_ingest")
rag_service.clear_collection()
# Create mock tool that returns evidence
from unittest.mock import AsyncMock
mock_tool = AsyncMock()
mock_tool.name = "pubmed"
mock_tool.search = AsyncMock(
return_value=[
Evidence(
content="Evidence to be ingested",
citation=Citation(
source="pubmed",
title="Test",
url="https://example.com",
date="2024",
authors=[],
),
)
]
)
# Create handler with auto-ingest enabled
handler = SearchHandler(
tools=[mock_tool],
include_rag=False, # Don't include RAG as search tool
auto_ingest_to_rag=True,
)
handler._rag_service = rag_service # Inject RAG service
# Execute search
await handler.execute("test query")
# Verify evidence was ingested
rag_results = rag_service.retrieve("Evidence to be ingested", top_k=1)
assert len(rag_results) > 0
# Cleanup
rag_service.clear_collection()
@pytest.mark.integration
class TestRAGHybridSearchIntegration:
"""Integration tests for hybrid search (RAG + database)."""
@pytest.mark.asyncio
async def test_hybrid_search_rag_and_pubmed(self):
"""SearchHandler should support RAG + PubMed hybrid search."""
if not settings.openai_api_key:
pytest.skip("OPENAI_API_KEY required for RAG integration tests")
# Setup: Ingest evidence into RAG
rag_service = get_rag_service(collection_name="test_hybrid")
evidence_list = [
Evidence(
content="Previously collected evidence about metformin.",
citation=Citation(
source="pubmed",
title="Previous Research",
url="https://example.com/prev",
date="2024",
authors=[],
),
)
]
rag_service.ingest_evidence(evidence_list)
# Note: This test would require real PubMed API access
# For now, we'll just test that the handler can be created with both tools
from src.tools.pubmed import PubMedTool
handler = SearchHandler(
tools=[PubMedTool()],
include_rag=True,
auto_ingest_to_rag=True,
)
# Verify handler has both tools
tool_names = [t.name for t in handler.tools]
assert "pubmed" in tool_names
assert "rag" in tool_names
# Cleanup
rag_service.clear_collection()