VibecoderMcSwaggins committed on
Commit
b25ffdb
·
2 Parent(s): 24a5878 506a9c0

Merge pull request #10 from The-Obstacle-Is-The-Way/feat/phase6-embeddings

Browse files
Dockerfile CHANGED
@@ -4,9 +4,10 @@ FROM python:3.11-slim
4
  # Set working directory
5
  WORKDIR /app
6
 
7
- # Install system dependencies
8
  RUN apt-get update && apt-get install -y \
9
  git \
 
10
  && rm -rf /var/lib/apt/lists/*
11
 
12
  # Install uv
@@ -18,12 +19,22 @@ COPY uv.lock .
18
  COPY src/ src/
19
  COPY README.md .
20
 
21
- # Install dependencies
22
- RUN uv sync --frozen --no-dev
23
 
24
- # Create non-root user
25
  RUN useradd --create-home --shell /bin/bash appuser
 
 
 
 
 
 
 
 
 
26
  USER appuser
 
27
 
28
  # Expose port
29
  EXPOSE 7860
 
4
  # Set working directory
5
  WORKDIR /app
6
 
7
+ # Install system dependencies (curl needed for HEALTHCHECK)
8
  RUN apt-get update && apt-get install -y \
9
  git \
10
+ curl \
11
  && rm -rf /var/lib/apt/lists/*
12
 
13
  # Install uv
 
19
  COPY src/ src/
20
  COPY README.md .
21
 
22
+ # Install runtime dependencies only (no dev/test tools)
23
+ RUN uv sync --frozen --no-dev --extra embeddings --extra magentic
24
 
25
+ # Create non-root user BEFORE downloading models
26
  RUN useradd --create-home --shell /bin/bash appuser
27
+
28
+ # Set cache directory for HuggingFace models (must be writable by appuser)
29
+ ENV HF_HOME=/app/.cache
30
+ ENV TRANSFORMERS_CACHE=/app/.cache
31
+
32
+ # Create cache dir with correct ownership
33
+ RUN mkdir -p /app/.cache && chown -R appuser:appuser /app/.cache
34
+
35
+ # Pre-download the embedding model during build (as appuser to set correct ownership)
36
  USER appuser
37
+ RUN uv run python -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('all-MiniLM-L6-v2')"
38
 
39
  # Expose port
40
  EXPOSE 7860
docs/architecture/overview.md CHANGED
@@ -65,7 +65,7 @@ Using existing approved drugs to treat NEW diseases they weren't originally desi
65
 
66
  ### High-Level Design (Phases 1-8)
67
 
68
- ```
69
  User Query
70
 
71
  Gradio UI (Phase 4)
 
65
 
66
  ### High-Level Design (Phases 1-8)
67
 
68
+ ```text
69
  User Query
70
 
71
  Gradio UI (Phase 4)
docs/implementation/08_phase_report.md CHANGED
@@ -11,7 +11,7 @@
11
  Current limitation: **Synthesis is basic markdown, not a scientific report.**
12
 
13
  Current output:
14
- ```
15
  ## Drug Repurposing Analysis
16
  ### Drug Candidates
17
  - Metformin
@@ -22,7 +22,7 @@ Current output:
22
  ```
23
 
24
  With Report Agent:
25
- ```
26
  ## Executive Summary
27
  One-paragraph summary for busy readers...
28
 
@@ -59,7 +59,7 @@ Properly formatted citations...
59
  ## 2. Architecture
60
 
61
  ### Phase 8 Addition
62
- ```
63
  Evidence + Hypotheses + Assessment
64
 
65
  Report Agent
@@ -68,7 +68,7 @@ Evidence + Hypotheses + Assessment
68
  ```
69
 
70
  ### Report Generation Flow
71
- ```
72
  1. JudgeAgent says "synthesize"
73
  2. Magentic Manager selects ReportAgent
74
  3. ReportAgent gathers:
 
11
  Current limitation: **Synthesis is basic markdown, not a scientific report.**
12
 
13
  Current output:
14
+ ```markdown
15
  ## Drug Repurposing Analysis
16
  ### Drug Candidates
17
  - Metformin
 
22
  ```
23
 
24
  With Report Agent:
25
+ ```markdown
26
  ## Executive Summary
27
  One-paragraph summary for busy readers...
28
 
 
59
  ## 2. Architecture
60
 
61
  ### Phase 8 Addition
62
+ ```text
63
  Evidence + Hypotheses + Assessment
64
 
65
  Report Agent
 
68
  ```
69
 
70
  ### Report Generation Flow
71
+ ```text
72
  1. JudgeAgent says "synthesize"
73
  2. Magentic Manager selects ReportAgent
74
  3. ReportAgent gathers:
docs/implementation/roadmap.md CHANGED
@@ -165,7 +165,7 @@ tests/
165
 
166
  ## Complete Architecture (Phases 1-8)
167
 
168
- ```
169
  User Query
170
 
171
  Gradio UI (Phase 4)
 
165
 
166
  ## Complete Architecture (Phases 1-8)
167
 
168
+ ```text
169
  User Query
170
 
171
  Gradio UI (Phase 4)
pyproject.toml CHANGED
@@ -49,6 +49,10 @@ dev = [
49
  magentic = [
50
  "agent-framework-core",
51
  ]
 
 
 
 
52
 
53
  [build-system]
54
  requires = ["hatchling"]
 
49
  magentic = [
50
  "agent-framework-core",
51
  ]
52
+ embeddings = [
53
+ "chromadb>=0.4.0",
54
+ "sentence-transformers>=2.2.0",
55
+ ]
56
 
57
  [build-system]
58
  requires = ["hatchling"]
src/agents/search_agent.py CHANGED
@@ -1,5 +1,5 @@
1
  from collections.abc import AsyncIterable
2
- from typing import Any
3
 
4
  from agent_framework import (
5
  AgentRunResponse,
@@ -11,7 +11,10 @@ from agent_framework import (
11
  )
12
 
13
  from src.orchestrator import SearchHandlerProtocol
14
- from src.utils.models import Evidence, SearchResult
 
 
 
15
 
16
 
17
  class SearchAgent(BaseAgent): # type: ignore[misc]
@@ -21,6 +24,7 @@ class SearchAgent(BaseAgent): # type: ignore[misc]
21
  self,
22
  search_handler: SearchHandlerProtocol,
23
  evidence_store: dict[str, list[Evidence]],
 
24
  ) -> None:
25
  super().__init__(
26
  name="SearchAgent",
@@ -28,6 +32,7 @@ class SearchAgent(BaseAgent): # type: ignore[misc]
28
  )
29
  self._handler = search_handler
30
  self._evidence_store = evidence_store
 
31
 
32
  async def run(
33
  self,
@@ -61,31 +66,79 @@ class SearchAgent(BaseAgent): # type: ignore[misc]
61
  # Execute search
62
  result: SearchResult = await self._handler.execute(query, max_results_per_tool=10)
63
 
 
 
 
 
64
  # Update shared evidence store
65
- # We append new evidence, deduplicating by URL is handled in Orchestrator usually,
66
- # but here we should probably add to the list.
67
- # For simplicity in this MVP phase, we just extend the list.
68
- # Ideally, we should dedupe.
69
- existing_urls = {e.citation.url for e in self._evidence_store["current"]}
70
- new_unique = [e for e in result.evidence if e.citation.url not in existing_urls]
71
- self._evidence_store["current"].extend(new_unique)
72
-
73
- # Format response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  evidence_text = "\n".join(
75
  [
76
  f"- [{e.citation.title}]({e.citation.url}): {e.content[:200]}..."
77
- for e in result.evidence[:5]
78
  ]
79
  )
80
 
81
  response_text = (
82
- f"Found {result.total_found} sources ({len(new_unique)} new):\n\n{evidence_text}"
 
83
  )
84
 
85
  return AgentRunResponse(
86
  messages=[ChatMessage(role=Role.ASSISTANT, text=response_text)],
87
  response_id=f"search-{result.total_found}",
88
- additional_properties={"evidence": [e.model_dump() for e in result.evidence]},
89
  )
90
 
91
  async def run_stream(
 
1
  from collections.abc import AsyncIterable
2
+ from typing import TYPE_CHECKING, Any
3
 
4
  from agent_framework import (
5
  AgentRunResponse,
 
11
  )
12
 
13
  from src.orchestrator import SearchHandlerProtocol
14
+ from src.utils.models import Citation, Evidence, SearchResult
15
+
16
+ if TYPE_CHECKING:
17
+ from src.services.embeddings import EmbeddingService
18
 
19
 
20
  class SearchAgent(BaseAgent): # type: ignore[misc]
 
24
  self,
25
  search_handler: SearchHandlerProtocol,
26
  evidence_store: dict[str, list[Evidence]],
27
+ embedding_service: "EmbeddingService | None" = None,
28
  ) -> None:
29
  super().__init__(
30
  name="SearchAgent",
 
32
  )
33
  self._handler = search_handler
34
  self._evidence_store = evidence_store
35
+ self._embeddings = embedding_service
36
 
37
  async def run(
38
  self,
 
66
  # Execute search
67
  result: SearchResult = await self._handler.execute(query, max_results_per_tool=10)
68
 
69
+ # Track what to show in response (initialized to search results as default)
70
+ evidence_to_show: list[Evidence] = result.evidence
71
+ total_new = 0
72
+
73
  # Update shared evidence store
74
+ if self._embeddings:
75
+ # Deduplicate by semantic similarity (async-safe)
76
+ unique_evidence = await self._embeddings.deduplicate(result.evidence)
77
+
78
+ # Also search for semantically related evidence (async-safe)
79
+ related = await self._embeddings.search_similar(query, n_results=5)
80
+
81
+ # Merge related evidence not already in results
82
+ existing_urls = {e.citation.url for e in unique_evidence}
83
+
84
+ # Reconstruct Evidence objects from stored vector DB data
85
+ related_evidence: list[Evidence] = []
86
+ for item in related:
87
+ if item["id"] not in existing_urls:
88
+ meta = item.get("metadata", {})
89
+ # Parse authors (stored as comma-separated string)
90
+ authors_str = meta.get("authors", "")
91
+ authors = [a.strip() for a in authors_str.split(",") if a.strip()]
92
+
93
+ ev = Evidence(
94
+ content=item["content"],
95
+ citation=Citation(
96
+ title=meta.get("title", "Related Evidence"),
97
+ url=item["id"],
98
+ source=meta.get("source", "vector_db"),
99
+ date=meta.get("date", "n.d."),
100
+ authors=authors,
101
+ ),
102
+ # Convert distance to relevance (lower distance = higher relevance)
103
+ relevance=max(0.0, 1.0 - item.get("distance", 0.5)),
104
+ )
105
+ related_evidence.append(ev)
106
+
107
+ # Combine unique from search + related from vector DB
108
+ final_new_evidence = unique_evidence + related_evidence
109
+
110
+ # Add to global store (deduping against global store)
111
+ global_urls = {e.citation.url for e in self._evidence_store["current"]}
112
+ really_new = [e for e in final_new_evidence if e.citation.url not in global_urls]
113
+ self._evidence_store["current"].extend(really_new)
114
+
115
+ total_new = len(really_new)
116
+ evidence_to_show = unique_evidence + related_evidence
117
+
118
+ else:
119
+ # Fallback to URL-based deduplication (no embeddings)
120
+ existing_urls = {e.citation.url for e in self._evidence_store["current"]}
121
+ new_unique = [e for e in result.evidence if e.citation.url not in existing_urls]
122
+ self._evidence_store["current"].extend(new_unique)
123
+ total_new = len(new_unique)
124
+ evidence_to_show = result.evidence
125
+
126
  evidence_text = "\n".join(
127
  [
128
  f"- [{e.citation.title}]({e.citation.url}): {e.content[:200]}..."
129
+ for e in evidence_to_show[:5]
130
  ]
131
  )
132
 
133
  response_text = (
134
+ f"Found {result.total_found} sources ({total_new} new added to context):\n\n"
135
+ f"{evidence_text}"
136
  )
137
 
138
  return AgentRunResponse(
139
  messages=[ChatMessage(role=Role.ASSISTANT, text=response_text)],
140
  response_id=f"search-{result.total_found}",
141
+ additional_properties={"evidence": [e.model_dump() for e in evidence_to_show]},
142
  )
143
 
144
  async def run_stream(
src/app.py CHANGED
@@ -72,23 +72,30 @@ async def research_agent(
72
  yield "Please enter a research question."
73
  return
74
 
75
- # Create orchestrator (use mock if no API key)
76
- use_mock = not (os.getenv("OPENAI_API_KEY") or os.getenv("ANTHROPIC_API_KEY"))
 
77
 
78
- # If magentic mode requested but no keys, fallback/warn
 
 
 
 
 
 
 
79
  if mode == "magentic" and use_mock:
80
  yield (
81
- "⚠️ **Warning**: Magentic mode requires valid API keys. "
82
  "Falling back to Mock Simple mode."
83
  )
84
  mode = "simple"
85
 
86
- orchestrator = configure_orchestrator(use_mock=use_mock, mode=mode)
87
-
88
  # Run the agent and stream events
89
- response_parts = []
90
 
91
  try:
 
92
  async for event in orchestrator.run(message):
93
  # Format event as markdown
94
  event_md = event.to_markdown()
@@ -144,7 +151,7 @@ def create_demo() -> Any:
144
  choices=["simple", "magentic"],
145
  value="simple",
146
  label="Orchestrator Mode",
147
- info="Simple: Linear loop | Magentic: Multi-Agent (Requires OpenAI Key)",
148
  )
149
  ],
150
  )
 
72
  yield "Please enter a research question."
73
  return
74
 
75
+ # Decide whether to use real LLMs or mock based on mode and available keys
76
+ has_openai = bool(os.getenv("OPENAI_API_KEY"))
77
+ has_anthropic = bool(os.getenv("ANTHROPIC_API_KEY"))
78
 
79
+ if mode == "magentic":
80
+ # Magentic currently supports OpenAI only
81
+ use_mock = not has_openai
82
+ else:
83
+ # Simple mode can work with either provider
84
+ use_mock = not (has_openai or has_anthropic)
85
+
86
+ # If magentic mode requested but no OpenAI key, fallback/warn
87
  if mode == "magentic" and use_mock:
88
  yield (
89
+ "⚠️ **Warning**: Magentic mode requires OpenAI API key. "
90
  "Falling back to Mock Simple mode."
91
  )
92
  mode = "simple"
93
 
 
 
94
  # Run the agent and stream events
95
+ response_parts: list[str] = []
96
 
97
  try:
98
+ orchestrator = configure_orchestrator(use_mock=use_mock, mode=mode)
99
  async for event in orchestrator.run(message):
100
  # Format event as markdown
101
  event_md = event.to_markdown()
 
151
  choices=["simple", "magentic"],
152
  value="simple",
153
  label="Orchestrator Mode",
154
+ info="Simple: Linear (OpenAI/Anthropic) | Magentic: Multi-Agent (OpenAI)",
155
  )
156
  ],
157
  )
src/orchestrator.py CHANGED
@@ -263,7 +263,7 @@ class Orchestrator:
263
 
264
  citations = "\n".join(
265
  [
266
- f"{i+1}. [{e.citation.title}]({e.citation.url}) "
267
  f"({e.citation.source.upper()}, {e.citation.date})"
268
  for i, e in enumerate(evidence[:10]) # Limit to 10 citations
269
  ]
@@ -312,7 +312,7 @@ class Orchestrator:
312
  """
313
  citations = "\n".join(
314
  [
315
- f"{i+1}. [{e.citation.title}]({e.citation.url}) ({e.citation.source.upper()})"
316
  for i, e in enumerate(evidence[:10])
317
  ]
318
  )
 
263
 
264
  citations = "\n".join(
265
  [
266
+ f"{i + 1}. [{e.citation.title}]({e.citation.url}) "
267
  f"({e.citation.source.upper()}, {e.citation.date})"
268
  for i, e in enumerate(evidence[:10]) # Limit to 10 citations
269
  ]
 
312
  """
313
  citations = "\n".join(
314
  [
315
+ f"{i + 1}. [{e.citation.title}]({e.citation.url}) ({e.citation.source.upper()})"
316
  for i, e in enumerate(evidence[:10])
317
  ]
318
  )
src/orchestrator_magentic.py CHANGED
@@ -1,4 +1,9 @@
1
- """Magentic-based orchestrator for DeepCritical."""
 
 
 
 
 
2
 
3
  from collections.abc import AsyncGenerator
4
 
@@ -17,6 +22,7 @@ from src.agents.judge_agent import JudgeAgent
17
  from src.agents.search_agent import SearchAgent
18
  from src.orchestrator import JudgeHandlerProtocol, SearchHandlerProtocol
19
  from src.utils.config import settings
 
20
  from src.utils.models import AgentEvent, Evidence
21
 
22
  logger = structlog.get_logger()
@@ -27,6 +33,11 @@ class MagenticOrchestrator:
27
  Magentic-based orchestrator - same API as Orchestrator.
28
 
29
  Uses Microsoft Agent Framework's MagenticBuilder for multi-agent coordination.
 
 
 
 
 
30
  """
31
 
32
  def __init__(
@@ -54,12 +65,32 @@ class MagenticOrchestrator:
54
  iteration=0,
55
  )
56
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  # Create agent wrappers
58
- search_agent = SearchAgent(self._search_handler, self._evidence_store)
 
 
59
  judge_agent = JudgeAgent(self._judge_handler, self._evidence_store)
60
 
61
  # Build Magentic workflow
62
- # Note: MagenticBuilder.participants takes named arguments for agent instances
 
 
 
 
 
 
63
  workflow = (
64
  MagenticBuilder()
65
  .participants(
@@ -78,8 +109,17 @@ class MagenticOrchestrator:
78
  )
79
 
80
  # Task instruction for the manager
81
- task = f"""Research drug repurposing opportunities for: {query}
 
 
 
 
 
 
 
82
 
 
 
83
  Instructions:
84
  1. Use SearcherAgent to find evidence. SEND ONLY A SIMPLE KEYWORD QUERY (e.g. "metformin aging")
85
  as the instruction. Complex queries fail.
 
1
+ """Magentic-based orchestrator for DeepCritical.
2
+
3
+ NOTE: Magentic mode currently requires OpenAI API keys. The MagenticBuilder's
4
+ standard manager uses OpenAIChatClient. Anthropic support may be added when
5
+ the agent_framework provides an AnthropicChatClient.
6
+ """
7
 
8
  from collections.abc import AsyncGenerator
9
 
 
22
  from src.agents.search_agent import SearchAgent
23
  from src.orchestrator import JudgeHandlerProtocol, SearchHandlerProtocol
24
  from src.utils.config import settings
25
+ from src.utils.exceptions import ConfigurationError
26
  from src.utils.models import AgentEvent, Evidence
27
 
28
  logger = structlog.get_logger()
 
33
  Magentic-based orchestrator - same API as Orchestrator.
34
 
35
  Uses Microsoft Agent Framework's MagenticBuilder for multi-agent coordination.
36
+
37
+ Note:
38
+ Magentic mode requires OPENAI_API_KEY. The MagenticBuilder's standard
39
+ manager currently only supports OpenAI. If you have only an Anthropic
40
+ key, use the "simple" orchestrator mode instead.
41
  """
42
 
43
  def __init__(
 
65
  iteration=0,
66
  )
67
 
68
+ # Initialize embedding service (optional)
69
+ embedding_service = None
70
+ try:
71
+ from src.services.embeddings import get_embedding_service
72
+
73
+ embedding_service = get_embedding_service()
74
+ logger.info("Embedding service enabled")
75
+ except ImportError:
76
+ logger.info("Embedding service not available (dependencies missing)")
77
+ except Exception as e:
78
+ logger.warning("Failed to initialize embedding service", error=str(e))
79
+
80
  # Create agent wrappers
81
+ search_agent = SearchAgent(
82
+ self._search_handler, self._evidence_store, embedding_service=embedding_service
83
+ )
84
  judge_agent = JudgeAgent(self._judge_handler, self._evidence_store)
85
 
86
  # Build Magentic workflow
87
+ # Note: MagenticBuilder requires OpenAI - validate key exists
88
+ if not settings.openai_api_key:
89
+ raise ConfigurationError(
90
+ "Magentic mode requires OPENAI_API_KEY. "
91
+ "Set the key or use mode='simple' with Anthropic."
92
+ )
93
+
94
  workflow = (
95
  MagenticBuilder()
96
  .participants(
 
109
  )
110
 
111
  # Task instruction for the manager
112
+ semantic_note = ""
113
+ if embedding_service:
114
+ semantic_note = """
115
+ The system has semantic search enabled. When evidence is found:
116
+ 1. Related concepts will be automatically surfaced
117
+ 2. Duplicates are removed by meaning, not just URL
118
+ 3. Use the surfaced related concepts to refine searches
119
+ """
120
 
121
+ task = f"""Research drug repurposing opportunities for: {query}
122
+ {semantic_note}
123
  Instructions:
124
  1. Use SearcherAgent to find evidence. SEND ONLY A SIMPLE KEYWORD QUERY (e.g. "metformin aging")
125
  as the instruction. Complex queries fail.
src/services/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Services for DeepCritical."""
src/services/embeddings.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Embedding service for semantic search.
2
+
3
+ IMPORTANT: All public methods are async to avoid blocking the event loop.
4
+ The sentence-transformers model is CPU-bound, so we use run_in_executor().
5
+ """
6
+
7
+ import asyncio
8
+ from typing import Any
9
+
10
+ import chromadb
11
+ import structlog
12
+ from sentence_transformers import SentenceTransformer
13
+
14
+ from src.utils.models import Evidence
15
+
16
+
17
class EmbeddingService:
    """Handles text embedding and vector storage.

    All embedding operations run in a thread pool to avoid blocking
    the async event loop. See src/tools/websearch.py for the pattern.
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """Load the embedding model and create an in-memory vector collection.

        Args:
            model_name: sentence-transformers model identifier to load.
        """
        self._model = SentenceTransformer(model_name)
        self._client = chromadb.Client()  # In-memory for hackathon
        self._collection = self._client.create_collection(
            name="evidence", metadata={"hnsw:space": "cosine"}
        )

    # ─────────────────────────────────────────────────────────────────
    # Sync internal methods (run in thread pool)
    # ─────────────────────────────────────────────────────────────────

    def _sync_embed(self, text: str) -> list[float]:
        """Synchronous embedding - DO NOT call directly from async code."""
        result: list[float] = self._model.encode(text).tolist()
        return result

    def _sync_batch_embed(self, texts: list[str]) -> list[list[float]]:
        """Batch embedding for efficiency - DO NOT call directly from async code."""
        embeddings = self._model.encode(texts)
        return [e.tolist() for e in embeddings]

    # ─────────────────────────────────────────────────────────────────
    # Async public methods (safe for event loop)
    # ─────────────────────────────────────────────────────────────────

    async def embed(self, text: str) -> list[float]:
        """Embed a single text (async-safe).

        Uses run_in_executor to avoid blocking the event loop.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._sync_embed, text)

    async def embed_batch(self, texts: list[str]) -> list[list[float]]:
        """Batch embed multiple texts (async-safe, more efficient)."""
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._sync_batch_embed, texts)

    async def add_evidence(self, evidence_id: str, content: str, metadata: dict[str, Any]) -> None:
        """Add evidence to vector store (async-safe).

        Args:
            evidence_id: Unique id for the item (callers use the citation URL).
            content: Evidence text that gets embedded and stored as the document.
            metadata: Metadata dict persisted alongside the document.
        """
        embedding = await self.embed(content)
        # ChromaDB operations are fast, but wrap for consistency
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(
            None,
            lambda: self._collection.add(
                ids=[evidence_id],
                embeddings=[embedding],  # type: ignore[arg-type]
                metadatas=[metadata],
                documents=[content],
            ),
        )

    async def search_similar(self, query: str, n_results: int = 5) -> list[dict[str, Any]]:
        """Find semantically similar evidence (async-safe).

        Returns:
            List of dicts with keys "id", "content", "metadata", "distance";
            empty list when the collection returns no usable hits.
        """
        query_embedding = await self.embed(query)

        loop = asyncio.get_running_loop()
        results = await loop.run_in_executor(
            None,
            lambda: self._collection.query(
                query_embeddings=[query_embedding],  # type: ignore[arg-type]
                n_results=n_results,
            ),
        )

        # Handle empty results gracefully
        ids = results.get("ids")
        docs = results.get("documents")
        metas = results.get("metadatas")
        dists = results.get("distances")

        if not ids or not ids[0] or not docs or not metas or not dists:
            return []

        # Loop variable renamed from `id` to avoid shadowing the builtin.
        return [
            {"id": item_id, "content": doc, "metadata": meta, "distance": dist}
            for item_id, doc, meta, dist in zip(
                ids[0],
                docs[0],
                metas[0],
                dists[0],
                strict=False,
            )
        ]

    async def deduplicate(
        self, new_evidence: list[Evidence], threshold: float = 0.9
    ) -> list[Evidence]:
        """Remove semantically duplicate evidence (async-safe).

        Args:
            new_evidence: List of evidence items to deduplicate
            threshold: Similarity threshold (0.9 = 90% similar is duplicate).
                ChromaDB cosine distance: 0=identical, 2=opposite.
                We consider duplicate if distance < (1 - threshold).

        Returns:
            List of unique evidence items (not already in vector store).
        """
        unique: list[Evidence] = []
        for evidence in new_evidence:
            try:
                similar = await self.search_similar(evidence.content, n_results=1)
                # ChromaDB cosine distance: 0 = identical, 2 = opposite
                # threshold=0.9 means distance < 0.1 is considered duplicate
                is_duplicate = similar and similar[0]["distance"] < (1 - threshold)

                if not is_duplicate:
                    unique.append(evidence)
                    # Store FULL citation metadata for reconstruction later
                    await self.add_evidence(
                        evidence_id=evidence.citation.url,
                        content=evidence.content,
                        metadata={
                            "source": evidence.citation.source,
                            "title": evidence.citation.title,
                            "date": evidence.citation.date,
                            "authors": ",".join(evidence.citation.authors or []),
                        },
                    )
            except Exception as e:
                # Log but don't fail entire deduplication for one bad item
                structlog.get_logger().warning(
                    "Failed to process evidence in deduplicate",
                    url=evidence.citation.url,
                    error=str(e),
                )
                # Still add to unique list - better to have duplicates than lose data
                unique.append(evidence)

        return unique
156
+
157
+
158
# Process-wide singleton; constructed lazily because model loading is slow.
_embedding_service: EmbeddingService | None = None


def get_embedding_service() -> EmbeddingService:
    """Return the shared EmbeddingService, creating it on first use."""
    global _embedding_service  # noqa: PLW0603
    if _embedding_service is not None:
        return _embedding_service
    _embedding_service = EmbeddingService()
    return _embedding_service
src/tools/pubmed.py CHANGED
@@ -22,7 +22,7 @@ class PubMedTool:
22
  def __init__(self, api_key: str | None = None) -> None:
23
  self.api_key = api_key or settings.ncbi_api_key
24
  # Ignore placeholder values from .env.example
25
- if self.api_key and "your-ncbi-key-here" in self.api_key:
26
  self.api_key = None
27
  self._last_request_time = 0.0
28
 
 
22
  def __init__(self, api_key: str | None = None) -> None:
23
  self.api_key = api_key or settings.ncbi_api_key
24
  # Ignore placeholder values from .env.example
25
+ if self.api_key == "your-ncbi-key-here":
26
  self.api_key = None
27
  self._last_request_time = 0.0
28
 
src/utils/config.py CHANGED
@@ -26,8 +26,8 @@ class Settings(BaseSettings):
26
  llm_provider: Literal["openai", "anthropic"] = Field(
27
  default="openai", description="Which LLM provider to use"
28
  )
29
- openai_model: str = Field(default="gpt-5.1", description="OpenAI model name")
30
- anthropic_model: str = Field(default="claude-sonnet-4-5", description="Anthropic model")
31
 
32
  # PubMed Configuration
33
  ncbi_api_key: str | None = Field(
 
26
  llm_provider: Literal["openai", "anthropic"] = Field(
27
  default="openai", description="Which LLM provider to use"
28
  )
29
+ openai_model: str = Field(default="gpt-4o", description="OpenAI model name")
30
+ anthropic_model: str = Field(default="claude-sonnet-4-20250514", description="Anthropic model")
31
 
32
  # PubMed Configuration
33
  ncbi_api_key: str | None = Field(
src/utils/models.py CHANGED
@@ -125,6 +125,7 @@ class AgentEvent(BaseModel):
125
  "synthesizing": "📝",
126
  "complete": "🎉",
127
  "error": "❌",
 
128
  }
129
  icon = icons.get(self.type, "•")
130
  return f"{icon} **{self.type.upper()}**: {self.message}"
 
125
  "synthesizing": "📝",
126
  "complete": "🎉",
127
  "error": "❌",
128
+ "streaming": "📡",
129
  }
130
  icon = icons.get(self.type, "•")
131
  return f"{icon} **{self.type.upper()}**: {self.message}"
tests/unit/agents/test_search_agent.py CHANGED
@@ -81,5 +81,48 @@ async def test_run_handles_list_input(mock_handler: AsyncMock) -> None:
81
  ChatMessage(role=Role.USER, text="test query"),
82
  ]
83
  await agent.run(messages)
84
-
85
  mock_handler.execute.assert_awaited_once_with("test query", max_results_per_tool=10)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  ChatMessage(role=Role.USER, text="test query"),
82
  ]
83
  await agent.run(messages)
 
84
  mock_handler.execute.assert_awaited_once_with("test query", max_results_per_tool=10)
85
+
86
+
87
+ @pytest.mark.asyncio
88
+ async def test_run_uses_embeddings(mock_handler: AsyncMock) -> None:
89
+ """Test that run uses embedding service if provided."""
90
+ store: dict = {"current": []}
91
+
92
+ # Mock embedding service
93
+ mock_embeddings = AsyncMock()
94
+ # Mock deduplicate to return the evidence as is (or filtered)
95
+ mock_embeddings.deduplicate.return_value = [
96
+ Evidence(
97
+ content="unique content",
98
+ citation=Citation(source="pubmed", url="u1", title="t1", date="2024"),
99
+ )
100
+ ]
101
+ # Mock search_similar to return related items
102
+ mock_embeddings.search_similar.return_value = [
103
+ {
104
+ "id": "u2",
105
+ "content": "related content",
106
+ "metadata": {"source": "web", "title": "related", "date": "2024"},
107
+ "distance": 0.1,
108
+ }
109
+ ]
110
+
111
+ agent = SearchAgent(mock_handler, store, embedding_service=mock_embeddings)
112
+
113
+ await agent.run("test query")
114
+
115
+ # Verify deduplicate called
116
+ mock_embeddings.deduplicate.assert_awaited_once()
117
+
118
+ # Verify semantic search called
119
+ mock_embeddings.search_similar.assert_awaited_once_with("test query", n_results=5)
120
+
121
+ # Verify store contains related evidence (if logic implemented to add it)
122
+ # Note: logic for adding related evidence needs to be implemented in SearchAgent
123
+ # The spec says: "Merge related evidence not already in results"
124
+
125
+ # Check if u1 (deduplicated result) is in store
126
+ assert any(e.citation.url == "u1" for e in store["current"])
127
+ # Check if u2 (related result) is in store
128
+ assert any(e.citation.url == "u2" for e in store["current"])
tests/unit/services/test_embeddings.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Unit tests for EmbeddingService."""
2
+
3
+ from unittest.mock import patch
4
+
5
+ import numpy as np
6
+ import pytest
7
+
8
+ # Skip if embeddings dependencies are not installed
9
+ pytest.importorskip("chromadb")
10
+ pytest.importorskip("sentence_transformers")
11
+
12
+ from src.services.embeddings import EmbeddingService
13
+
14
+
15
class TestEmbeddingService:
    """Tests for EmbeddingService with the model and vector DB mocked out."""

    @pytest.fixture
    def mock_sentence_transformer(self):
        """Patch SentenceTransformer so no real model is downloaded."""
        with patch("src.services.embeddings.SentenceTransformer") as st_class:
            model = st_class.return_value
            # encode() yields a numpy vector, like the real model
            model.encode.return_value = np.array([0.1, 0.2, 0.3])
            yield model

    @pytest.fixture
    def mock_chroma_client(self):
        """Patch chromadb.Client; the collection returns one canned hit."""
        with patch("src.services.embeddings.chromadb.Client") as client_class:
            client = client_class.return_value
            collection = client.create_collection.return_value
            collection.query.return_value = {
                "ids": [["id1"]],
                "documents": [["doc1"]],
                "metadatas": [[{"source": "pubmed"}]],
                "distances": [[0.1]],
            }
            yield client

    @pytest.mark.asyncio
    async def test_embed_returns_vector(self, mock_sentence_transformer, mock_chroma_client):
        """Embedding should return a float vector (async check)."""
        service = EmbeddingService()

        vector = await service.embed("metformin diabetes")

        assert isinstance(vector, list)
        assert len(vector) == 3  # noqa: PLR2004
        assert all(isinstance(component, float) for component in vector)
        # The mocked model must have been invoked (via the executor)
        mock_sentence_transformer.encode.assert_called_once()

    @pytest.mark.asyncio
    async def test_batch_embed_efficient(self, mock_sentence_transformer, mock_chroma_client):
        """Batch embedding should call encode with list."""
        # Batch encode returns a 2-D array: one row per input text
        mock_sentence_transformer.encode.return_value = np.array([[0.1, 0.2], [0.3, 0.4]])

        service = EmbeddingService()
        inputs = ["text one", "text two"]

        vectors = await service.embed_batch(inputs)

        assert len(vectors) == 2  # noqa: PLR2004
        assert isinstance(vectors[0], list)
        mock_sentence_transformer.encode.assert_called_with(inputs)

    @pytest.mark.asyncio
    async def test_add_and_search(self, mock_sentence_transformer, mock_chroma_client):
        """Should be able to add evidence and search for similar."""
        collection = mock_chroma_client.create_collection.return_value
        service = EmbeddingService()

        await service.add_evidence(
            evidence_id="test1",
            content="Metformin activates AMPK pathway",
            metadata={"source": "pubmed"},
        )

        # Verify add was called
        collection.add.assert_called_once()

        hits = await service.search_similar("AMPK activation drugs", n_results=1)

        # Verify query was called
        collection.query.assert_called_once()
        assert len(hits) == 1
        assert hits[0]["id"] == "id1"

    @pytest.mark.asyncio
    async def test_search_similar_empty_collection(
        self, mock_sentence_transformer, mock_chroma_client
    ):
        """Search on empty collection should return empty list, not error."""
        empty_payload = {
            "ids": [[]],
            "documents": [[]],
            "metadatas": [[]],
            "distances": [[]],
        }
        mock_chroma_client.create_collection.return_value.query.return_value = empty_payload

        service = EmbeddingService()

        assert await service.search_similar("anything", n_results=5) == []

    @pytest.mark.asyncio
    async def test_deduplicate(self, mock_sentence_transformer, mock_chroma_client):
        """Deduplicate should remove similar items."""
        from src.utils.models import Citation, Evidence

        service = EmbeddingService()
        collection = mock_chroma_client.create_collection.return_value

        # First query: near-identical hit (distance 0.05 -> duplicate).
        # Second query: no hit at all (unique).
        collection.query.side_effect = [
            {
                "ids": [["existing_id"]],
                "documents": [["doc"]],
                "metadatas": [[{}]],
                "distances": [[0.05]],  # Very similar
            },
            {
                "ids": [[]],  # No match
                "documents": [[]],
                "metadatas": [[]],
                "distances": [[]],
            },
        ]

        candidates = [
            Evidence(
                content="Duplicate content",
                citation=Citation(source="web", url="u1", title="t1", date="2024"),
            ),
            Evidence(
                content="Unique content",
                citation=Citation(source="web", url="u2", title="t2", date="2024"),
            ),
        ]

        survivors = await service.deduplicate(candidates, threshold=0.9)

        # Only the unique one should remain
        assert len(survivors) == 1
        assert survivors[0].citation.url == "u2"
uv.lock CHANGED
The diff for this file is too large to render. See raw diff