VibecoderMcSwaggins committed on
Commit b455f8a · unverified · 2 Parent(s): 1674285 f5747b1

Merge pull request #29 from The-Obstacle-Is-The-Way/claude/debug-gradio-mock-data-01MDfoUPcbfZ7FLootfhe8zs

src/agent_factory/judges.py CHANGED
@@ -148,9 +148,10 @@ class JudgeHandler:
 
 class MockJudgeHandler:
     """
-    Mock JudgeHandler for testing without LLM calls.
+    Mock JudgeHandler for demo mode without LLM calls.
 
-    Use this in unit tests to avoid API calls.
+    Extracts meaningful information from real search results
+    to provide a useful demo experience without requiring API keys.
     """
 
     def __init__(self, mock_response: JudgeAssessment | None = None) -> None:
@@ -158,19 +159,64 @@ class MockJudgeHandler:
         Initialize with optional mock response.
 
         Args:
-            mock_response: The assessment to return. If None, uses default.
+            mock_response: The assessment to return. If None, extracts from evidence.
         """
         self.mock_response = mock_response
         self.call_count = 0
         self.last_question: str | None = None
         self.last_evidence: list[Evidence] | None = None
 
+    def _extract_key_findings(self, evidence: list[Evidence], max_findings: int = 5) -> list[str]:
+        """Extract key findings from evidence titles."""
+        findings = []
+        for e in evidence[:max_findings]:
+            # Use first 150 chars of title as a finding
+            title = e.citation.title
+            if len(title) > 150:
+                title = title[:147] + "..."
+            findings.append(title)
+        return findings if findings else ["No specific findings extracted (demo mode)"]
+
+    def _extract_drug_candidates(self, question: str, evidence: list[Evidence]) -> list[str]:
+        """Extract potential drug names from question and evidence."""
+        # Common drug-related keywords to look for
+        candidates = set()
+
+        # Extract from question (simple heuristic)
+        question_words = question.lower().split()
+        for word in question_words:
+            # Skip common words, keep potential drug names
+            if len(word) > 3 and word not in {
+                "what", "which", "could", "drugs", "drug", "medications",
+                "medicine", "treat", "treatment", "help", "best", "effective",
+                "repurposed", "repurposing", "disease", "condition", "therapy",
+            }:
+                # Capitalize as potential drug name
+                candidates.add(word.capitalize())
+
+        # Extract from evidence titles (look for capitalized terms)
+        for e in evidence[:10]:
+            words = e.citation.title.split()
+            for word in words:
+                # Look for capitalized words that might be drug names
+                cleaned = word.strip(".,;:()[]")
+                if (
+                    len(cleaned) > 3
+                    and cleaned[0].isupper()
+                    and cleaned.lower() not in {"the", "and", "for", "with", "from"}
+                ):
+                    candidates.add(cleaned)
+
+        # Return top candidates or placeholder
+        candidate_list = list(candidates)[:5]
+        return candidate_list if candidate_list else ["See evidence below for potential candidates"]
+
     async def assess(
         self,
         question: str,
         evidence: list[Evidence],
     ) -> JudgeAssessment:
-        """Return the mock response."""
+        """Return assessment based on actual evidence (demo mode)."""
        self.call_count += 1
        self.last_question = question
        self.last_evidence = evidence
@@ -179,19 +225,42 @@ class MockJudgeHandler:
             return self.mock_response
 
         min_evidence = 3
-        # Default mock response
+        evidence_count = len(evidence)
+
+        # Extract meaningful data from actual evidence
+        drug_candidates = self._extract_drug_candidates(question, evidence)
+        key_findings = self._extract_key_findings(evidence)
+
+        # Calculate scores based on evidence quantity
+        mechanism_score = min(10, evidence_count * 2) if evidence_count > 0 else 0
+        clinical_score = min(10, evidence_count) if evidence_count > 0 else 0
+
         return JudgeAssessment(
             details=AssessmentDetails(
-                mechanism_score=7,
-                mechanism_reasoning="Mock assessment - good mechanism evidence",
-                clinical_evidence_score=6,
-                clinical_reasoning="Mock assessment - moderate clinical evidence",
-                drug_candidates=["Drug A", "Drug B"],
-                key_findings=["Finding 1", "Finding 2"],
+                mechanism_score=mechanism_score,
+                mechanism_reasoning=(
+                    f"Demo mode: Found {evidence_count} sources. "
+                    "Configure LLM API key for detailed mechanism analysis."
+                ),
+                clinical_evidence_score=clinical_score,
+                clinical_reasoning=(
+                    f"Demo mode: {evidence_count} sources retrieved from PubMed, "
+                    "ClinicalTrials.gov, and bioRxiv. Full analysis requires LLM API key."
+                ),
+                drug_candidates=drug_candidates,
+                key_findings=key_findings,
+            ),
+            sufficient=evidence_count >= min_evidence,
+            confidence=min(0.5, evidence_count * 0.1) if evidence_count > 0 else 0.0,
+            recommendation="synthesize" if evidence_count >= min_evidence else "continue",
+            next_search_queries=(
+                [f"{question} mechanism", f"{question} clinical trials"]
+                if evidence_count < min_evidence
+                else []
+            ),
+            reasoning=(
+                f"Demo mode assessment based on {evidence_count} real search results. "
+                "For AI-powered analysis with drug candidate identification and "
+                "evidence synthesis, configure OPENAI_API_KEY or ANTHROPIC_API_KEY."
             ),
-            sufficient=len(evidence) >= min_evidence,
-            confidence=0.75,
-            recommendation="synthesize" if len(evidence) >= min_evidence else "continue",
-            next_search_queries=["query 1", "query 2"] if len(evidence) < min_evidence else [],
-            reasoning="Mock assessment for testing purposes",
         )
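
The demo-mode scoring above is a pure function of the evidence count, so its behavior is easy to preview. A minimal standalone sketch, using only the expressions from `assess` (the helper name `demo_scores` is hypothetical, not part of the diff):

```python
def demo_scores(evidence_count: int) -> dict[str, float]:
    """Mirror the demo-mode heuristics from MockJudgeHandler.assess."""
    if evidence_count == 0:
        return {"mechanism": 0, "clinical": 0, "confidence": 0.0}
    return {
        "mechanism": min(10, evidence_count * 2),      # caps at 5 sources
        "clinical": min(10, evidence_count),           # caps at 10 sources
        "confidence": min(0.5, evidence_count * 0.1),  # never exceeds 0.5 in demo mode
    }

# With min_evidence = 3: two sources score mechanism=4, clinical=2,
# confidence=0.2 and yield recommendation="continue"; three or more
# flip sufficient=True and recommendation="synthesize".
for n in (0, 2, 3, 5):
    print(n, demo_scores(n))
```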
src/app.py CHANGED
@@ -5,6 +5,10 @@ from collections.abc import AsyncGenerator
 from typing import Any
 
 import gradio as gr
+from pydantic_ai.models.anthropic import AnthropicModel
+from pydantic_ai.models.openai import OpenAIModel
+from pydantic_ai.providers.anthropic import AnthropicProvider
+from pydantic_ai.providers.openai import OpenAIProvider
 
 from src.agent_factory.judges import JudgeHandler, MockJudgeHandler
 from src.mcp_tools import (
@@ -19,16 +23,24 @@ from src.tools.biorxiv import BioRxivTool
 from src.tools.clinicaltrials import ClinicalTrialsTool
 from src.tools.pubmed import PubMedTool
 from src.tools.search_handler import SearchHandler
+from src.utils.config import settings
 from src.utils.models import OrchestratorConfig
 
 
-def configure_orchestrator(use_mock: bool = False, mode: str = "simple") -> Any:
+def configure_orchestrator(
+    use_mock: bool = False,
+    mode: str = "simple",
+    user_api_key: str | None = None,
+    api_provider: str = "openai",
+) -> Any:
     """
     Create an orchestrator instance.
 
     Args:
         use_mock: If True, use MockJudgeHandler (no API key needed)
         mode: Orchestrator mode ("simple" or "magentic")
+        user_api_key: Optional user-provided API key (BYOK)
+        api_provider: API provider ("openai" or "anthropic")
 
     Returns:
         Configured Orchestrator instance
@@ -50,7 +62,16 @@ def configure_orchestrator(use_mock: bool = False, mode: str = "simple") -> Any:
     if use_mock:
         judge_handler = MockJudgeHandler()
     else:
-        judge_handler = JudgeHandler()
+        # Create model with user's API key if provided
+        model: AnthropicModel | OpenAIModel | None = None
+        if user_api_key:
+            if api_provider == "anthropic":
+                anthropic_provider = AnthropicProvider(api_key=user_api_key)
+                model = AnthropicModel(settings.anthropic_model, provider=anthropic_provider)
+            else:
+                openai_provider = OpenAIProvider(api_key=user_api_key)
+                model = OpenAIModel(settings.openai_model, provider=openai_provider)
+        judge_handler = JudgeHandler(model=model)
 
     return create_orchestrator(
         search_handler=search_handler,
@@ -64,6 +85,8 @@ async def research_agent(
     message: str,
     history: list[dict[str, Any]],
     mode: str = "simple",
+    api_key: str = "",
+    api_provider: str = "openai",
 ) -> AsyncGenerator[str, None]:
     """
     Gradio chat function that runs the research agent.
@@ -72,6 +95,8 @@
         message: User's research question
         history: Chat history (Gradio format)
         mode: Orchestrator mode ("simple" or "magentic")
+        api_key: Optional user-provided API key (BYOK - Bring Your Own Key)
+        api_provider: API provider ("openai" or "anthropic")
 
     Yields:
         Markdown-formatted responses for streaming
@@ -80,30 +105,57 @@
         yield "Please enter a research question."
         return
 
+    # Clean user-provided API key
+    user_api_key = api_key.strip() if api_key else None
+
     # Decide whether to use real LLMs or mock based on mode and available keys
     has_openai = bool(os.getenv("OPENAI_API_KEY"))
     has_anthropic = bool(os.getenv("ANTHROPIC_API_KEY"))
+    has_user_key = bool(user_api_key)
 
     if mode == "magentic":
         # Magentic currently supports OpenAI only
-        use_mock = not has_openai
+        use_mock = not (has_openai or (has_user_key and api_provider == "openai"))
     else:
         # Simple mode can work with either provider
-        use_mock = not (has_openai or has_anthropic)
+        use_mock = not (has_openai or has_anthropic or has_user_key)
 
     # If magentic mode requested but no OpenAI key, fallback/warn
     if mode == "magentic" and use_mock:
         yield (
             "⚠️ **Warning**: Magentic mode requires OpenAI API key. "
-            "Falling back to Mock Simple mode."
+            "Falling back to demo mode.\n\n"
         )
         mode = "simple"
 
+    # Inform user about their key being used
+    if has_user_key and not use_mock:
+        yield (
+            f"🔑 **Using your {api_provider.upper()} API key** - "
+            "Your key is used only for this session and is never stored.\n\n"
+        )
+
+    # Warn users when running in demo mode (no LLM keys)
+    if use_mock:
+        yield (
+            "🔬 **Demo Mode**: Running with real biomedical searches but without "
+            "LLM-powered analysis.\n\n"
+            "**To unlock full AI analysis:**\n"
+            "- Enter your OpenAI or Anthropic API key below, OR\n"
+            "- Configure secrets in HuggingFace Space settings\n\n"
+            "---\n\n"
+        )
+
     # Run the agent and stream events
     response_parts: list[str] = []
 
     try:
-        orchestrator = configure_orchestrator(use_mock=use_mock, mode=mode)
+        orchestrator = configure_orchestrator(
+            use_mock=use_mock,
+            mode=mode,
+            user_api_key=user_api_key,
+            api_provider=api_provider,
+        )
         async for event in orchestrator.run(message):
             # Format event as markdown
             event_md = event.to_markdown()
@@ -148,10 +200,30 @@ def create_demo() -> Any:
         fn=research_agent,
         title="",
         examples=[
-            ["What drugs could be repurposed for Alzheimer's disease?", "simple"],
-            ["Is metformin effective for treating cancer?", "simple"],
-            ["What medications show promise for Long COVID treatment?", "simple"],
-            ["Can statins be repurposed for neurological conditions?", "simple"],
+            [
+                "What drugs could be repurposed for Alzheimer's disease?",
+                "simple",
+                "",
+                "openai",
+            ],
+            [
+                "Is metformin effective for treating cancer?",
+                "simple",
+                "",
+                "openai",
+            ],
+            [
+                "What medications show promise for Long COVID treatment?",
+                "simple",
+                "",
+                "openai",
+            ],
+            [
+                "Can statins be repurposed for neurological conditions?",
+                "simple",
+                "",
+                "openai",
+            ],
         ],
         additional_inputs=[
             gr.Radio(
@@ -159,7 +231,19 @@
                 value="simple",
                 label="Orchestrator Mode",
                 info="Simple: Linear (OpenAI/Anthropic) | Magentic: Multi-Agent (OpenAI)",
-            )
+            ),
+            gr.Textbox(
+                label="🔑 API Key (Optional - Bring Your Own Key)",
+                placeholder="sk-... or sk-ant-...",
+                type="password",
+                info="Enter your own API key for full AI analysis. Never stored.",
+            ),
+            gr.Radio(
+                choices=["openai", "anthropic"],
+                value="openai",
+                label="API Provider",
+                info="Select the provider for your API key",
+            ),
         ],
     )
 
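
The BYOK flow above splits into two independent pieces: deciding whether a usable key exists for the requested mode, and wrapping a user key in the matching pydantic-ai provider/model pair. A condensed sketch of both, assuming `settings.openai_model` / `settings.anthropic_model` hold model names as in the diff (the helper names `decide_use_mock` and `build_judge_model` are hypothetical):

```python
import os

from pydantic_ai.models.anthropic import AnthropicModel
from pydantic_ai.models.openai import OpenAIModel
from pydantic_ai.providers.anthropic import AnthropicProvider
from pydantic_ai.providers.openai import OpenAIProvider


def decide_use_mock(mode: str, user_api_key: str | None, api_provider: str) -> bool:
    """Fall back to demo mode only when no usable key exists for the mode."""
    has_openai = bool(os.getenv("OPENAI_API_KEY"))
    has_anthropic = bool(os.getenv("ANTHROPIC_API_KEY"))
    has_user_key = bool(user_api_key)
    if mode == "magentic":
        # Magentic supports OpenAI only, so an Anthropic user key does not help
        return not (has_openai or (has_user_key and api_provider == "openai"))
    return not (has_openai or has_anthropic or has_user_key)


def build_judge_model(
    user_api_key: str, api_provider: str, model_name: str
) -> AnthropicModel | OpenAIModel:
    """Wrap a user-supplied key in the matching provider/model pair."""
    if api_provider == "anthropic":
        return AnthropicModel(model_name, provider=AnthropicProvider(api_key=user_api_key))
    return OpenAIModel(model_name, provider=OpenAIProvider(api_key=user_api_key))
```

Note also why each `examples` row grew from two to four elements: Gradio example rows must supply one value per input, and `research_agent` now takes the API-key textbox ("") and provider radio ("openai") in addition to the mode.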
 
tests/unit/agent_factory/test_judges.py CHANGED
@@ -164,8 +164,9 @@ class TestMockJudgeHandler:
 
         result = await handler.assess("test", evidence)
 
-        expected_mech_score = 7
         expected_evidence_len = 2
+        # New dynamic scoring: mechanism_score = min(10, evidence_count * 2)
+        expected_mech_score = min(10, expected_evidence_len * 2)  # = 4
 
         assert handler.call_count == 1
         assert handler.last_question == "test"
@@ -174,6 +175,8 @@
         assert result.details.mechanism_score == expected_mech_score
         assert result.sufficient is False
         assert result.recommendation == "continue"
+        # Verify demo mode messaging
+        assert "Demo mode" in result.reasoning
 
     @pytest.mark.asyncio
     async def test_mock_handler_custom_response(self):
tests/unit/tools/test_clinicaltrials.py CHANGED
@@ -123,11 +123,24 @@ class TestClinicalTrialsTool:
         await tool.search("metformin alzheimer")
 
 
+def _can_reach_clinicaltrials() -> bool:
+    """Check if ClinicalTrials.gov API is reachable."""
+    try:
+        resp = requests.get("https://clinicaltrials.gov/api/v2/studies", timeout=5)
+        return resp.status_code < 500
+    except (requests.RequestException, OSError):
+        return False
+
+
 class TestClinicalTrialsIntegration:
     """Integration tests (marked for separate run)."""
 
     @pytest.mark.integration
     @pytest.mark.asyncio
+    @pytest.mark.skipif(
+        not _can_reach_clinicaltrials(),
+        reason="ClinicalTrials.gov API not reachable (network/SSL issue)",
+    )
     async def test_real_api_call(self) -> None:
         """Test actual API call (requires network)."""
         tool = ClinicalTrialsTool()
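
One caveat with the reachability gate above: `@pytest.mark.skipif` arguments are evaluated when the module is imported, so the probe request fires once per collection even for runs that deselect the `integration` marker. A lazy alternative (a sketch, not the repo's code; the `lru_cache` wrapper is an assumption) defers the probe into the test body:

```python
import functools

import pytest
import requests


@functools.lru_cache(maxsize=1)
def _can_reach_clinicaltrials() -> bool:
    """Probe ClinicalTrials.gov at most once, and only when first called."""
    try:
        resp = requests.get("https://clinicaltrials.gov/api/v2/studies", timeout=5)
        return resp.status_code < 500
    except (requests.RequestException, OSError):
        return False


@pytest.mark.integration
@pytest.mark.asyncio
async def test_real_api_call() -> None:
    """Skip lazily, from inside the test body."""
    if not _can_reach_clinicaltrials():
        pytest.skip("ClinicalTrials.gov API not reachable (network/SSL issue)")
    # ... exercise ClinicalTrialsTool() as in the original test ...
```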