VibecoderMcSwaggins committed on
Commit b455f8a · unverified · 2 Parent(s): 1674285 f5747b1

Merge pull request #29 from The-Obstacle-Is-The-Way/claude/debug-gradio-mock-data-01MDfoUPcbfZ7FLootfhe8zs

src/agent_factory/judges.py CHANGED
@@ -148,9 +148,10 @@ class JudgeHandler:
 
 class MockJudgeHandler:
     """
-    Mock JudgeHandler for testing without LLM calls.
+    Mock JudgeHandler for demo mode without LLM calls.
 
-    Use this in unit tests to avoid API calls.
+    Extracts meaningful information from real search results
+    to provide a useful demo experience without requiring API keys.
     """
 
     def __init__(self, mock_response: JudgeAssessment | None = None) -> None:
@@ -158,19 +159,64 @@ class MockJudgeHandler:
         Initialize with optional mock response.
 
         Args:
-            mock_response: The assessment to return. If None, uses default.
+            mock_response: The assessment to return. If None, extracts from evidence.
         """
         self.mock_response = mock_response
         self.call_count = 0
         self.last_question: str | None = None
         self.last_evidence: list[Evidence] | None = None
 
+    def _extract_key_findings(self, evidence: list[Evidence], max_findings: int = 5) -> list[str]:
+        """Extract key findings from evidence titles."""
+        findings = []
+        for e in evidence[:max_findings]:
+            # Use first 150 chars of title as a finding
+            title = e.citation.title
+            if len(title) > 150:
+                title = title[:147] + "..."
+            findings.append(title)
+        return findings if findings else ["No specific findings extracted (demo mode)"]
+
+    def _extract_drug_candidates(self, question: str, evidence: list[Evidence]) -> list[str]:
+        """Extract potential drug names from question and evidence."""
+        # Common drug-related keywords to look for
+        candidates = set()
+
+        # Extract from question (simple heuristic)
+        question_words = question.lower().split()
+        for word in question_words:
+            # Skip common words, keep potential drug names
+            if len(word) > 3 and word not in {
+                "what", "which", "could", "drugs", "drug", "medications",
+                "medicine", "treat", "treatment", "help", "best", "effective",
+                "repurposed", "repurposing", "disease", "condition", "therapy",
+            }:
+                # Capitalize as potential drug name
+                candidates.add(word.capitalize())
+
+        # Extract from evidence titles (look for capitalized terms)
+        for e in evidence[:10]:
+            words = e.citation.title.split()
+            for word in words:
+                # Look for capitalized words that might be drug names
+                cleaned = word.strip(".,;:()[]")
+                if (
+                    len(cleaned) > 3
+                    and cleaned[0].isupper()
+                    and cleaned.lower() not in {"the", "and", "for", "with", "from"}
+                ):
+                    candidates.add(cleaned)
+
+        # Return top candidates or placeholder
+        candidate_list = list(candidates)[:5]
+        return candidate_list if candidate_list else ["See evidence below for potential candidates"]
+
     async def assess(
         self,
         question: str,
         evidence: list[Evidence],
     ) -> JudgeAssessment:
-        """Return the mock response."""
+        """Return assessment based on actual evidence (demo mode)."""
        self.call_count += 1
        self.last_question = question
        self.last_evidence = evidence
@@ -179,19 +225,42 @@ class MockJudgeHandler:
             return self.mock_response
 
         min_evidence = 3
-        # Default mock response
+        evidence_count = len(evidence)
+
+        # Extract meaningful data from actual evidence
+        drug_candidates = self._extract_drug_candidates(question, evidence)
+        key_findings = self._extract_key_findings(evidence)
+
+        # Calculate scores based on evidence quantity
+        mechanism_score = min(10, evidence_count * 2) if evidence_count > 0 else 0
+        clinical_score = min(10, evidence_count) if evidence_count > 0 else 0
+
         return JudgeAssessment(
             details=AssessmentDetails(
-                mechanism_score=7,
-                mechanism_reasoning="Mock assessment - good mechanism evidence",
-                clinical_evidence_score=6,
-                clinical_reasoning="Mock assessment - moderate clinical evidence",
-                drug_candidates=["Drug A", "Drug B"],
-                key_findings=["Finding 1", "Finding 2"],
+                mechanism_score=mechanism_score,
+                mechanism_reasoning=(
+                    f"Demo mode: Found {evidence_count} sources. "
+                    "Configure LLM API key for detailed mechanism analysis."
+                ),
+                clinical_evidence_score=clinical_score,
+                clinical_reasoning=(
+                    f"Demo mode: {evidence_count} sources retrieved from PubMed, "
+                    "ClinicalTrials.gov, and bioRxiv. Full analysis requires LLM API key."
+                ),
+                drug_candidates=drug_candidates,
+                key_findings=key_findings,
+            ),
+            sufficient=evidence_count >= min_evidence,
+            confidence=min(0.5, evidence_count * 0.1) if evidence_count > 0 else 0.0,
+            recommendation="synthesize" if evidence_count >= min_evidence else "continue",
+            next_search_queries=(
+                [f"{question} mechanism", f"{question} clinical trials"]
+                if evidence_count < min_evidence
+                else []
+            ),
+            reasoning=(
+                f"Demo mode assessment based on {evidence_count} real search results. "
+                "For AI-powered analysis with drug candidate identification and "
+                "evidence synthesis, configure OPENAI_API_KEY or ANTHROPIC_API_KEY."
             ),
-            sufficient=len(evidence) >= min_evidence,
-            confidence=0.75,
-            recommendation="synthesize" if len(evidence) >= min_evidence else "continue",
-            next_search_queries=["query 1", "query 2"] if len(evidence) < min_evidence else [],
-            reasoning="Mock assessment for testing purposes",
         )
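
The demo-mode scoring above is a pure function of the evidence count, so its behavior is easy to preview. A minimal standalone sketch, using only the expressions from `assess` (the helper name `demo_scores` is hypothetical, not part of the diff):

```python
def demo_scores(evidence_count: int) -> dict[str, float]:
    """Mirror the demo-mode heuristics from MockJudgeHandler.assess."""
    if evidence_count == 0:
        return {"mechanism": 0, "clinical": 0, "confidence": 0.0}
    return {
        "mechanism": min(10, evidence_count * 2),      # caps at 5 sources
        "clinical": min(10, evidence_count),           # caps at 10 sources
        "confidence": min(0.5, evidence_count * 0.1),  # never exceeds 0.5 in demo mode
    }

# With min_evidence = 3: two sources score mechanism=4, clinical=2,
# confidence=0.2 and yield recommendation="continue"; three or more
# flip sufficient=True and recommendation="synthesize".
for n in (0, 2, 3, 5):
    print(n, demo_scores(n))
```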
src/app.py CHANGED
@@ -5,6 +5,10 @@ from collections.abc import AsyncGenerator
 from typing import Any
 
 import gradio as gr
+from pydantic_ai.models.anthropic import AnthropicModel
+from pydantic_ai.models.openai import OpenAIModel
+from pydantic_ai.providers.anthropic import AnthropicProvider
+from pydantic_ai.providers.openai import OpenAIProvider
 
 from src.agent_factory.judges import JudgeHandler, MockJudgeHandler
 from src.mcp_tools import (
@@ -19,16 +23,24 @@ from src.tools.biorxiv import BioRxivTool
 from src.tools.clinicaltrials import ClinicalTrialsTool
 from src.tools.pubmed import PubMedTool
 from src.tools.search_handler import SearchHandler
+from src.utils.config import settings
 from src.utils.models import OrchestratorConfig
 
 
-def configure_orchestrator(use_mock: bool = False, mode: str = "simple") -> Any:
+def configure_orchestrator(
+    use_mock: bool = False,
+    mode: str = "simple",
+    user_api_key: str | None = None,
+    api_provider: str = "openai",
+) -> Any:
     """
     Create an orchestrator instance.
 
     Args:
         use_mock: If True, use MockJudgeHandler (no API key needed)
         mode: Orchestrator mode ("simple" or "magentic")
+        user_api_key: Optional user-provided API key (BYOK)
+        api_provider: API provider ("openai" or "anthropic")
 
     Returns:
         Configured Orchestrator instance
@@ -50,7 +62,16 @@ def configure_orchestrator(use_mock: bool = False, mode: str = "simple") -> Any:
     if use_mock:
         judge_handler = MockJudgeHandler()
     else:
-        judge_handler = JudgeHandler()
+        # Create model with user's API key if provided
+        model: AnthropicModel | OpenAIModel | None = None
+        if user_api_key:
+            if api_provider == "anthropic":
+                anthropic_provider = AnthropicProvider(api_key=user_api_key)
+                model = AnthropicModel(settings.anthropic_model, provider=anthropic_provider)
+            else:
+                openai_provider = OpenAIProvider(api_key=user_api_key)
+                model = OpenAIModel(settings.openai_model, provider=openai_provider)
+        judge_handler = JudgeHandler(model=model)
 
     return create_orchestrator(
         search_handler=search_handler,
@@ -64,6 +85,8 @@ async def research_agent(
     message: str,
     history: list[dict[str, Any]],
     mode: str = "simple",
+    api_key: str = "",
+    api_provider: str = "openai",
 ) -> AsyncGenerator[str, None]:
     """
     Gradio chat function that runs the research agent.
@@ -72,6 +95,8 @@
         message: User's research question
         history: Chat history (Gradio format)
         mode: Orchestrator mode ("simple" or "magentic")
+        api_key: Optional user-provided API key (BYOK - Bring Your Own Key)
+        api_provider: API provider ("openai" or "anthropic")
 
     Yields:
         Markdown-formatted responses for streaming
@@ -80,30 +105,57 @@
         yield "Please enter a research question."
         return
 
+    # Clean user-provided API key
+    user_api_key = api_key.strip() if api_key else None
+
     # Decide whether to use real LLMs or mock based on mode and available keys
     has_openai = bool(os.getenv("OPENAI_API_KEY"))
     has_anthropic = bool(os.getenv("ANTHROPIC_API_KEY"))
+    has_user_key = bool(user_api_key)
 
     if mode == "magentic":
         # Magentic currently supports OpenAI only
-        use_mock = not has_openai
+        use_mock = not (has_openai or (has_user_key and api_provider == "openai"))
     else:
         # Simple mode can work with either provider
-        use_mock = not (has_openai or has_anthropic)
+        use_mock = not (has_openai or has_anthropic or has_user_key)
 
     # If magentic mode requested but no OpenAI key, fallback/warn
     if mode == "magentic" and use_mock:
         yield (
             "⚠️ **Warning**: Magentic mode requires OpenAI API key. "
-            "Falling back to Mock Simple mode."
+            "Falling back to demo mode.\n\n"
         )
         mode = "simple"
 
+    # Inform user about their key being used
+    if has_user_key and not use_mock:
+        yield (
+            f"🔑 **Using your {api_provider.upper()} API key** - "
+            "Your key is used only for this session and is never stored.\n\n"
+        )
+
+    # Warn users when running in demo mode (no LLM keys)
+    if use_mock:
+        yield (
+            "🔬 **Demo Mode**: Running with real biomedical searches but without "
+            "LLM-powered analysis.\n\n"
+            "**To unlock full AI analysis:**\n"
+            "- Enter your OpenAI or Anthropic API key below, OR\n"
+            "- Configure secrets in HuggingFace Space settings\n\n"
+            "---\n\n"
+        )
+
     # Run the agent and stream events
     response_parts: list[str] = []
 
     try:
-        orchestrator = configure_orchestrator(use_mock=use_mock, mode=mode)
+        orchestrator = configure_orchestrator(
+            use_mock=use_mock,
+            mode=mode,
+            user_api_key=user_api_key,
+            api_provider=api_provider,
+        )
         async for event in orchestrator.run(message):
             # Format event as markdown
             event_md = event.to_markdown()
@@ -148,10 +200,30 @@ def create_demo() -> Any:
         fn=research_agent,
         title="",
         examples=[
-            ["What drugs could be repurposed for Alzheimer's disease?", "simple"],
-            ["Is metformin effective for treating cancer?", "simple"],
-            ["What medications show promise for Long COVID treatment?", "simple"],
-            ["Can statins be repurposed for neurological conditions?", "simple"],
+            [
+                "What drugs could be repurposed for Alzheimer's disease?",
+                "simple",
+                "",
+                "openai",
+            ],
+            [
+                "Is metformin effective for treating cancer?",
+                "simple",
+                "",
+                "openai",
+            ],
+            [
+                "What medications show promise for Long COVID treatment?",
+                "simple",
+                "",
+                "openai",
+            ],
+            [
+                "Can statins be repurposed for neurological conditions?",
+                "simple",
+                "",
+                "openai",
+            ],
         ],
         additional_inputs=[
             gr.Radio(
@@ -159,7 +231,19 @@
                 value="simple",
                 label="Orchestrator Mode",
                 info="Simple: Linear (OpenAI/Anthropic) | Magentic: Multi-Agent (OpenAI)",
-            )
+            ),
+            gr.Textbox(
+                label="🔑 API Key (Optional - Bring Your Own Key)",
+                placeholder="sk-... or sk-ant-...",
+                type="password",
+                info="Enter your own API key for full AI analysis. Never stored.",
+            ),
+            gr.Radio(
+                choices=["openai", "anthropic"],
+                value="openai",
+                label="API Provider",
+                info="Select the provider for your API key",
+            ),
         ],
     )
 
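
The BYOK flow above splits into two independent pieces: deciding whether a usable key exists for the requested mode, and wrapping a user key in the matching pydantic-ai provider/model pair. A condensed sketch of both, assuming `settings.openai_model` / `settings.anthropic_model` hold model names as in the diff (the helper names `decide_use_mock` and `build_judge_model` are hypothetical):

```python
import os

from pydantic_ai.models.anthropic import AnthropicModel
from pydantic_ai.models.openai import OpenAIModel
from pydantic_ai.providers.anthropic import AnthropicProvider
from pydantic_ai.providers.openai import OpenAIProvider


def decide_use_mock(mode: str, user_api_key: str | None, api_provider: str) -> bool:
    """Fall back to demo mode only when no usable key exists for the mode."""
    has_openai = bool(os.getenv("OPENAI_API_KEY"))
    has_anthropic = bool(os.getenv("ANTHROPIC_API_KEY"))
    has_user_key = bool(user_api_key)
    if mode == "magentic":
        # Magentic supports OpenAI only, so an Anthropic user key does not help
        return not (has_openai or (has_user_key and api_provider == "openai"))
    return not (has_openai or has_anthropic or has_user_key)


def build_judge_model(
    user_api_key: str, api_provider: str, model_name: str
) -> AnthropicModel | OpenAIModel:
    """Wrap a user-supplied key in the matching provider/model pair."""
    if api_provider == "anthropic":
        return AnthropicModel(model_name, provider=AnthropicProvider(api_key=user_api_key))
    return OpenAIModel(model_name, provider=OpenAIProvider(api_key=user_api_key))
```

Note also why each `examples` row grew from two to four elements: Gradio example rows must supply one value per input, and `research_agent` now takes the API-key textbox ("") and provider radio ("openai") in addition to the mode.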
 
tests/unit/agent_factory/test_judges.py CHANGED
@@ -164,8 +164,9 @@ class TestMockJudgeHandler:
 
         result = await handler.assess("test", evidence)
 
-        expected_mech_score = 7
         expected_evidence_len = 2
+        # New dynamic scoring: mechanism_score = min(10, evidence_count * 2)
+        expected_mech_score = min(10, expected_evidence_len * 2)  # = 4
 
         assert handler.call_count == 1
         assert handler.last_question == "test"
@@ -174,6 +175,8 @@
         assert result.details.mechanism_score == expected_mech_score
         assert result.sufficient is False
         assert result.recommendation == "continue"
+        # Verify demo mode messaging
+        assert "Demo mode" in result.reasoning
 
     @pytest.mark.asyncio
     async def test_mock_handler_custom_response(self):
tests/unit/tools/test_clinicaltrials.py CHANGED
@@ -123,11 +123,24 @@ class TestClinicalTrialsTool:
         await tool.search("metformin alzheimer")
 
 
+def _can_reach_clinicaltrials() -> bool:
+    """Check if ClinicalTrials.gov API is reachable."""
+    try:
+        resp = requests.get("https://clinicaltrials.gov/api/v2/studies", timeout=5)
+        return resp.status_code < 500
+    except (requests.RequestException, OSError):
+        return False
+
+
 class TestClinicalTrialsIntegration:
     """Integration tests (marked for separate run)."""
 
     @pytest.mark.integration
     @pytest.mark.asyncio
+    @pytest.mark.skipif(
+        not _can_reach_clinicaltrials(),
+        reason="ClinicalTrials.gov API not reachable (network/SSL issue)",
+    )
     async def test_real_api_call(self) -> None:
         """Test actual API call (requires network)."""
         tool = ClinicalTrialsTool()
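
One caveat with the reachability gate above: `@pytest.mark.skipif` arguments are evaluated when the module is imported, so the probe request fires once per collection even for runs that deselect the `integration` marker. A lazy alternative (a sketch, not the repo's code; the `lru_cache` wrapper is an assumption) defers the probe into the test body:

```python
import functools

import pytest
import requests


@functools.lru_cache(maxsize=1)
def _can_reach_clinicaltrials() -> bool:
    """Probe ClinicalTrials.gov at most once, and only when first called."""
    try:
        resp = requests.get("https://clinicaltrials.gov/api/v2/studies", timeout=5)
        return resp.status_code < 500
    except (requests.RequestException, OSError):
        return False


@pytest.mark.integration
@pytest.mark.asyncio
async def test_real_api_call() -> None:
    """Skip lazily, from inside the test body."""
    if not _can_reach_clinicaltrials():
        pytest.skip("ClinicalTrials.gov API not reachable (network/SSL issue)")
    # ... exercise ClinicalTrialsTool() as in the original test ...
```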