VibecoderMcSwaggins committed on
Commit
4e2ccbf
·
1 Parent(s): cd004e1

feat: Implement Phase 10 (ClinicalTrials.gov) with requests

Browse files
docs/implementation/10_phase_clinicaltrials.md CHANGED
@@ -115,12 +115,28 @@ Evidence(
115
 
116
  ## 4. Implementation
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  ### 4.1 ClinicalTrials Tool (`src/tools/clinicaltrials.py`)
119
 
120
  ```python
121
  """ClinicalTrials.gov search tool using API v2."""
122
 
123
- import httpx
 
 
 
124
  from tenacity import retry, stop_after_attempt, wait_exponential
125
 
126
  from src.utils.exceptions import SearchError
@@ -128,10 +144,14 @@ from src.utils.models import Citation, Evidence
128
 
129
 
130
  class ClinicalTrialsTool:
131
- """Search tool for ClinicalTrials.gov."""
 
 
 
 
132
 
133
  BASE_URL = "https://clinicaltrials.gov/api/v2/studies"
134
- FIELDS = [
135
  "NCTId",
136
  "BriefTitle",
137
  "Phase",
@@ -152,34 +172,33 @@ class ClinicalTrialsTool:
152
  reraise=True,
153
  )
154
  async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
155
- """
156
- Search ClinicalTrials.gov for studies.
157
-
158
- Args:
159
- query: Search query (e.g., "metformin alzheimer")
160
- max_results: Maximum results to return
161
-
162
- Returns:
163
- List of Evidence objects from clinical trials
164
- """
165
  params = {
166
  "query.term": query,
167
  "pageSize": min(max_results, 100),
168
  "fields": "|".join(self.FIELDS),
169
  }
170
 
171
- async with httpx.AsyncClient(timeout=30.0) as client:
172
- try:
173
- response = await client.get(self.BASE_URL, params=params)
174
- response.raise_for_status()
175
- except httpx.HTTPStatusError as e:
176
- raise SearchError(f"ClinicalTrials.gov search failed: {e}") from e
 
 
 
 
177
 
178
  data = response.json()
179
  studies = data.get("studies", [])
180
-
181
  return [self._study_to_evidence(study) for study in studies[:max_results]]
182
 
 
 
 
 
 
183
  def _study_to_evidence(self, study: dict) -> Evidence:
184
  """Convert a clinical trial study to Evidence."""
185
  # Navigate nested structure
@@ -240,19 +259,23 @@ class ClinicalTrialsTool:
240
 
241
  ### 5.1 Unit Tests (`tests/unit/tools/test_clinicaltrials.py`)
242
 
 
 
243
  ```python
244
  """Unit tests for ClinicalTrials.gov tool."""
245
 
 
 
246
  import pytest
247
- import respx
248
- from httpx import Response
249
 
250
  from src.tools.clinicaltrials import ClinicalTrialsTool
 
251
  from src.utils.models import Evidence
252
 
253
 
254
  @pytest.fixture
255
- def mock_clinicaltrials_response():
256
  """Mock ClinicalTrials.gov API response."""
257
  return {
258
  "studies": [
@@ -260,26 +283,20 @@ def mock_clinicaltrials_response():
260
  "protocolSection": {
261
  "identificationModule": {
262
  "nctId": "NCT04098666",
263
- "briefTitle": "Metformin in Alzheimer's Dementia Prevention"
264
  },
265
  "statusModule": {
266
  "overallStatus": "Recruiting",
267
- "startDateStruct": {"date": "2020-01-15"}
268
  },
269
  "descriptionModule": {
270
  "briefSummary": "This study evaluates metformin for Alzheimer's prevention."
271
  },
272
- "designModule": {
273
- "phases": ["PHASE2"]
274
- },
275
- "conditionsModule": {
276
- "conditions": ["Alzheimer Disease", "Dementia"]
277
- },
278
  "armsInterventionsModule": {
279
- "interventions": [
280
- {"name": "Metformin", "type": "Drug"}
281
- ]
282
- }
283
  }
284
  }
285
  ]
@@ -289,81 +306,45 @@ def mock_clinicaltrials_response():
289
  class TestClinicalTrialsTool:
290
  """Tests for ClinicalTrialsTool."""
291
 
292
- def test_tool_name(self):
293
  """Tool should have correct name."""
294
  tool = ClinicalTrialsTool()
295
  assert tool.name == "clinicaltrials"
296
 
297
  @pytest.mark.asyncio
298
- @respx.mock
299
- async def test_search_returns_evidence(self, mock_clinicaltrials_response):
 
300
  """Search should return Evidence objects."""
301
- respx.get("https://clinicaltrials.gov/api/v2/studies").mock(
302
- return_value=Response(200, json=mock_clinicaltrials_response)
303
- )
 
 
304
 
305
- tool = ClinicalTrialsTool()
306
- results = await tool.search("metformin alzheimer", max_results=5)
307
 
308
- assert len(results) == 1
309
- assert isinstance(results[0], Evidence)
310
- assert results[0].citation.source == "clinicaltrials"
311
- assert "NCT04098666" in results[0].citation.url
312
- assert "Metformin" in results[0].citation.title
313
 
314
  @pytest.mark.asyncio
315
- @respx.mock
316
- async def test_search_extracts_phase(self, mock_clinicaltrials_response):
317
- """Search should extract trial phase."""
318
- respx.get("https://clinicaltrials.gov/api/v2/studies").mock(
319
- return_value=Response(200, json=mock_clinicaltrials_response)
320
- )
321
-
322
- tool = ClinicalTrialsTool()
323
- results = await tool.search("metformin alzheimer")
324
-
325
- assert "PHASE2" in results[0].content
326
-
327
- @pytest.mark.asyncio
328
- @respx.mock
329
- async def test_search_extracts_status(self, mock_clinicaltrials_response):
330
- """Search should extract trial status."""
331
- respx.get("https://clinicaltrials.gov/api/v2/studies").mock(
332
- return_value=Response(200, json=mock_clinicaltrials_response)
333
- )
334
-
335
- tool = ClinicalTrialsTool()
336
- results = await tool.search("metformin alzheimer")
337
-
338
- assert "Recruiting" in results[0].content
339
-
340
- @pytest.mark.asyncio
341
- @respx.mock
342
- async def test_search_empty_results(self):
343
- """Search should handle empty results gracefully."""
344
- respx.get("https://clinicaltrials.gov/api/v2/studies").mock(
345
- return_value=Response(200, json={"studies": []})
346
- )
347
-
348
- tool = ClinicalTrialsTool()
349
- results = await tool.search("nonexistent query xyz")
350
-
351
- assert results == []
352
-
353
- @pytest.mark.asyncio
354
- @respx.mock
355
- async def test_search_api_error(self):
356
  """Search should raise SearchError on API failure."""
357
- from src.utils.exceptions import SearchError
358
-
359
- respx.get("https://clinicaltrials.gov/api/v2/studies").mock(
360
- return_value=Response(500, text="Internal Server Error")
361
- )
 
362
 
363
- tool = ClinicalTrialsTool()
364
 
365
- with pytest.raises(SearchError):
366
- await tool.search("metformin alzheimer")
367
 
368
 
369
  class TestClinicalTrialsIntegration:
@@ -371,7 +352,7 @@ class TestClinicalTrialsIntegration:
371
 
372
  @pytest.mark.integration
373
  @pytest.mark.asyncio
374
- async def test_real_api_call(self):
375
  """Test actual API call (requires network)."""
376
  tool = ClinicalTrialsTool()
377
  results = await tool.search("metformin diabetes", max_results=3)
 
115
 
116
  ## 4. Implementation
117
 
118
+ ### 4.0 Important: HTTP Client Selection
119
+
120
+ **ClinicalTrials.gov's WAF blocks `httpx`'s TLS fingerprint.** Use `requests` instead.
121
+
122
+ | Library | Status | Notes |
123
+ |---------|--------|-------|
124
+ | `httpx` | ❌ 403 Blocked | TLS/JA3 fingerprint flagged |
125
+ | `httpx[http2]` | ❌ 403 Blocked | HTTP/2 doesn't help |
126
+ | `requests` | ✅ Works | Industry standard, not blocked |
127
+ | `urllib` | ✅ Works | Stdlib alternative |
128
+
129
+ We call `requests` through `asyncio.to_thread()` so the blocking request runs in a separate thread and does not stall the event loop.
130
+
131
  ### 4.1 ClinicalTrials Tool (`src/tools/clinicaltrials.py`)
132
 
133
  ```python
134
  """ClinicalTrials.gov search tool using API v2."""
135
 
136
+ import asyncio
137
+ from typing import Any, ClassVar
138
+
139
+ import requests
140
  from tenacity import retry, stop_after_attempt, wait_exponential
141
 
142
  from src.utils.exceptions import SearchError
 
144
 
145
 
146
  class ClinicalTrialsTool:
147
+ """Search tool for ClinicalTrials.gov.
148
+
149
+ Note: Uses `requests` library instead of `httpx` because ClinicalTrials.gov's
150
+ WAF blocks httpx's TLS fingerprint. The `requests` library is not blocked.
151
+ """
152
 
153
  BASE_URL = "https://clinicaltrials.gov/api/v2/studies"
154
+ FIELDS: ClassVar[list[str]] = [
155
  "NCTId",
156
  "BriefTitle",
157
  "Phase",
 
172
  reraise=True,
173
  )
174
  async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
175
+ """Search ClinicalTrials.gov for studies."""
 
 
 
 
 
 
 
 
 
176
  params = {
177
  "query.term": query,
178
  "pageSize": min(max_results, 100),
179
  "fields": "|".join(self.FIELDS),
180
  }
181
 
182
+ try:
183
+ # Run blocking requests.get in a separate thread for async compatibility
184
+ response = await asyncio.to_thread(
185
+ requests.get,
186
+ self.BASE_URL,
187
+ params=params,
188
+ headers={"User-Agent": "DeepCritical-Research-Agent/1.0"},
189
+ timeout=30,
190
+ )
191
+ response.raise_for_status()
192
 
193
  data = response.json()
194
  studies = data.get("studies", [])
 
195
  return [self._study_to_evidence(study) for study in studies[:max_results]]
196
 
197
+ except requests.HTTPError as e:
198
+ raise SearchError(f"ClinicalTrials.gov API error: {e}") from e
199
+ except requests.RequestException as e:
200
+ raise SearchError(f"ClinicalTrials.gov request failed: {e}") from e
201
+
202
  def _study_to_evidence(self, study: dict) -> Evidence:
203
  """Convert a clinical trial study to Evidence."""
204
  # Navigate nested structure
 
259
 
260
  ### 5.1 Unit Tests (`tests/unit/tools/test_clinicaltrials.py`)
261
 
262
+ The unit tests use `unittest.mock.patch` to mock `requests.get` (`respx` is not used because it only mocks `httpx`, which this tool no longer calls).
263
+
264
  ```python
265
  """Unit tests for ClinicalTrials.gov tool."""
266
 
267
+ from unittest.mock import MagicMock, patch
268
+
269
  import pytest
270
+ import requests
 
271
 
272
  from src.tools.clinicaltrials import ClinicalTrialsTool
273
+ from src.utils.exceptions import SearchError
274
  from src.utils.models import Evidence
275
 
276
 
277
  @pytest.fixture
278
+ def mock_clinicaltrials_response() -> dict:
279
  """Mock ClinicalTrials.gov API response."""
280
  return {
281
  "studies": [
 
283
  "protocolSection": {
284
  "identificationModule": {
285
  "nctId": "NCT04098666",
286
+ "briefTitle": "Metformin in Alzheimer's Dementia Prevention",
287
  },
288
  "statusModule": {
289
  "overallStatus": "Recruiting",
290
+ "startDateStruct": {"date": "2020-01-15"},
291
  },
292
  "descriptionModule": {
293
  "briefSummary": "This study evaluates metformin for Alzheimer's prevention."
294
  },
295
+ "designModule": {"phases": ["PHASE2"]},
296
+ "conditionsModule": {"conditions": ["Alzheimer Disease", "Dementia"]},
 
 
 
 
297
  "armsInterventionsModule": {
298
+ "interventions": [{"name": "Metformin", "type": "Drug"}]
299
+ },
 
 
300
  }
301
  }
302
  ]
 
306
  class TestClinicalTrialsTool:
307
  """Tests for ClinicalTrialsTool."""
308
 
309
+ def test_tool_name(self) -> None:
310
  """Tool should have correct name."""
311
  tool = ClinicalTrialsTool()
312
  assert tool.name == "clinicaltrials"
313
 
314
  @pytest.mark.asyncio
315
+ async def test_search_returns_evidence(
316
+ self, mock_clinicaltrials_response: dict
317
+ ) -> None:
318
  """Search should return Evidence objects."""
319
+ with patch("src.tools.clinicaltrials.requests.get") as mock_get:
320
+ mock_response = MagicMock()
321
+ mock_response.json.return_value = mock_clinicaltrials_response
322
+ mock_response.raise_for_status = MagicMock()
323
+ mock_get.return_value = mock_response
324
 
325
+ tool = ClinicalTrialsTool()
326
+ results = await tool.search("metformin alzheimer", max_results=5)
327
 
328
+ assert len(results) == 1
329
+ assert isinstance(results[0], Evidence)
330
+ assert results[0].citation.source == "clinicaltrials"
331
+ assert "NCT04098666" in results[0].citation.url
332
+ assert "Metformin" in results[0].citation.title
333
 
334
  @pytest.mark.asyncio
335
+ async def test_search_api_error(self) -> None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
  """Search should raise SearchError on API failure."""
337
+ with patch("src.tools.clinicaltrials.requests.get") as mock_get:
338
+ mock_response = MagicMock()
339
+ mock_response.raise_for_status.side_effect = requests.HTTPError(
340
+ "500 Server Error"
341
+ )
342
+ mock_get.return_value = mock_response
343
 
344
+ tool = ClinicalTrialsTool()
345
 
346
+ with pytest.raises(SearchError):
347
+ await tool.search("metformin alzheimer")
348
 
349
 
350
  class TestClinicalTrialsIntegration:
 
352
 
353
  @pytest.mark.integration
354
  @pytest.mark.asyncio
355
+ async def test_real_api_call(self) -> None:
356
  """Test actual API call (requires network)."""
357
  tool = ClinicalTrialsTool()
358
  results = await tool.search("metformin diabetes", max_results=3)
examples/search_demo/run_search.py CHANGED
@@ -20,6 +20,7 @@ Requirements:
20
  import asyncio
21
  import sys
22
 
 
23
  from src.tools.pubmed import PubMedTool
24
  from src.tools.search_handler import SearchHandler
25
 
@@ -33,10 +34,11 @@ async def main(query: str) -> None:
33
 
34
  # Initialize tools
35
  pubmed = PubMedTool()
36
- handler = SearchHandler(tools=[pubmed], timeout=30.0)
 
37
 
38
  # Execute search
39
- print("Searching PubMed in parallel...")
40
  result = await handler.execute(query, max_results_per_tool=5)
41
 
42
  # Display results
 
20
  import asyncio
21
  import sys
22
 
23
+ from src.tools.clinicaltrials import ClinicalTrialsTool
24
  from src.tools.pubmed import PubMedTool
25
  from src.tools.search_handler import SearchHandler
26
 
 
34
 
35
  # Initialize tools
36
  pubmed = PubMedTool()
37
+ trials = ClinicalTrialsTool()
38
+ handler = SearchHandler(tools=[pubmed, trials], timeout=30.0)
39
 
40
  # Execute search
41
+ print("Searching PubMed and ClinicalTrials.gov in parallel...")
42
  result = await handler.execute(query, max_results_per_tool=5)
43
 
44
  # Display results
pyproject.toml CHANGED
@@ -7,25 +7,22 @@ requires-python = ">=3.11"
7
  dependencies = [
8
  # Core
9
  "pydantic>=2.7",
10
- "pydantic-settings>=2.2", # For BaseSettings (config)
11
- "pydantic-ai>=0.0.16", # Agent framework
12
-
13
  # AI Providers
14
  "openai>=1.0.0",
15
  "anthropic>=0.18.0",
16
-
17
  # HTTP & Parsing
18
- "httpx>=0.27", # Async HTTP client
19
- "beautifulsoup4>=4.12", # HTML parsing
20
- "xmltodict>=0.13", # PubMed XML -> dict
21
-
22
  # UI
23
- "gradio>=5.0", # Chat interface
24
-
25
  # Utils
26
- "python-dotenv>=1.0", # .env loading
27
- "tenacity>=8.2", # Retry logic
28
- "structlog>=24.1", # Structured logging
 
29
  ]
30
 
31
  [project.optional-dependencies]
 
7
  dependencies = [
8
  # Core
9
  "pydantic>=2.7",
10
+ "pydantic-settings>=2.2", # For BaseSettings (config)
11
+ "pydantic-ai>=0.0.16", # Agent framework
 
12
  # AI Providers
13
  "openai>=1.0.0",
14
  "anthropic>=0.18.0",
 
15
  # HTTP & Parsing
16
+ "httpx[http2]>=0.27", # Async HTTP client
17
+ "beautifulsoup4>=4.12", # HTML parsing
18
+ "xmltodict>=0.13", # PubMed XML -> dict
 
19
  # UI
20
+ "gradio>=5.0", # Chat interface
 
21
  # Utils
22
+ "python-dotenv>=1.0", # .env loading
23
+ "tenacity>=8.2", # Retry logic
24
+ "structlog>=24.1", # Structured logging
25
+ "requests>=2.32.5",
26
  ]
27
 
28
  [project.optional-dependencies]
src/app.py CHANGED
@@ -8,6 +8,7 @@ import gradio as gr
8
 
9
  from src.agent_factory.judges import JudgeHandler, MockJudgeHandler
10
  from src.orchestrator_factory import create_orchestrator
 
11
  from src.tools.pubmed import PubMedTool
12
  from src.tools.search_handler import SearchHandler
13
  from src.utils.models import OrchestratorConfig
@@ -32,7 +33,7 @@ def configure_orchestrator(use_mock: bool = False, mode: str = "simple") -> Any:
32
 
33
  # Create search tools
34
  search_handler = SearchHandler(
35
- tools=[PubMedTool()],
36
  timeout=config.search_timeout,
37
  )
38
 
@@ -160,7 +161,7 @@ def create_demo() -> Any:
160
  **Note**: This is a research tool and should not be used for medical decisions.
161
  Always consult healthcare professionals for medical advice.
162
 
163
- Built with 🤖 PydanticAI + 🔬 PubMed
164
  """)
165
 
166
  return demo
 
8
 
9
  from src.agent_factory.judges import JudgeHandler, MockJudgeHandler
10
  from src.orchestrator_factory import create_orchestrator
11
+ from src.tools.clinicaltrials import ClinicalTrialsTool
12
  from src.tools.pubmed import PubMedTool
13
  from src.tools.search_handler import SearchHandler
14
  from src.utils.models import OrchestratorConfig
 
33
 
34
  # Create search tools
35
  search_handler = SearchHandler(
36
+ tools=[PubMedTool(), ClinicalTrialsTool()],
37
  timeout=config.search_timeout,
38
  )
39
 
 
161
  **Note**: This is a research tool and should not be used for medical decisions.
162
  Always consult healthcare professionals for medical advice.
163
 
164
+ Built with 🤖 PydanticAI + 🔬 PubMed & ClinicalTrials.gov
165
  """)
166
 
167
  return demo
src/tools/clinicaltrials.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """ClinicalTrials.gov search tool using API v2."""
2
+
3
+ import asyncio
4
+ from typing import Any, ClassVar
5
+
6
+ import requests
7
+ from tenacity import retry, stop_after_attempt, wait_exponential
8
+
9
+ from src.utils.exceptions import SearchError
10
+ from src.utils.models import Citation, Evidence
11
+
12
+
13
+ class ClinicalTrialsTool:
14
+ """Search tool for ClinicalTrials.gov.
15
+
16
+ Note: Uses `requests` library instead of `httpx` because ClinicalTrials.gov's
17
+ WAF blocks httpx's TLS fingerprint. The `requests` library is not blocked.
18
+ See: https://clinicaltrials.gov/data-api/api
19
+ """
20
+
21
+ BASE_URL = "https://clinicaltrials.gov/api/v2/studies"
22
+ FIELDS: ClassVar[list[str]] = [
23
+ "NCTId",
24
+ "BriefTitle",
25
+ "Phase",
26
+ "OverallStatus",
27
+ "Condition",
28
+ "InterventionName",
29
+ "StartDate",
30
+ "BriefSummary",
31
+ ]
32
+
33
+ @property
34
+ def name(self) -> str:
35
+ return "clinicaltrials"
36
+
37
+ @retry(
38
+ stop=stop_after_attempt(3),
39
+ wait=wait_exponential(multiplier=1, min=1, max=10),
40
+ reraise=True,
41
+ )
42
+ async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
43
+ """Search ClinicalTrials.gov for studies.
44
+
45
+ Args:
46
+ query: Search query (e.g., "metformin alzheimer")
47
+ max_results: Maximum results to return (max 100)
48
+
49
+ Returns:
50
+ List of Evidence objects from clinical trials
51
+ """
52
+ params: dict[str, str | int] = {
53
+ "query.term": query,
54
+ "pageSize": min(max_results, 100),
55
+ "fields": "|".join(self.FIELDS),
56
+ }
57
+
58
+ try:
59
+ # Run blocking requests.get in a separate thread for async compatibility
60
+ response = await asyncio.to_thread(
61
+ requests.get,
62
+ self.BASE_URL,
63
+ params=params,
64
+ headers={"User-Agent": "DeepCritical-Research-Agent/1.0"},
65
+ timeout=30,
66
+ )
67
+ response.raise_for_status()
68
+
69
+ data = response.json()
70
+ studies = data.get("studies", [])
71
+ return [self._study_to_evidence(study) for study in studies[:max_results]]
72
+
73
+ except requests.HTTPError as e:
74
+ raise SearchError(f"ClinicalTrials.gov API error: {e}") from e
75
+ except requests.RequestException as e:
76
+ raise SearchError(f"ClinicalTrials.gov request failed: {e}") from e
77
+
78
+ def _study_to_evidence(self, study: dict[str, Any]) -> Evidence:
79
+ """Convert a clinical trial study to Evidence."""
80
+ # Navigate nested structure
81
+ protocol = study.get("protocolSection", {})
82
+ id_module = protocol.get("identificationModule", {})
83
+ status_module = protocol.get("statusModule", {})
84
+ desc_module = protocol.get("descriptionModule", {})
85
+ design_module = protocol.get("designModule", {})
86
+ conditions_module = protocol.get("conditionsModule", {})
87
+ arms_module = protocol.get("armsInterventionsModule", {})
88
+
89
+ nct_id = id_module.get("nctId", "Unknown")
90
+ title = id_module.get("briefTitle", "Untitled Study")
91
+ status = status_module.get("overallStatus", "Unknown")
92
+ start_date = status_module.get("startDateStruct", {}).get("date", "Unknown")
93
+
94
+ # Get phase (might be a list)
95
+ phases = design_module.get("phases", [])
96
+ phase = phases[0] if phases else "Not Applicable"
97
+
98
+ # Get conditions
99
+ conditions = conditions_module.get("conditions", [])
100
+ conditions_str = ", ".join(conditions[:3]) if conditions else "Unknown"
101
+
102
+ # Get interventions
103
+ interventions = arms_module.get("interventions", [])
104
+ intervention_names = [i.get("name", "") for i in interventions[:3]]
105
+ interventions_str = ", ".join(intervention_names) if intervention_names else "Unknown"
106
+
107
+ # Get summary
108
+ summary = desc_module.get("briefSummary", "No summary available.")
109
+
110
+ # Build content with key trial info
111
+ content = (
112
+ f"{summary[:500]}... "
113
+ f"Trial Phase: {phase}. "
114
+ f"Status: {status}. "
115
+ f"Conditions: {conditions_str}. "
116
+ f"Interventions: {interventions_str}."
117
+ )
118
+
119
+ return Evidence(
120
+ content=content[:2000],
121
+ citation=Citation(
122
+ source="clinicaltrials",
123
+ title=title[:500],
124
+ url=f"https://clinicaltrials.gov/study/{nct_id}",
125
+ date=start_date,
126
+ authors=[], # Trials don't have traditional authors
127
+ ),
128
+ relevance=0.85, # Trials are highly relevant for repurposing
129
+ )
src/tools/search_handler.py CHANGED
@@ -49,7 +49,7 @@ class SearchHandler:
49
 
50
  # Process results
51
  all_evidence: list[Evidence] = []
52
- sources_searched: list[Literal["pubmed"]] = []
53
  errors: list[str] = []
54
 
55
  for tool, result in zip(self.tools, results, strict=True):
@@ -62,7 +62,7 @@ class SearchHandler:
62
  all_evidence.extend(success_result)
63
 
64
  # Cast tool.name to the expected Literal
65
- tool_name = cast(Literal["pubmed"], tool.name)
66
  sources_searched.append(tool_name)
67
  logger.info("Search tool succeeded", tool=tool.name, count=len(success_result))
68
 
 
49
 
50
  # Process results
51
  all_evidence: list[Evidence] = []
52
+ sources_searched: list[Literal["pubmed", "clinicaltrials"]] = []
53
  errors: list[str] = []
54
 
55
  for tool, result in zip(self.tools, results, strict=True):
 
62
  all_evidence.extend(success_result)
63
 
64
  # Cast tool.name to the expected Literal
65
+ tool_name = cast(Literal["pubmed", "clinicaltrials"], tool.name)
66
  sources_searched.append(tool_name)
67
  logger.info("Search tool succeeded", tool=tool.name, count=len(success_result))
68
 
src/utils/models.py CHANGED
@@ -9,7 +9,7 @@ from pydantic import BaseModel, Field
9
  class Citation(BaseModel):
10
  """A citation to a source document."""
11
 
12
- source: Literal["pubmed"] = Field(description="Where this came from")
13
  title: str = Field(min_length=1, max_length=500)
14
  url: str = Field(description="URL to the source")
15
  date: str = Field(description="Publication date (YYYY-MM-DD or 'Unknown')")
@@ -41,7 +41,7 @@ class SearchResult(BaseModel):
41
 
42
  query: str
43
  evidence: list[Evidence]
44
- sources_searched: list[Literal["pubmed"]]
45
  total_found: int
46
  errors: list[str] = Field(default_factory=list)
47
 
 
9
  class Citation(BaseModel):
10
  """A citation to a source document."""
11
 
12
+ source: Literal["pubmed", "clinicaltrials"] = Field(description="Where this came from")
13
  title: str = Field(min_length=1, max_length=500)
14
  url: str = Field(description="URL to the source")
15
  date: str = Field(description="Publication date (YYYY-MM-DD or 'Unknown')")
 
41
 
42
  query: str
43
  evidence: list[Evidence]
44
+ sources_searched: list[Literal["pubmed", "clinicaltrials"]]
45
  total_found: int
46
  errors: list[str] = Field(default_factory=list)
47
 
tests/unit/tools/test_clinicaltrials.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Unit tests for ClinicalTrials.gov tool."""
2
+
3
+ from unittest.mock import MagicMock, patch
4
+
5
+ import pytest
6
+ import requests
7
+
8
+ from src.tools.clinicaltrials import ClinicalTrialsTool
9
+ from src.utils.exceptions import SearchError
10
+ from src.utils.models import Evidence
11
+
12
+
13
+ @pytest.fixture
14
+ def mock_clinicaltrials_response() -> dict:
15
+ """Mock ClinicalTrials.gov API response."""
16
+ return {
17
+ "studies": [
18
+ {
19
+ "protocolSection": {
20
+ "identificationModule": {
21
+ "nctId": "NCT04098666",
22
+ "briefTitle": "Metformin in Alzheimer's Dementia Prevention",
23
+ },
24
+ "statusModule": {
25
+ "overallStatus": "Recruiting",
26
+ "startDateStruct": {"date": "2020-01-15"},
27
+ },
28
+ "descriptionModule": {
29
+ "briefSummary": "This study evaluates metformin for Alzheimer's prevention."
30
+ },
31
+ "designModule": {"phases": ["PHASE2"]},
32
+ "conditionsModule": {"conditions": ["Alzheimer Disease", "Dementia"]},
33
+ "armsInterventionsModule": {
34
+ "interventions": [{"name": "Metformin", "type": "Drug"}]
35
+ },
36
+ }
37
+ }
38
+ ]
39
+ }
40
+
41
+
42
+ class TestClinicalTrialsTool:
43
+ """Tests for ClinicalTrialsTool."""
44
+
45
+ def test_tool_name(self) -> None:
46
+ """Tool should have correct name."""
47
+ tool = ClinicalTrialsTool()
48
+ assert tool.name == "clinicaltrials"
49
+
50
+ @pytest.mark.asyncio
51
+ async def test_search_returns_evidence(self, mock_clinicaltrials_response: dict) -> None:
52
+ """Search should return Evidence objects."""
53
+ with patch("src.tools.clinicaltrials.requests.get") as mock_get:
54
+ mock_response = MagicMock()
55
+ mock_response.json.return_value = mock_clinicaltrials_response
56
+ mock_response.raise_for_status = MagicMock()
57
+ mock_get.return_value = mock_response
58
+
59
+ tool = ClinicalTrialsTool()
60
+ results = await tool.search("metformin alzheimer", max_results=5)
61
+
62
+ assert len(results) == 1
63
+ assert isinstance(results[0], Evidence)
64
+ assert results[0].citation.source == "clinicaltrials"
65
+ assert "NCT04098666" in results[0].citation.url
66
+ assert "Metformin" in results[0].citation.title
67
+
68
+ @pytest.mark.asyncio
69
+ async def test_search_extracts_phase(self, mock_clinicaltrials_response: dict) -> None:
70
+ """Search should extract trial phase."""
71
+ with patch("src.tools.clinicaltrials.requests.get") as mock_get:
72
+ mock_response = MagicMock()
73
+ mock_response.json.return_value = mock_clinicaltrials_response
74
+ mock_response.raise_for_status = MagicMock()
75
+ mock_get.return_value = mock_response
76
+
77
+ tool = ClinicalTrialsTool()
78
+ results = await tool.search("metformin alzheimer")
79
+
80
+ assert "PHASE2" in results[0].content
81
+
82
+ @pytest.mark.asyncio
83
+ async def test_search_extracts_status(self, mock_clinicaltrials_response: dict) -> None:
84
+ """Search should extract trial status."""
85
+ with patch("src.tools.clinicaltrials.requests.get") as mock_get:
86
+ mock_response = MagicMock()
87
+ mock_response.json.return_value = mock_clinicaltrials_response
88
+ mock_response.raise_for_status = MagicMock()
89
+ mock_get.return_value = mock_response
90
+
91
+ tool = ClinicalTrialsTool()
92
+ results = await tool.search("metformin alzheimer")
93
+
94
+ assert "Recruiting" in results[0].content
95
+
96
+ @pytest.mark.asyncio
97
+ async def test_search_empty_results(self) -> None:
98
+ """Search should handle empty results gracefully."""
99
+ with patch("src.tools.clinicaltrials.requests.get") as mock_get:
100
+ mock_response = MagicMock()
101
+ mock_response.json.return_value = {"studies": []}
102
+ mock_response.raise_for_status = MagicMock()
103
+ mock_get.return_value = mock_response
104
+
105
+ tool = ClinicalTrialsTool()
106
+ results = await tool.search("nonexistent query xyz")
107
+
108
+ assert results == []
109
+
110
+ @pytest.mark.asyncio
111
+ async def test_search_api_error(self) -> None:
112
+ """Search should raise SearchError on API failure."""
113
+ with patch("src.tools.clinicaltrials.requests.get") as mock_get:
114
+ mock_response = MagicMock()
115
+ mock_response.raise_for_status.side_effect = requests.HTTPError("500 Server Error")
116
+ mock_get.return_value = mock_response
117
+
118
+ tool = ClinicalTrialsTool()
119
+
120
+ with pytest.raises(SearchError):
121
+ await tool.search("metformin alzheimer")
122
+
123
+
124
+ class TestClinicalTrialsIntegration:
125
+ """Integration tests (marked for separate run)."""
126
+
127
+ @pytest.mark.integration
128
+ @pytest.mark.asyncio
129
+ async def test_real_api_call(self) -> None:
130
+ """Test actual API call (requires network)."""
131
+ tool = ClinicalTrialsTool()
132
+ results = await tool.search("metformin diabetes", max_results=3)
133
+
134
+ assert len(results) > 0
135
+ assert all(isinstance(r, Evidence) for r in results)
136
+ assert all(r.citation.source == "clinicaltrials" for r in results)
uv.lock CHANGED
@@ -982,12 +982,13 @@ dependencies = [
982
  { name = "anthropic" },
983
  { name = "beautifulsoup4" },
984
  { name = "gradio" },
985
- { name = "httpx" },
986
  { name = "openai" },
987
  { name = "pydantic" },
988
  { name = "pydantic-ai" },
989
  { name = "pydantic-settings" },
990
  { name = "python-dotenv" },
 
991
  { name = "structlog" },
992
  { name = "tenacity" },
993
  { name = "xmltodict" },
@@ -1020,7 +1021,7 @@ requires-dist = [
1020
  { name = "beautifulsoup4", specifier = ">=4.12" },
1021
  { name = "chromadb", marker = "extra == 'embeddings'", specifier = ">=0.4.0" },
1022
  { name = "gradio", specifier = ">=5.0" },
1023
- { name = "httpx", specifier = ">=0.27" },
1024
  { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.10" },
1025
  { name = "openai", specifier = ">=1.0.0" },
1026
  { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=3.7" },
@@ -1033,6 +1034,7 @@ requires-dist = [
1033
  { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.12" },
1034
  { name = "pytest-sugar", marker = "extra == 'dev'", specifier = ">=1.0" },
1035
  { name = "python-dotenv", specifier = ">=1.0" },
 
1036
  { name = "respx", marker = "extra == 'dev'", specifier = ">=0.21" },
1037
  { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4.0" },
1038
  { name = "sentence-transformers", marker = "extra == 'embeddings'", specifier = ">=2.2.0" },
@@ -1582,6 +1584,19 @@ wheels = [
1582
  { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 },
1583
  ]
1584
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1585
  [[package]]
1586
  name = "hf-xet"
1587
  version = "1.2.0"
@@ -1611,6 +1626,15 @@ wheels = [
1611
  { url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735 },
1612
  ]
1613
 
 
 
 
 
 
 
 
 
 
1614
  [[package]]
1615
  name = "httpcore"
1616
  version = "1.0.9"
@@ -1675,6 +1699,11 @@ wheels = [
1675
  { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 },
1676
  ]
1677
 
 
 
 
 
 
1678
  [[package]]
1679
  name = "httpx-sse"
1680
  version = "0.4.0"
@@ -1720,6 +1749,15 @@ wheels = [
1720
  { url = "https://files.pythonhosted.org/packages/f0/0f/310fb31e39e2d734ccaa2c0fb981ee41f7bd5056ce9bc29b2248bd569169/humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", size = 86794 },
1721
  ]
1722
 
 
 
 
 
 
 
 
 
 
1723
  [[package]]
1724
  name = "identify"
1725
  version = "2.6.15"
 
982
  { name = "anthropic" },
983
  { name = "beautifulsoup4" },
984
  { name = "gradio" },
985
+ { name = "httpx", extra = ["http2"] },
986
  { name = "openai" },
987
  { name = "pydantic" },
988
  { name = "pydantic-ai" },
989
  { name = "pydantic-settings" },
990
  { name = "python-dotenv" },
991
+ { name = "requests" },
992
  { name = "structlog" },
993
  { name = "tenacity" },
994
  { name = "xmltodict" },
 
1021
  { name = "beautifulsoup4", specifier = ">=4.12" },
1022
  { name = "chromadb", marker = "extra == 'embeddings'", specifier = ">=0.4.0" },
1023
  { name = "gradio", specifier = ">=5.0" },
1024
+ { name = "httpx", extras = ["http2"], specifier = ">=0.27" },
1025
  { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.10" },
1026
  { name = "openai", specifier = ">=1.0.0" },
1027
  { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=3.7" },
 
1034
  { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.12" },
1035
  { name = "pytest-sugar", marker = "extra == 'dev'", specifier = ">=1.0" },
1036
  { name = "python-dotenv", specifier = ">=1.0" },
1037
+ { name = "requests", specifier = ">=2.32.5" },
1038
  { name = "respx", marker = "extra == 'dev'", specifier = ">=0.21" },
1039
  { name = "ruff", marker = "extra == 'dev'", specifier = ">=0.4.0" },
1040
  { name = "sentence-transformers", marker = "extra == 'embeddings'", specifier = ">=2.2.0" },
 
1584
  { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515 },
1585
  ]
1586
 
1587
+ [[package]]
1588
+ name = "h2"
1589
+ version = "4.3.0"
1590
+ source = { registry = "https://pypi.org/simple" }
1591
+ dependencies = [
1592
+ { name = "hpack" },
1593
+ { name = "hyperframe" },
1594
+ ]
1595
+ sdist = { url = "https://files.pythonhosted.org/packages/1d/17/afa56379f94ad0fe8defd37d6eb3f89a25404ffc71d4d848893d270325fc/h2-4.3.0.tar.gz", hash = "sha256:6c59efe4323fa18b47a632221a1888bd7fde6249819beda254aeca909f221bf1", size = 2152026 }
1596
+ wheels = [
1597
+ { url = "https://files.pythonhosted.org/packages/69/b2/119f6e6dcbd96f9069ce9a2665e0146588dc9f88f29549711853645e736a/h2-4.3.0-py3-none-any.whl", hash = "sha256:c438f029a25f7945c69e0ccf0fb951dc3f73a5f6412981daee861431b70e2bdd", size = 61779 },
1598
+ ]
1599
+
1600
  [[package]]
1601
  name = "hf-xet"
1602
  version = "1.2.0"
 
1626
  { url = "https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl", hash = "sha256:e6584a52253f72c9f52f9e549d5895ca7a471608495c4ecaa6cc73dba2b24d69", size = 2905735 },
1627
  ]
1628
 
1629
+ [[package]]
1630
+ name = "hpack"
1631
+ version = "4.1.0"
1632
+ source = { registry = "https://pypi.org/simple" }
1633
+ sdist = { url = "https://files.pythonhosted.org/packages/2c/48/71de9ed269fdae9c8057e5a4c0aa7402e8bb16f2c6e90b3aa53327b113f8/hpack-4.1.0.tar.gz", hash = "sha256:ec5eca154f7056aa06f196a557655c5b009b382873ac8d1e66e79e87535f1dca", size = 51276 }
1634
+ wheels = [
1635
+ { url = "https://files.pythonhosted.org/packages/07/c6/80c95b1b2b94682a72cbdbfb85b81ae2daffa4291fbfa1b1464502ede10d/hpack-4.1.0-py3-none-any.whl", hash = "sha256:157ac792668d995c657d93111f46b4535ed114f0c9c8d672271bbec7eae1b496", size = 34357 },
1636
+ ]
1637
+
1638
  [[package]]
1639
  name = "httpcore"
1640
  version = "1.0.9"
 
1699
  { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517 },
1700
  ]
1701
 
1702
+ [package.optional-dependencies]
1703
+ http2 = [
1704
+ { name = "h2" },
1705
+ ]
1706
+
1707
  [[package]]
1708
  name = "httpx-sse"
1709
  version = "0.4.0"
 
1749
  { url = "https://files.pythonhosted.org/packages/f0/0f/310fb31e39e2d734ccaa2c0fb981ee41f7bd5056ce9bc29b2248bd569169/humanfriendly-10.0-py2.py3-none-any.whl", hash = "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", size = 86794 },
1750
  ]
1751
 
1752
+ [[package]]
1753
+ name = "hyperframe"
1754
+ version = "6.1.0"
1755
+ source = { registry = "https://pypi.org/simple" }
1756
+ sdist = { url = "https://files.pythonhosted.org/packages/02/e7/94f8232d4a74cc99514c13a9f995811485a6903d48e5d952771ef6322e30/hyperframe-6.1.0.tar.gz", hash = "sha256:f630908a00854a7adeabd6382b43923a4c4cd4b821fcb527e6ab9e15382a3b08", size = 26566 }
1757
+ wheels = [
1758
+ { url = "https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl", hash = "sha256:b03380493a519fce58ea5af42e4a42317bf9bd425596f7a0835ffce80f1a42e5", size = 13007 },
1759
+ ]
1760
+
1761
  [[package]]
1762
  name = "identify"
1763
  version = "2.6.15"