VibecoderMcSwaggins committed
Commit 3139749 · 1 Parent(s): e993253

fix: apply lazy init pattern and pydantic-ai fixes to ReportAgent (#12)


Applies the same fixes as the Phase 7 HypothesisAgent (see the sketch after this list):
- Lazy initialization via _get_agent() to avoid requiring an API key at import time
- Use output_type instead of result_type (current pydantic-ai API)
- Use result.output instead of result.data
- Fix line-length issues in tests
- Properly mock get_model in tests
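
For reference, a minimal sketch of the pattern these fixes converge on, assuming the repo's get_model() helper and ResearchReport model; LazyReportLLM is a hypothetical stand-in class for illustration, not the actual agent.

# Sketch only: lazy construction of a pydantic-ai Agent so that importing
# the module does not require an API key to be configured.
from pydantic_ai import Agent

from src.agent_factory.judges import get_model  # repo helper (assumed available)
from src.utils.models import ResearchReport     # repo model (assumed available)


class LazyReportLLM:  # hypothetical stand-in, not the actual agent class
    def __init__(self) -> None:
        self._agent: Agent[None, ResearchReport] | None = None  # built on demand

    def _get_agent(self) -> Agent[None, ResearchReport]:
        if self._agent is None:
            self._agent = Agent(
                model=get_model(),           # model resolved on first use, not at import
                output_type=ResearchReport,  # replaces result_type in current pydantic-ai
            )
        return self._agent

    async def generate(self, prompt: str) -> ResearchReport:
        result = await self._get_agent().run(prompt)
        return result.output  # replaces result.data in current pydantic-ai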

src/agents/report_agent.py ADDED
@@ -0,0 +1,136 @@
+ """Report agent for generating structured research reports."""
+
+ from collections.abc import AsyncIterable
+ from typing import TYPE_CHECKING, Any
+
+ from agent_framework import (
+     AgentRunResponse,
+     AgentRunResponseUpdate,
+     AgentThread,
+     BaseAgent,
+     ChatMessage,
+     Role,
+ )
+ from pydantic_ai import Agent
+
+ from src.agent_factory.judges import get_model
+ from src.prompts.report import SYSTEM_PROMPT, format_report_prompt
+ from src.utils.citation_validator import validate_references
+ from src.utils.models import Evidence, ResearchReport
+
+ if TYPE_CHECKING:
+     from src.services.embeddings import EmbeddingService
+
+
+ class ReportAgent(BaseAgent):  # type: ignore[misc]
+     """Generates structured scientific reports from evidence and hypotheses."""
+
+     def __init__(
+         self,
+         evidence_store: dict[str, Any],
+         embedding_service: "EmbeddingService | None" = None,  # For diverse selection
+     ) -> None:
+         super().__init__(
+             name="ReportAgent",
+             description="Generates structured scientific research reports with citations",
+         )
+         self._evidence_store = evidence_store
+         self._embeddings = embedding_service
+         self._agent: Agent[None, ResearchReport] | None = None  # Lazy init
+
+     def _get_agent(self) -> Agent[None, ResearchReport]:
+         """Lazy initialization of LLM agent to avoid requiring API keys at import."""
+         if self._agent is None:
+             self._agent = Agent(
+                 model=get_model(),
+                 output_type=ResearchReport,
+                 system_prompt=SYSTEM_PROMPT,
+             )
+         return self._agent
+
+     async def run(
+         self,
+         messages: str | ChatMessage | list[str] | list[ChatMessage] | None = None,
+         *,
+         thread: AgentThread | None = None,
+         **kwargs: Any,
+     ) -> AgentRunResponse:
+         """Generate research report."""
+         query = self._extract_query(messages)
+
+         # Gather all context
+         evidence: list[Evidence] = self._evidence_store.get("current", [])
+         hypotheses = self._evidence_store.get("hypotheses", [])
+         assessment = self._evidence_store.get("last_assessment", {})
+
+         if not evidence:
+             return AgentRunResponse(
+                 messages=[
+                     ChatMessage(
+                         role=Role.ASSISTANT,
+                         text="Cannot generate report: No evidence collected.",
+                     )
+                 ],
+                 response_id="report-no-evidence",
+             )
+
+         # Build metadata
+         metadata = {
+             "sources": list(set(e.citation.source for e in evidence)),
+             "iterations": self._evidence_store.get("iteration_count", 0),
+         }
+
+         # Generate report (format_report_prompt is now async)
+         prompt = await format_report_prompt(
+             query=query,
+             evidence=evidence,
+             hypotheses=hypotheses,
+             assessment=assessment,
+             metadata=metadata,
+             embeddings=self._embeddings,
+         )
+
+         result = await self._get_agent().run(prompt)
+         report = result.output
+
+         # ═══════════════════════════════════════════════════════════════════
+         # 🚨 CRITICAL: Validate citations to prevent hallucination
+         # ═══════════════════════════════════════════════════════════════════
+         report = validate_references(report, evidence)
+
+         # Store validated report
+         self._evidence_store["final_report"] = report
+
+         # Return markdown version
+         return AgentRunResponse(
+             messages=[ChatMessage(role=Role.ASSISTANT, text=report.to_markdown())],
+             response_id="report-complete",
+             additional_properties={"report": report.model_dump()},
+         )
+
+     def _extract_query(
+         self, messages: str | ChatMessage | list[str] | list[ChatMessage] | None
+     ) -> str:
+         """Extract query from messages."""
+         if isinstance(messages, str):
+             return messages
+         elif isinstance(messages, ChatMessage):
+             return messages.text or ""
+         elif isinstance(messages, list):
+             for msg in reversed(messages):
+                 if isinstance(msg, ChatMessage) and msg.role == Role.USER:
+                     return msg.text or ""
+                 elif isinstance(msg, str):
+                     return msg
+         return ""
+
+     async def run_stream(
+         self,
+         messages: str | ChatMessage | list[str] | list[ChatMessage] | None = None,
+         *,
+         thread: AgentThread | None = None,
+         **kwargs: Any,
+     ) -> AsyncIterable[AgentRunResponseUpdate]:
+         """Streaming wrapper."""
+         result = await self.run(messages, thread=thread, **kwargs)
+         yield AgentRunResponseUpdate(messages=result.messages, response_id=result.response_id)
src/orchestrator_magentic.py CHANGED
@@ -25,6 +25,7 @@ from agent_framework.openai import OpenAIChatClient
 
 from src.agents.hypothesis_agent import HypothesisAgent
 from src.agents.judge_agent import JudgeAgent
+ from src.agents.report_agent import ReportAgent
 from src.agents.search_agent import SearchAgent
 from src.orchestrator import JudgeHandlerProtocol, SearchHandlerProtocol
 from src.utils.config import settings
@@ -81,6 +82,7 @@ class MagenticOrchestrator:
         search_agent: SearchAgent,
         hypothesis_agent: HypothesisAgent,
         judge_agent: JudgeAgent,
+         report_agent: ReportAgent,
     ) -> Any:
         """Build the Magentic workflow with participants."""
         if not settings.openai_api_key:
@@ -95,6 +97,7 @@
             searcher=search_agent,
             hypothesizer=hypothesis_agent,
             judge=judge_agent,
+             reporter=report_agent,
         )
         .with_standard_manager(
             chat_client=OpenAIChatClient(
@@ -124,12 +127,22 @@ Workflow:
 2. HypothesisAgent: Generate mechanistic hypotheses (Drug -> Target -> Pathway -> Effect).
 3. SearcherAgent: Use hypothesis-suggested queries for targeted search.
 4. JudgeAgent: Evaluate if evidence supports hypotheses.
- 5. Repeat until confident or max rounds.
+ 5. If sufficient -> ReportAgent: Generate structured research report.
+ 6. If not sufficient -> Repeat from step 1 with refined queries.
 
 Focus on:
 - Identifying specific molecular targets
 - Understanding mechanism of action
 - Finding supporting/contradicting evidence for hypotheses
+
+ The final output should be a complete research report with:
+ - Executive summary
+ - Methodology
+ - Hypotheses tested
+ - Mechanistic and clinical findings
+ - Drug candidates
+ - Limitations
+ - Conclusion with references
 """
 
     async def run(self, query: str) -> AsyncGenerator[AgentEvent, None]:
@@ -155,9 +168,10 @@ Focus on:
         hypothesis_agent = HypothesisAgent(
             self._evidence_store, embedding_service=embedding_service
         )
+         report_agent = ReportAgent(self._evidence_store, embedding_service=embedding_service)
 
         # Build workflow and task
-         workflow = self._build_workflow(search_agent, hypothesis_agent, judge_agent)
+         workflow = self._build_workflow(search_agent, hypothesis_agent, judge_agent, report_agent)
         task = self._format_task(query, embedding_service is not None)
 
         iteration = 0
@@ -249,6 +263,12 @@ Focus on:
                 message=f"Judge agent: {_truncate(msg_text)}",
                 iteration=iteration,
             )
+         elif "report" in agent_name.lower():
+             return AgentEvent(
+                 type="synthesizing",
+                 message="Report generated successfully.",
+                 iteration=iteration,
+             )
         return AgentEvent(
             type="judging",
             message=f"{agent_name}: {_truncate(msg_text)}",
src/prompts/report.py ADDED
@@ -0,0 +1,111 @@
+ """Prompts for Report Agent."""
+
+ from typing import TYPE_CHECKING, Any
+
+ from src.utils.text_utils import select_diverse_evidence, truncate_at_sentence
+
+ if TYPE_CHECKING:
+     from src.services.embeddings import EmbeddingService
+     from src.utils.models import Evidence, MechanismHypothesis
+
+ SYSTEM_PROMPT = """You are a scientific writer specializing in drug repurposing research reports.
+
+ Your role is to synthesize evidence and hypotheses into a clear, structured report.
+
+ A good report:
+ 1. Has a clear EXECUTIVE SUMMARY (one paragraph, key takeaways)
+ 2. States the RESEARCH QUESTION clearly
+ 3. Describes METHODOLOGY (what was searched, how)
+ 4. Evaluates HYPOTHESES with evidence counts
+ 5. Separates MECHANISTIC and CLINICAL findings
+ 6. Lists specific DRUG CANDIDATES
+ 7. Acknowledges LIMITATIONS honestly
+ 8. Provides a balanced CONCLUSION
+ 9. Includes properly formatted REFERENCES
+
+ Write in scientific but accessible language. Be specific about evidence strength.
+
+ ─────────────────────────────────────────────────────────────────────────────
+ 🚨 CRITICAL CITATION REQUIREMENTS 🚨
+ ─────────────────────────────────────────────────────────────────────────────
+
+ You MUST follow these rules for the References section:
+
+ 1. You may ONLY cite papers that appear in the Evidence section above
+ 2. Every reference URL must EXACTLY match a provided evidence URL
+ 3. Do NOT invent, fabricate, or hallucinate any references
+ 4. Do NOT modify paper titles, authors, dates, or URLs
+ 5. If unsure about a citation, OMIT it rather than guess
+ 6. Copy URLs exactly as provided - do not create similar-looking URLs
+
+ VIOLATION OF THESE RULES PRODUCES DANGEROUS MISINFORMATION.
+ ─────────────────────────────────────────────────────────────────────────────"""
+
+
+ async def format_report_prompt(
+     query: str,
+     evidence: list["Evidence"],
+     hypotheses: list["MechanismHypothesis"],
+     assessment: dict[str, Any],
+     metadata: dict[str, Any],
+     embeddings: "EmbeddingService | None" = None,
+ ) -> str:
+     """Format prompt for report generation.
+
+     Includes full evidence details for accurate citation.
+     """
+     # Select diverse evidence (not arbitrary truncation)
+     selected = await select_diverse_evidence(evidence, n=20, query=query, embeddings=embeddings)
+
+     # Include FULL citation details for each evidence item
+     # This helps the LLM create accurate references
+     evidence_lines = []
+     for e in selected:
+         authors = ", ".join(e.citation.authors or ["Unknown"])
+         evidence_lines.append(
+             f"- **Title**: {e.citation.title}\n"
+             f"  **URL**: {e.citation.url}\n"
+             f"  **Authors**: {authors}\n"
+             f"  **Date**: {e.citation.date or 'n.d.'}\n"
+             f"  **Source**: {e.citation.source}\n"
+             f"  **Content**: {truncate_at_sentence(e.content, 200)}\n"
+         )
+     evidence_summary = "\n".join(evidence_lines)
+
+     if hypotheses:
+         hypotheses_lines = []
+         for h in hypotheses:
+             hypotheses_lines.append(
+                 f"- {h.drug} -> {h.target} -> {h.pathway} -> {h.effect} "
+                 f"(Confidence: {h.confidence:.0%})"
+             )
+         hypotheses_summary = "\n".join(hypotheses_lines)
+     else:
+         hypotheses_summary = "No hypotheses generated yet."
+
+     sources = ", ".join(metadata.get("sources", []))
+
+     return f"""Generate a structured research report for the following query.
+
+ ## Original Query
+ {query}
+
+ ## Evidence Collected ({len(selected)} papers, selected for diversity)
+
+ {evidence_summary}
+
+ ## Hypotheses Generated
+ {hypotheses_summary}
+
+ ## Assessment Scores
+ - Mechanism Score: {assessment.get('mechanism_score', 'N/A')}/10
+ - Clinical Evidence Score: {assessment.get('clinical_score', 'N/A')}/10
+ - Overall Confidence: {assessment.get('confidence', 0):.0%}
+
+ ## Metadata
+ - Sources Searched: {sources}
+ - Search Iterations: {metadata.get('iterations', 0)}
+
+ Generate a complete ResearchReport with all sections filled in.
+
+ REMINDER: Only cite papers from the Evidence section above. Copy URLs exactly."""
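
A hedged usage sketch of format_report_prompt, reusing the Citation/Evidence constructors from the unit tests below; with embeddings=None, select_diverse_evidence is assumed to fall back to a non-embedding selection.

# Sketch only: render the report prompt for a single collected paper.
import asyncio

from src.prompts.report import format_report_prompt
from src.utils.models import Citation, Evidence

evidence = [
    Evidence(
        content="Metformin activates AMPK...",
        citation=Citation(
            source="pubmed",
            title="Metformin mechanisms",
            url="https://pubmed.ncbi.nlm.nih.gov/12345/",
            date="2023",
            authors=["Smith J"],
        ),
    )
]

prompt = asyncio.run(
    format_report_prompt(
        query="metformin alzheimer",
        evidence=evidence,
        hypotheses=[],  # renders as "No hypotheses generated yet."
        assessment={},  # scores render as N/A and 0%
        metadata={"sources": ["pubmed"], "iterations": 1},
    )
)
assert prompt.startswith("Generate a structured research report")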
src/utils/citation_validator.py ADDED
@@ -0,0 +1,75 @@
+ """Citation validation to prevent LLM hallucination.
+
+ CRITICAL: Medical research requires accurate citations.
+ This module validates that all references exist in collected evidence.
+ """
+
+ import logging
+ from typing import TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     from src.utils.models import Evidence, ResearchReport
+
+ logger = logging.getLogger(__name__)
+
+
+ def validate_references(report: "ResearchReport", evidence: list["Evidence"]) -> "ResearchReport":
+     """Ensure all references actually exist in collected evidence.
+
+     CRITICAL: Prevents LLM hallucination of citations.
+
+     Args:
+         report: The generated research report
+         evidence: All evidence collected during research
+
+     Returns:
+         Report with only valid references (hallucinated ones removed)
+     """
+     # Build set of valid URLs from evidence
+     valid_urls = {e.citation.url for e in evidence}
+     # Also check titles (case-insensitive) as fallback
+     valid_titles = {e.citation.title.lower() for e in evidence}
+
+     validated_refs = []
+     removed_count = 0
+
+     for ref in report.references:
+         ref_url = ref.get("url", "")
+         ref_title = ref.get("title", "").lower()
+
+         # Check if URL matches collected evidence
+         if ref_url in valid_urls:
+             validated_refs.append(ref)
+         # Fallback: check title match (URLs might differ slightly)
+         elif ref_title and any(ref_title in t or t in ref_title for t in valid_titles):
+             validated_refs.append(ref)
+         else:
+             removed_count += 1
+             logger.warning(
+                 f"Removed hallucinated reference: '{ref.get('title', 'Unknown')}' "
+                 f"(URL: {ref_url[:50]}...)"
+             )
+
+     if removed_count > 0:
+         logger.info(
+             f"Citation validation removed {removed_count} hallucinated references. "
+             f"{len(validated_refs)} valid references remain."
+         )
+
+     # Update report with validated references
+     report.references = validated_refs
+     return report
+
+
+ def build_reference_from_evidence(evidence: "Evidence") -> dict[str, str]:
+     """Build a properly formatted reference from evidence.
+
+     Use this to ensure references match the original evidence exactly.
+     """
+     return {
+         "title": evidence.citation.title,
+         "authors": ", ".join(evidence.citation.authors or ["Unknown"]),
+         "source": evidence.citation.source,
+         "date": evidence.citation.date or "n.d.",
+         "url": evidence.citation.url,
+     }
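
A condensed usage sketch of the two helpers (the unit tests below exercise the validator more fully); `report` and `evidence` are assumed to be a populated ResearchReport and its collected Evidence list, constructed as in those tests.

# Sketch only: a reference built from real evidence always survives
# validation; a fabricated one is dropped and a warning is logged.
from src.utils.citation_validator import build_reference_from_evidence, validate_references

report.references = [
    build_reference_from_evidence(evidence[0]),  # exact match with evidence, kept
    {"title": "Invented Research", "url": "https://pubmed.ncbi.nlm.nih.gov/99999999/"},
]
validated = validate_references(report, evidence)
assert len(validated.references) == 1  # the fabricated entry is removed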
src/utils/models.py CHANGED
@@ -172,6 +172,105 @@ class HypothesisAssessment(BaseModel):
     recommended_searches: list[str] = Field(description="Searches to fill knowledge gaps")
 
 
+ class ReportSection(BaseModel):
+     """A section of the research report."""
+
+     title: str
+     content: str
+     citations: list[str] = Field(default_factory=list)
+
+
+ class ResearchReport(BaseModel):
+     """Structured scientific report."""
+
+     title: str = Field(description="Report title")
+     executive_summary: str = Field(
+         description="One-paragraph summary for quick reading", min_length=100, max_length=1000
+     )
+     research_question: str = Field(description="Clear statement of what was investigated")
+
+     methodology: ReportSection = Field(description="How the research was conducted")
+     hypotheses_tested: list[dict[str, Any]] = Field(
+         description="Hypotheses with supporting/contradicting evidence counts"
+     )
+
+     mechanistic_findings: ReportSection = Field(description="Findings about drug mechanisms")
+     clinical_findings: ReportSection = Field(
+         description="Findings from clinical/preclinical studies"
+     )
+
+     drug_candidates: list[str] = Field(description="Identified drug candidates")
+     limitations: list[str] = Field(description="Study limitations")
+     conclusion: str = Field(description="Overall conclusion")
+
+     references: list[dict[str, str]] = Field(
+         description="Formatted references with title, authors, source, URL"
+     )
+
+     # Metadata
+     sources_searched: list[str] = Field(default_factory=list)
+     total_papers_reviewed: int = 0
+     search_iterations: int = 0
+     confidence_score: float = Field(ge=0, le=1)
+
+     def to_markdown(self) -> str:
+         """Render report as markdown."""
+         sections = [
+             f"# {self.title}\n",
+             f"## Executive Summary\n{self.executive_summary}\n",
+             f"## Research Question\n{self.research_question}\n",
+             f"## Methodology\n{self.methodology.content}\n",
+         ]
+
+         # Hypotheses
+         sections.append("## Hypotheses Tested\n")
+         for h in self.hypotheses_tested:
+             supported = h.get("supported", 0)
+             contradicted = h.get("contradicted", 0)
+             status = "✅ Supported" if supported > contradicted else "⚠️ Mixed"
+             sections.append(
+                 f"- **{h.get('mechanism', 'Unknown')}** ({status}): "
+                 f"{supported} supporting, {contradicted} contradicting\n"
+             )
+
+         # Findings
+         sections.append(f"## Mechanistic Findings\n{self.mechanistic_findings.content}\n")
+         sections.append(f"## Clinical Findings\n{self.clinical_findings.content}\n")
+
+         # Drug candidates
+         sections.append("## Drug Candidates\n")
+         for drug in self.drug_candidates:
+             sections.append(f"- **{drug}**\n")
+
+         # Limitations
+         sections.append("## Limitations\n")
+         for lim in self.limitations:
+             sections.append(f"- {lim}\n")
+
+         # Conclusion
+         sections.append(f"## Conclusion\n{self.conclusion}\n")
+
+         # References
+         sections.append("## References\n")
+         for i, ref in enumerate(self.references, 1):
+             sections.append(
+                 f"{i}. {ref.get('authors', 'Unknown')}. "
+                 f"*{ref.get('title', 'Untitled')}*. "
+                 f"{ref.get('source', '')} ({ref.get('date', '')}). "
+                 f"[Link]({ref.get('url', '#')})\n"
+             )
+
+         # Metadata footer
+         sections.append("\n---\n")
+         sections.append(
+             f"*Report generated from {self.total_papers_reviewed} papers "
+             f"across {self.search_iterations} search iterations. "
+             f"Confidence: {self.confidence_score:.0%}*"
+         )
+
+         return "\n".join(sections)
+
+
 class OrchestratorConfig(BaseModel):
     """Configuration for the orchestrator."""
 
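
A quick rendering check for the new model; `report` is assumed to be a populated ResearchReport such as the mock_report fixture in the tests below.

# Sketch only: to_markdown() emits the title as an H1 followed by the
# fixed section headings, ending with the metadata footer.
md = report.to_markdown()
assert md.startswith(f"# {report.title}")
assert "## Executive Summary" in md
assert "## References" in md
assert md.endswith("*")  # confidence footer, e.g. "Confidence: 75%*"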
tests/unit/agents/test_report_agent.py ADDED
@@ -0,0 +1,228 @@
+ """Unit tests for ReportAgent."""
+
+ from typing import Any
+ from unittest.mock import AsyncMock, MagicMock, patch
+
+ import pytest
+
+ from src.agents.report_agent import ReportAgent
+ from src.utils.models import (
+     Citation,
+     Evidence,
+     MechanismHypothesis,
+     ReportSection,
+     ResearchReport,
+ )
+
+
+ @pytest.fixture
+ def sample_evidence() -> list[Evidence]:
+     return [
+         Evidence(
+             content="Metformin activates AMPK...",
+             citation=Citation(
+                 source="pubmed",
+                 title="Metformin mechanisms",
+                 url="https://pubmed.ncbi.nlm.nih.gov/12345/",
+                 date="2023",
+                 authors=["Smith J", "Jones A"],
+             ),
+         )
+     ]
+
+
+ @pytest.fixture
+ def sample_hypotheses() -> list[MechanismHypothesis]:
+     return [
+         MechanismHypothesis(
+             drug="Metformin",
+             target="AMPK",
+             pathway="mTOR inhibition",
+             effect="Neuroprotection",
+             confidence=0.8,
+             search_suggestions=[],
+         )
+     ]
+
+
+ @pytest.fixture
+ def mock_report() -> ResearchReport:
+     return ResearchReport(
+         title="Drug Repurposing Analysis: Metformin for Alzheimer's",
+         executive_summary=(
+             "This report analyzes metformin as a potential candidate for "
+             "repurposing in Alzheimer's disease treatment. It summarizes "
+             "findings from mechanistic studies showing AMPK activation effects "
+             "and reviews clinical data. The evidence suggests a potential "
+             "neuroprotective role, although clinical trials are still limited."
+         ),
+         research_question="Can metformin be repurposed for Alzheimer's disease?",
+         methodology=ReportSection(
+             title="Methodology", content="Searched PubMed and web sources..."
+         ),
+         hypotheses_tested=[
+             {"mechanism": "Metformin -> AMPK -> neuroprotection", "supported": 5, "contradicted": 1}
+         ],
+         mechanistic_findings=ReportSection(
+             title="Mechanistic Findings", content="Evidence suggests AMPK activation..."
+         ),
+         clinical_findings=ReportSection(
+             title="Clinical Findings", content="Limited clinical data available..."
+         ),
+         drug_candidates=["Metformin"],
+         limitations=["Abstract-level analysis only"],
+         conclusion="Metformin shows promise...",
+         references=[],
+         sources_searched=["pubmed", "web"],
+         total_papers_reviewed=10,
+         search_iterations=3,
+         confidence_score=0.75,
+     )
+
+
+ @pytest.mark.asyncio
+ async def test_report_agent_generates_report(
+     sample_evidence: list[Evidence],
+     sample_hypotheses: list[MechanismHypothesis],
+     mock_report: ResearchReport,
+ ) -> None:
+     """ReportAgent should generate a structured report."""
+     store: dict[str, Any] = {
+         "current": sample_evidence,
+         "hypotheses": sample_hypotheses,
+         "last_assessment": {"mechanism_score": 8, "clinical_score": 6},
+     }
+
+     with (
+         patch("src.agents.report_agent.get_model") as mock_get_model,
+         patch("src.agents.report_agent.Agent") as mock_agent_class,
+     ):
+         mock_get_model.return_value = MagicMock()
+         mock_result = MagicMock()
+         mock_result.output = mock_report
+         mock_agent_class.return_value.run = AsyncMock(return_value=mock_result)
+
+         agent = ReportAgent(store)
+         response = await agent.run("metformin alzheimer")
+
+         assert response.messages[0].text is not None
+         assert "Executive Summary" in response.messages[0].text
+         assert "Methodology" in response.messages[0].text
+         assert "References" in response.messages[0].text
+
+
+ @pytest.mark.asyncio
+ async def test_report_agent_no_evidence() -> None:
+     """ReportAgent should handle empty evidence gracefully."""
+     store: dict[str, Any] = {"current": [], "hypotheses": []}
+
+     # Lazy init means no patching needed - the LLM agent is only
+     # instantiated once run() actually has evidence to report on
+     agent = ReportAgent(store)
+     response = await agent.run("test query")
+
+     assert response.messages[0].text is not None
+     assert "Cannot generate report" in response.messages[0].text
+
+
+ # ═══════════════════════════════════════════════════════════════════════════
+ # 🚨 CRITICAL: Citation Validation Tests
+ # ═══════════════════════════════════════════════════════════════════════════
+
+
+ @pytest.mark.asyncio
+ async def test_report_agent_removes_hallucinated_citations(
+     sample_evidence: list[Evidence],
+ ) -> None:
+     """ReportAgent should remove citations not in evidence."""
+     from src.utils.citation_validator import validate_references
+
+     # Create report with a mix of valid and hallucinated references
+     report_with_hallucinations = ResearchReport(
+         title="Test Report",
+         executive_summary=(
+             "This is a test report for citation validation. It needs to be "
+             "sufficiently long to pass validation. We are ensuring that the "
+             "system correctly identifies and removes citations that do not "
+             "appear in collected evidence. This prevents hallucinations."
+         ),
+         research_question="Testing citation validation",
+         methodology=ReportSection(title="Methodology", content="Test"),
+         hypotheses_tested=[],
+         mechanistic_findings=ReportSection(title="Mechanistic", content="Test"),
+         clinical_findings=ReportSection(title="Clinical", content="Test"),
+         drug_candidates=["TestDrug"],
+         limitations=["Test limitation"],
+         conclusion="Test conclusion",
+         references=[
+             # Valid reference (matches sample_evidence)
+             {
+                 "title": "Metformin mechanisms",
+                 "url": "https://pubmed.ncbi.nlm.nih.gov/12345/",
+                 "authors": "Smith J, Jones A",
+                 "date": "2023",
+                 "source": "pubmed",
+             },
+             # HALLUCINATED reference (URL doesn't exist in evidence)
+             {
+                 "title": "Fake Paper That Doesn't Exist",
+                 "url": "https://fake-journal.com/made-up-paper",
+                 "authors": "Hallucinated A",
+                 "date": "2024",
+                 "source": "fake",
+             },
+             # Another HALLUCINATED reference
+             {
+                 "title": "Invented Research",
+                 "url": "https://pubmed.ncbi.nlm.nih.gov/99999999/",
+                 "authors": "NotReal B",
+                 "date": "2025",
+                 "source": "pubmed",
+             },
+         ],
+         sources_searched=["pubmed"],
+         total_papers_reviewed=1,
+         search_iterations=1,
+         confidence_score=0.5,
+     )
+
+     # Validate - should remove hallucinated references
+     validated_report = validate_references(report_with_hallucinations, sample_evidence)
+
+     # Only the valid reference should remain
+     assert len(validated_report.references) == 1
+     assert validated_report.references[0]["title"] == "Metformin mechanisms"
+     # The fabricated URL must not survive validation
+     ref_urls = [r.get("url") for r in validated_report.references]
+     assert "https://fake-journal.com/made-up-paper" not in ref_urls
+
+
+ def test_citation_validator_handles_empty_references() -> None:
+     """Citation validator should handle reports with no references."""
+     from src.utils.citation_validator import validate_references
+
+     report = ResearchReport(
+         title="Empty Refs Report",
+         executive_summary=(
+             "This report has no references. It is designed to test the "
+             "validator's handling of empty reference lists. We must ensure "
+             "that the system does not crash when a report contains no "
+             "citations. This is a valid edge case in early-stage research."
+         ),
+         research_question="Testing empty refs",
+         methodology=ReportSection(title="Methodology", content="Test"),
+         hypotheses_tested=[],
+         mechanistic_findings=ReportSection(title="Mechanistic", content="Test"),
+         clinical_findings=ReportSection(title="Clinical", content="Test"),
+         drug_candidates=[],
+         limitations=[],
+         conclusion="Test",
+         references=[],  # Empty!
+         sources_searched=[],
+         total_papers_reviewed=0,
+         search_iterations=0,
+         confidence_score=0.0,
+     )
+
+     validated = validate_references(report, [])
+     assert validated.references == []