# MetaSearch / test_api.py
# Commit 08a5a31 (Tirath5504): use OpenRouter only instead of google-genai
"""
Local test script for the MetaSearch API
Tests individual pipeline components with sample data
"""
import asyncio
import os
from dotenv import load_dotenv
load_dotenv()
# Sample test data
# Fixture title: a well-known paper so search/retrieval has real hits.
SAMPLE_PAPER_TITLE = "Attention Is All You Need"
# Shortened abstract of the paper above, fed to retrieval and resolution stages.
SAMPLE_PAPER_ABSTRACT = """
We propose a new simple network architecture, the Transformer, based solely on
attention mechanisms, dispensing with recurrence and convolutions entirely.
Experiments on two machine translation tasks show these models to be superior
in quality while being more parallelizable and requiring significantly less time to train.
"""
# Three synthetic peer reviews with deliberately overlapping AND conflicting
# judgements (e.g. novelty, evaluation scope) so that critique extraction has
# content to pull and disagreement detection has real signal between pairs.
SAMPLE_REVIEWS = [
"""
This paper introduces a novel architecture that replaces recurrence with self-attention.
Strengths:
- The model achieves state-of-the-art results on translation benchmarks
- Training is significantly faster due to parallelization
- The attention visualization provides interpretability
Weaknesses:
- Limited evaluation on other NLP tasks beyond translation
- The computational complexity of self-attention scales quadratically with sequence length
- Missing comparison with some recent RNN variants
The methodology is sound but could benefit from more diverse experiments.
Overall, this is a strong contribution to the field.
""",
"""
The Transformer architecture is an interesting departure from RNN-based models.
Strengths:
- Clean and elegant architecture design
- Strong empirical results on WMT benchmarks
- Good ablation studies
Weaknesses:
- The paper overclaims novelty - attention mechanisms existed before
- Experiments are limited to machine translation only
- No theoretical analysis of why this works better
- Memory requirements are high for long sequences
The significance of this work is questionable given the narrow evaluation scope.
""",
"""
This is a well-written paper with clear presentation of a new architecture.
Strengths:
- Excellent results, setting new SOTA on translation
- The multi-head attention is a clever innovation
- Reproducibility details are provided
Weaknesses:
- Claims of "attention is all you need" are overstated
- Limited to sequence-to-sequence tasks
- Positional encoding seems like a hack
Overall a solid paper with important contributions despite some limitations.
"""
]
async def test_critique_extraction():
    """Run the critique-extraction stage on SAMPLE_REVIEWS and print a summary.

    Returns:
        The list of per-review critique dicts from extract_critiques().
    """
    banner = "=" * 60
    print("\n" + banner)
    print("Testing Critique Extraction")
    print(banner)
    from pipeline.critique_extraction import extract_critiques
    print(f"Processing {len(SAMPLE_REVIEWS)} reviews...")
    critiques = await extract_critiques(SAMPLE_REVIEWS)
    for idx, critique in enumerate(critiques, start=1):
        print(f"\n--- Review {idx} Critiques ---")
        for category, points in critique.items():
            # Skip the error marker key and empty categories.
            if category == "error" or not points:
                continue
            print(f" {category}: {len(points)} points")
            # Preview only the first two points, truncated to 80 chars each.
            for point in points[:2]:
                print(f" - {point[:80]}...")
    return critiques
async def test_disagreement_detection(critiques):
    """Run the disagreement-detection stage over extracted critiques.

    Args:
        critiques: per-review critique dicts, as returned by extract_critiques().

    Returns:
        The list of disagreement dicts produced by detect_disagreements();
        each is expected to carry 'review_pair', 'disagreement_score' and
        'disagreement_details' keys.
    """
    print("\n" + "="*60)
    print("Testing Disagreement Detection")
    print("="*60)
    from pipeline.disagreement_detection import detect_disagreements
    print(f"Detecting disagreements across {len(critiques)} reviews...")
    disagreements = await detect_disagreements(critiques)
    for d in disagreements:
        pair = d.get('review_pair', [])
        score = d.get('disagreement_score', 0)
        # Guard: the .get default of [] meant a malformed entry without both
        # review indices crashed with IndexError on pair[0]/pair[1].
        # Report and skip it instead so the rest of the results still print.
        if len(pair) < 2:
            print(f"\n--- Skipping entry with missing review_pair: {d} ---")
            continue
        print(f"\n--- Reviews {pair[0]+1} vs {pair[1]+1} ---")
        print(f" Disagreement Score: {score:.2f}")
        details = d.get('disagreement_details', {})
        for category, points in details.items():
            if points:
                print(f" {category}: {len(points)} disagreements")
    return disagreements
async def test_search_retrieval(critiques):
    """Exercise the search & retrieval stage and print a condensed view of it.

    Args:
        critiques: per-review critique dicts from extract_critiques().

    Returns:
        The results dict from search_and_retrieve().
    """
    divider = "=" * 60
    print("\n" + divider)
    print("Testing Search & Retrieval")
    print(divider)
    from pipeline.search_retrieval import search_and_retrieve
    print("Searching for SoTA research and evidence...")
    results = await search_and_retrieve(SAMPLE_PAPER_TITLE, SAMPLE_PAPER_ABSTRACT, critiques)
    # Show only a prefix of the (potentially long) SoTA text.
    print("\n--- SoTA Results (first 500 chars) ---")
    print(results.get('SoTA_Results', 'N/A')[:500])
    print("\n--- Combined Critiques ---")
    combined = results.get('Combined_Critiques', {})
    for cat, text in combined.items():
        print(f" {cat}: {len(text)} chars")
    print("\n--- Retrieved Evidence ---")
    retrieved = results.get('Retrieved_Evidence', {})
    for cat, evidence in retrieved.items():
        print(f" {cat}: {len(evidence)} chars")
    return results
async def test_disagreement_resolution(critiques, disagreements, search_results):
    """Run the disagreement-resolution stage and print each resolution's outline.

    Args:
        critiques: per-review critique dicts.
        disagreements: pairwise disagreement dicts from detect_disagreements().
        search_results: retrieval output from search_and_retrieve().

    Returns:
        The list of resolution dicts from resolve_disagreements().
    """
    divider = "=" * 60
    print("\n" + divider)
    print("Testing Disagreement Resolution")
    print(divider)
    from pipeline.disagreement_resolution import resolve_disagreements
    print(f"Resolving {len(disagreements)} disagreements...")
    resolutions = await resolve_disagreements(
        SAMPLE_PAPER_TITLE, SAMPLE_PAPER_ABSTRACT,
        disagreements, critiques, search_results,
    )
    for num, resolution in enumerate(resolutions, start=1):
        print(f"\n--- Resolution {num} ---")
        details = resolution.get('resolution_details', {})
        # Report which critique categories were accepted vs rejected.
        print(f" Accepted categories: {list(details.get('accepted_critique_points', {}).keys())}")
        print(f" Rejected categories: {list(details.get('rejected_critique_points', {}).keys())}")
        summary = details.get('final_resolution_summary', '')
        print(f" Summary: {summary[:200]}...")
    return resolutions
async def test_meta_review(resolutions, search_results):
    """Run the meta-review generation stage and print a preview.

    Args:
        resolutions: resolution dicts from resolve_disagreements().
        search_results: retrieval output from search_and_retrieve().

    Returns:
        The generated meta-review string.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("Testing Meta-Review Generation")
    print(divider)
    from pipeline.meta_review import generate_meta_review
    print("Generating meta-review...")
    meta_review = await generate_meta_review(
        SAMPLE_PAPER_TITLE,
        SAMPLE_PAPER_ABSTRACT,
        resolutions,
        search_results,
    )
    # Only preview the first 1000 characters of the full review text.
    print("\n--- Meta-Review (first 1000 chars) ---")
    print(meta_review[:1000])
    print("...")
    return meta_review
async def run_full_pipeline():
    """Execute every pipeline stage end-to-end against the sample paper.

    Steps: critique extraction -> disagreement detection -> search/retrieval
    -> disagreement resolution -> meta-review generation. Any stage failure
    is caught, reported, and traced rather than propagated.
    """
    header = "#" * 60
    print("\n" + header)
    print("# MetaSearch API - Full Pipeline Test")
    print(header)
    # Every stage needs the OpenRouter key; bail out early if it is absent.
    if not os.getenv("OPENROUTER_API_KEY"):
        print("\n❌ ERROR: OPENROUTER_API_KEY not set!")
        print("Please set it in your .env file")
        return
    print("\n✅ OPENROUTER_API_KEY is set")
    try:
        critiques = await test_critique_extraction()
        disagreements = await test_disagreement_detection(critiques)
        # Search/retrieval can be slow but feeds the resolution stage.
        search_results = await test_search_retrieval(critiques)
        resolutions = await test_disagreement_resolution(
            critiques, disagreements, search_results
        )
        await test_meta_review(resolutions, search_results)
        print("\n" + header)
        print("# ✅ Full Pipeline Test Complete!")
        print(header)
    except Exception as e:
        # Top-level test boundary: report and dump the traceback, don't re-raise.
        print(f"\n❌ Pipeline failed with error: {e}")
        import traceback
        traceback.print_exc()
async def run_quick_test():
    """Run only the critique-extraction stage as a fast smoke test."""
    header = "#" * 60
    print("\n" + header)
    print("# MetaSearch API - Quick Test (Critique Extraction Only)")
    print(header)
    # The extraction stage needs the OpenRouter key; abort without it.
    if not os.getenv("OPENROUTER_API_KEY"):
        print("\n❌ ERROR: OPENROUTER_API_KEY not set!")
        return
    print("\n✅ OPENROUTER_API_KEY is set")
    try:
        await test_critique_extraction()
        print("\n✅ Quick test passed!")
    except Exception as e:
        # Report and trace rather than propagate - this is a test boundary.
        print(f"\n❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
if __name__ == "__main__":
import sys
if len(sys.argv) > 1 and sys.argv[1] == "--quick":
asyncio.run(run_quick_test())
else:
asyncio.run(run_full_pipeline())