""" Local test script for the MetaSearch API Tests individual pipeline components with sample data """ import asyncio import os from dotenv import load_dotenv load_dotenv() # Sample test data SAMPLE_PAPER_TITLE = "Attention Is All You Need" SAMPLE_PAPER_ABSTRACT = """ We propose a new simple network architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two machine translation tasks show these models to be superior in quality while being more parallelizable and requiring significantly less time to train. """ SAMPLE_REVIEWS = [ """ This paper introduces a novel architecture that replaces recurrence with self-attention. Strengths: - The model achieves state-of-the-art results on translation benchmarks - Training is significantly faster due to parallelization - The attention visualization provides interpretability Weaknesses: - Limited evaluation on other NLP tasks beyond translation - The computational complexity of self-attention scales quadratically with sequence length - Missing comparison with some recent RNN variants The methodology is sound but could benefit from more diverse experiments. Overall, this is a strong contribution to the field. """, """ The Transformer architecture is an interesting departure from RNN-based models. Strengths: - Clean and elegant architecture design - Strong empirical results on WMT benchmarks - Good ablation studies Weaknesses: - The paper overclaims novelty - attention mechanisms existed before - Experiments are limited to machine translation only - No theoretical analysis of why this works better - Memory requirements are high for long sequences The significance of this work is questionable given the narrow evaluation scope. """, """ This is a well-written paper with clear presentation of a new architecture. Strengths: - Excellent results, setting new SOTA on translation - The multi-head attention is a clever innovation - Reproducibility details are provided Weaknesses: - Claims of "attention is all you need" are overstated - Limited to sequence-to-sequence tasks - Positional encoding seems like a hack Overall a solid paper with important contributions despite some limitations. 
""" ] async def test_critique_extraction(): """Test the critique extraction module""" print("\n" + "="*60) print("Testing Critique Extraction") print("="*60) from pipeline.critique_extraction import extract_critiques print(f"Processing {len(SAMPLE_REVIEWS)} reviews...") critiques = await extract_critiques(SAMPLE_REVIEWS) for i, critique in enumerate(critiques): print(f"\n--- Review {i+1} Critiques ---") for category, points in critique.items(): if category != "error" and points: print(f" {category}: {len(points)} points") for point in points[:2]: # Show first 2 points print(f" - {point[:80]}...") return critiques async def test_disagreement_detection(critiques): """Test the disagreement detection module""" print("\n" + "="*60) print("Testing Disagreement Detection") print("="*60) from pipeline.disagreement_detection import detect_disagreements print(f"Detecting disagreements across {len(critiques)} reviews...") disagreements = await detect_disagreements(critiques) for d in disagreements: pair = d.get('review_pair', []) score = d.get('disagreement_score', 0) print(f"\n--- Reviews {pair[0]+1} vs {pair[1]+1} ---") print(f" Disagreement Score: {score:.2f}") details = d.get('disagreement_details', {}) for category, points in details.items(): if points: print(f" {category}: {len(points)} disagreements") return disagreements async def test_search_retrieval(critiques): """Test the search and retrieval module""" print("\n" + "="*60) print("Testing Search & Retrieval") print("="*60) from pipeline.search_retrieval import search_and_retrieve print("Searching for SoTA research and evidence...") results = await search_and_retrieve( SAMPLE_PAPER_TITLE, SAMPLE_PAPER_ABSTRACT, critiques ) print(f"\n--- SoTA Results (first 500 chars) ---") print(results.get('SoTA_Results', 'N/A')[:500]) print(f"\n--- Combined Critiques ---") for cat, text in results.get('Combined_Critiques', {}).items(): print(f" {cat}: {len(text)} chars") print(f"\n--- Retrieved Evidence ---") for cat, evidence in results.get('Retrieved_Evidence', {}).items(): print(f" {cat}: {len(evidence)} chars") return results async def test_disagreement_resolution(critiques, disagreements, search_results): """Test the disagreement resolution module""" print("\n" + "="*60) print("Testing Disagreement Resolution") print("="*60) from pipeline.disagreement_resolution import resolve_disagreements print(f"Resolving {len(disagreements)} disagreements...") resolutions = await resolve_disagreements( SAMPLE_PAPER_TITLE, SAMPLE_PAPER_ABSTRACT, disagreements, critiques, search_results ) for i, resolution in enumerate(resolutions): print(f"\n--- Resolution {i+1} ---") details = resolution.get('resolution_details', {}) accepted = details.get('accepted_critique_points', {}) rejected = details.get('rejected_critique_points', {}) print(f" Accepted categories: {list(accepted.keys())}") print(f" Rejected categories: {list(rejected.keys())}") summary = details.get('final_resolution_summary', '') print(f" Summary: {summary[:200]}...") return resolutions async def test_meta_review(resolutions, search_results): """Test the meta-review generation module""" print("\n" + "="*60) print("Testing Meta-Review Generation") print("="*60) from pipeline.meta_review import generate_meta_review print("Generating meta-review...") meta_review = await generate_meta_review( SAMPLE_PAPER_TITLE, SAMPLE_PAPER_ABSTRACT, resolutions, search_results ) print(f"\n--- Meta-Review (first 1000 chars) ---") print(meta_review[:1000]) print("...") return meta_review async def run_full_pipeline(): """Run 


async def run_full_pipeline():
    """Run the complete pipeline test."""
    print("\n" + "#" * 60)
    print("# MetaSearch API - Full Pipeline Test")
    print("#" * 60)

    # Check environment
    if not os.getenv("OPENROUTER_API_KEY"):
        print("\n❌ ERROR: OPENROUTER_API_KEY not set!")
        print("Please set it in your .env file")
        return
    print("\n✅ OPENROUTER_API_KEY is set")

    try:
        # Step 1: Extract critiques
        critiques = await test_critique_extraction()

        # Step 2: Detect disagreements
        disagreements = await test_disagreement_detection(critiques)

        # Step 3: Search and retrieve (optional - can be slow)
        search_results = await test_search_retrieval(critiques)

        # Step 4: Resolve disagreements
        resolutions = await test_disagreement_resolution(
            critiques, disagreements, search_results
        )

        # Step 5: Generate meta-review
        await test_meta_review(resolutions, search_results)

        print("\n" + "#" * 60)
        print("# ✅ Full Pipeline Test Complete!")
        print("#" * 60)
    except Exception as e:
        print(f"\n❌ Pipeline failed with error: {e}")
        import traceback

        traceback.print_exc()


async def run_quick_test():
    """Run a quick test of just critique extraction."""
    print("\n" + "#" * 60)
    print("# MetaSearch API - Quick Test (Critique Extraction Only)")
    print("#" * 60)

    if not os.getenv("OPENROUTER_API_KEY"):
        print("\n❌ ERROR: OPENROUTER_API_KEY not set!")
        return
    print("\n✅ OPENROUTER_API_KEY is set")

    try:
        await test_critique_extraction()
        print("\n✅ Quick test passed!")
    except Exception as e:
        print(f"\n❌ Test failed: {e}")
        import traceback

        traceback.print_exc()


if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1 and sys.argv[1] == "--quick":
        asyncio.run(run_quick_test())
    else:
        asyncio.run(run_full_pipeline())