Spaces:
Sleeping
Sleeping
| """ | |
| Local test script for the MetaSearch API | |
| Tests individual pipeline components with sample data | |
| """ | |
| import asyncio | |
| import os | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
# Sample test data
# A well-known paper plus three hand-written reviews that deliberately
# disagree (e.g. on novelty and significance) so the disagreement-detection
# stage has real conflicts to find.

# Title of the sample paper under review.
SAMPLE_PAPER_TITLE = "Attention Is All You Need"

# Abridged abstract of the sample paper.
SAMPLE_PAPER_ABSTRACT = """
We propose a new simple network architecture, the Transformer, based solely on
attention mechanisms, dispensing with recurrence and convolutions entirely.
Experiments on two machine translation tasks show these models to be superior
in quality while being more parallelizable and requiring significantly less time to train.
"""

# Three free-text reviews. Reviews 1 and 3 are broadly positive; review 2 is
# negative on novelty/significance, creating intentional disagreements.
SAMPLE_REVIEWS = [
    """
    This paper introduces a novel architecture that replaces recurrence with self-attention.
    Strengths:
    - The model achieves state-of-the-art results on translation benchmarks
    - Training is significantly faster due to parallelization
    - The attention visualization provides interpretability
    Weaknesses:
    - Limited evaluation on other NLP tasks beyond translation
    - The computational complexity of self-attention scales quadratically with sequence length
    - Missing comparison with some recent RNN variants
    The methodology is sound but could benefit from more diverse experiments.
    Overall, this is a strong contribution to the field.
    """,
    """
    The Transformer architecture is an interesting departure from RNN-based models.
    Strengths:
    - Clean and elegant architecture design
    - Strong empirical results on WMT benchmarks
    - Good ablation studies
    Weaknesses:
    - The paper overclaims novelty - attention mechanisms existed before
    - Experiments are limited to machine translation only
    - No theoretical analysis of why this works better
    - Memory requirements are high for long sequences
    The significance of this work is questionable given the narrow evaluation scope.
    """,
    """
    This is a well-written paper with clear presentation of a new architecture.
    Strengths:
    - Excellent results, setting new SOTA on translation
    - The multi-head attention is a clever innovation
    - Reproducibility details are provided
    Weaknesses:
    - Claims of "attention is all you need" are overstated
    - Limited to sequence-to-sequence tasks
    - Positional encoding seems like a hack
    Overall a solid paper with important contributions despite some limitations.
    """
]
async def test_critique_extraction():
    """Run the critique-extraction stage over SAMPLE_REVIEWS and print a summary.

    Returns:
        The list of per-review critique dicts from extract_critiques().
    """
    banner = "=" * 60
    print("\n" + banner)
    print("Testing Critique Extraction")
    print(banner)
    from pipeline.critique_extraction import extract_critiques
    print(f"Processing {len(SAMPLE_REVIEWS)} reviews...")
    critiques = await extract_critiques(SAMPLE_REVIEWS)
    for review_no, review_critique in enumerate(critiques, start=1):
        print(f"\n--- Review {review_no} Critiques ---")
        for category, points in review_critique.items():
            # Skip error markers and empty categories.
            if category == "error" or not points:
                continue
            print(f" {category}: {len(points)} points")
            # Preview only the first two points per category.
            for point in points[:2]:
                print(f" - {point[:80]}...")
    return critiques
async def test_disagreement_detection(critiques):
    """Run the disagreement-detection stage and print a summary.

    Args:
        critiques: per-review critique dicts produced by extract_critiques().

    Returns:
        The list of disagreement dicts from detect_disagreements().
    """
    print("\n" + "="*60)
    print("Testing Disagreement Detection")
    print("="*60)
    from pipeline.disagreement_detection import detect_disagreements
    print(f"Detecting disagreements across {len(critiques)} reviews...")
    disagreements = await detect_disagreements(critiques)
    for d in disagreements:
        pair = d.get('review_pair', [])
        score = d.get('disagreement_score', 0)
        # Guard: the default [] (or a malformed result) would otherwise raise
        # IndexError on pair[0]/pair[1].
        if len(pair) >= 2:
            print(f"\n--- Reviews {pair[0]+1} vs {pair[1]+1} ---")
        else:
            print(f"\n--- Reviews (pair unavailable: {pair}) ---")
        print(f" Disagreement Score: {score:.2f}")
        details = d.get('disagreement_details', {})
        for category, points in details.items():
            if points:
                print(f" {category}: {len(points)} disagreements")
    return disagreements
async def test_search_retrieval(critiques):
    """Run the search & retrieval stage for the sample paper and print a summary.

    Args:
        critiques: per-review critique dicts produced by extract_critiques().

    Returns:
        The results dict from search_and_retrieve() (keys observed here:
        'SoTA_Results', 'Combined_Critiques', 'Retrieved_Evidence').
    """
    print("\n" + "="*60)
    print("Testing Search & Retrieval")
    print("="*60)
    from pipeline.search_retrieval import search_and_retrieve
    print("Searching for SoTA research and evidence...")
    results = await search_and_retrieve(
        SAMPLE_PAPER_TITLE,
        SAMPLE_PAPER_ABSTRACT,
        critiques
    )
    # Fixed: these headers had no placeholders, so the f-prefix was noise (F541).
    print("\n--- SoTA Results (first 500 chars) ---")
    print(results.get('SoTA_Results', 'N/A')[:500])
    # Both remaining sections are category -> text mappings; print their sizes.
    for header, key in (
        ("Combined Critiques", 'Combined_Critiques'),
        ("Retrieved Evidence", 'Retrieved_Evidence'),
    ):
        print(f"\n--- {header} ---")
        for cat, text in results.get(key, {}).items():
            print(f" {cat}: {len(text)} chars")
    return results
async def test_disagreement_resolution(critiques, disagreements, search_results):
    """Run the disagreement-resolution stage and print a per-resolution summary.

    Args:
        critiques: per-review critique dicts.
        disagreements: output of detect_disagreements().
        search_results: output of search_and_retrieve().

    Returns:
        The list of resolution dicts from resolve_disagreements().
    """
    banner = "=" * 60
    print("\n" + banner)
    print("Testing Disagreement Resolution")
    print(banner)
    from pipeline.disagreement_resolution import resolve_disagreements
    print(f"Resolving {len(disagreements)} disagreements...")
    resolutions = await resolve_disagreements(
        SAMPLE_PAPER_TITLE,
        SAMPLE_PAPER_ABSTRACT,
        disagreements,
        critiques,
        search_results
    )
    for resolution_no, resolution in enumerate(resolutions, start=1):
        print(f"\n--- Resolution {resolution_no} ---")
        details = resolution.get('resolution_details', {})
        accepted = details.get('accepted_critique_points', {})
        rejected = details.get('rejected_critique_points', {})
        print(f" Accepted categories: {list(accepted.keys())}")
        print(f" Rejected categories: {list(rejected.keys())}")
        summary = details.get('final_resolution_summary', '')
        # Show only a 200-char preview of the summary text.
        print(f" Summary: {summary[:200]}...")
    return resolutions
async def test_meta_review(resolutions, search_results):
    """Run the meta-review generation stage and print a preview.

    Args:
        resolutions: output of resolve_disagreements().
        search_results: output of search_and_retrieve().

    Returns:
        The meta-review text from generate_meta_review().
    """
    print("\n" + "="*60)
    print("Testing Meta-Review Generation")
    print("="*60)
    from pipeline.meta_review import generate_meta_review
    print("Generating meta-review...")
    meta_review = await generate_meta_review(
        SAMPLE_PAPER_TITLE,
        SAMPLE_PAPER_ABSTRACT,
        resolutions,
        search_results
    )
    # Fixed: header had no placeholders, so the f-prefix was noise (F541).
    print("\n--- Meta-Review (first 1000 chars) ---")
    print(meta_review[:1000])
    # Fixed: only signal truncation when the text actually was truncated.
    if len(meta_review) > 1000:
        print("...")
    return meta_review
async def run_full_pipeline():
    """Run every pipeline stage end-to-end against the sample data.

    Aborts early with a message if OPENROUTER_API_KEY is unset. Any stage
    failure is caught at this top-level boundary and reported with a full
    traceback rather than crashing the script.
    """
    print("\n" + "#"*60)
    print("# MetaSearch API - Full Pipeline Test")
    print("#"*60)
    # Check environment before doing any (billable) API work.
    if not os.getenv("OPENROUTER_API_KEY"):
        print("\n❌ ERROR: OPENROUTER_API_KEY not set!")
        print("Please set it in your .env file")
        return
    print("\n✅ OPENROUTER_API_KEY is set")
    try:
        # Step 1: Extract critiques
        critiques = await test_critique_extraction()
        # Step 2: Detect disagreements
        disagreements = await test_disagreement_detection(critiques)
        # Step 3: Search and retrieve (optional - can be slow)
        search_results = await test_search_retrieval(critiques)
        # Step 4: Resolve disagreements
        resolutions = await test_disagreement_resolution(
            critiques, disagreements, search_results
        )
        # Step 5: Generate meta-review (result already printed by the helper,
        # so there is no need to bind it — fixes the unused local, F841).
        await test_meta_review(resolutions, search_results)
        print("\n" + "#"*60)
        print("# ✅ Full Pipeline Test Complete!")
        print("#"*60)
    except Exception as e:
        # Deliberate broad catch: this is the script's top-level boundary.
        print(f"\n❌ Pipeline failed with error: {e}")
        import traceback
        traceback.print_exc()
async def run_quick_test():
    """Run only the critique-extraction stage as a fast smoke test.

    Aborts early with a message if OPENROUTER_API_KEY is unset; failures are
    caught and reported with a traceback at this top-level boundary.
    """
    print("\n" + "#"*60)
    print("# MetaSearch API - Quick Test (Critique Extraction Only)")
    print("#"*60)
    if not os.getenv("OPENROUTER_API_KEY"):
        print("\n❌ ERROR: OPENROUTER_API_KEY not set!")
        return
    print("\n✅ OPENROUTER_API_KEY is set")
    try:
        # Result is printed by the helper; no need to bind it (fixes F841).
        await test_critique_extraction()
        print("\n✅ Quick test passed!")
    except Exception as e:
        # Deliberate broad catch: top-level script boundary.
        print(f"\n❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
if __name__ == "__main__":
    import sys

    # Select the entry coroutine: `--quick` runs only critique extraction,
    # anything else runs the full pipeline.
    quick_mode = len(sys.argv) > 1 and sys.argv[1] == "--quick"
    entry = run_quick_test if quick_mode else run_full_pipeline
    asyncio.run(entry())