# MetaSearch / test_api.py
# Commit 08a5a31 (Tirath5504): use OpenRouter only instead of google-genai
"""
Local test script for the MetaSearch API
Tests individual pipeline components with sample data
"""
import asyncio
import os
from dotenv import load_dotenv
load_dotenv()
# Sample test data
# Fixture title: a well-known paper so search/retrieval has real hits.
SAMPLE_PAPER_TITLE = "Attention Is All You Need"
# Shortened abstract of the paper above, fed to retrieval and resolution stages.
SAMPLE_PAPER_ABSTRACT = """
We propose a new simple network architecture, the Transformer, based solely on
attention mechanisms, dispensing with recurrence and convolutions entirely.
Experiments on two machine translation tasks show these models to be superior
in quality while being more parallelizable and requiring significantly less time to train.
"""
# Three synthetic peer reviews with deliberately overlapping AND conflicting
# judgements (e.g. novelty, evaluation scope) so that critique extraction has
# content to pull and disagreement detection has real signal between pairs.
SAMPLE_REVIEWS = [
"""
This paper introduces a novel architecture that replaces recurrence with self-attention.
Strengths:
- The model achieves state-of-the-art results on translation benchmarks
- Training is significantly faster due to parallelization
- The attention visualization provides interpretability
Weaknesses:
- Limited evaluation on other NLP tasks beyond translation
- The computational complexity of self-attention scales quadratically with sequence length
- Missing comparison with some recent RNN variants
The methodology is sound but could benefit from more diverse experiments.
Overall, this is a strong contribution to the field.
""",
"""
The Transformer architecture is an interesting departure from RNN-based models.
Strengths:
- Clean and elegant architecture design
- Strong empirical results on WMT benchmarks
- Good ablation studies
Weaknesses:
- The paper overclaims novelty - attention mechanisms existed before
- Experiments are limited to machine translation only
- No theoretical analysis of why this works better
- Memory requirements are high for long sequences
The significance of this work is questionable given the narrow evaluation scope.
""",
"""
This is a well-written paper with clear presentation of a new architecture.
Strengths:
- Excellent results, setting new SOTA on translation
- The multi-head attention is a clever innovation
- Reproducibility details are provided
Weaknesses:
- Claims of "attention is all you need" are overstated
- Limited to sequence-to-sequence tasks
- Positional encoding seems like a hack
Overall a solid paper with important contributions despite some limitations.
"""
]
async def test_critique_extraction():
    """Run the critique-extraction stage on SAMPLE_REVIEWS and print a summary.

    Returns:
        The list of per-review critique dicts from extract_critiques().
    """
    banner = "=" * 60
    print("\n" + banner)
    print("Testing Critique Extraction")
    print(banner)
    from pipeline.critique_extraction import extract_critiques
    print(f"Processing {len(SAMPLE_REVIEWS)} reviews...")
    critiques = await extract_critiques(SAMPLE_REVIEWS)
    for idx, critique in enumerate(critiques, start=1):
        print(f"\n--- Review {idx} Critiques ---")
        for category, points in critique.items():
            # Skip the error marker key and empty categories.
            if category == "error" or not points:
                continue
            print(f" {category}: {len(points)} points")
            # Preview only the first two points, truncated to 80 chars each.
            for point in points[:2]:
                print(f" - {point[:80]}...")
    return critiques
async def test_disagreement_detection(critiques):
    """Run the disagreement-detection stage over extracted critiques.

    Args:
        critiques: per-review critique dicts, as returned by extract_critiques().

    Returns:
        The list of disagreement dicts produced by detect_disagreements();
        each is expected to carry 'review_pair', 'disagreement_score' and
        'disagreement_details' keys.
    """
    print("\n" + "="*60)
    print("Testing Disagreement Detection")
    print("="*60)
    from pipeline.disagreement_detection import detect_disagreements
    print(f"Detecting disagreements across {len(critiques)} reviews...")
    disagreements = await detect_disagreements(critiques)
    for d in disagreements:
        pair = d.get('review_pair', [])
        score = d.get('disagreement_score', 0)
        # Guard: the .get default of [] meant a malformed entry without both
        # review indices crashed with IndexError on pair[0]/pair[1].
        # Report and skip it instead so the rest of the results still print.
        if len(pair) < 2:
            print(f"\n--- Skipping entry with missing review_pair: {d} ---")
            continue
        print(f"\n--- Reviews {pair[0]+1} vs {pair[1]+1} ---")
        print(f" Disagreement Score: {score:.2f}")
        details = d.get('disagreement_details', {})
        for category, points in details.items():
            if points:
                print(f" {category}: {len(points)} disagreements")
    return disagreements
async def test_search_retrieval(critiques):
    """Exercise the search & retrieval stage and print a condensed view of it.

    Args:
        critiques: per-review critique dicts from extract_critiques().

    Returns:
        The results dict from search_and_retrieve().
    """
    divider = "=" * 60
    print("\n" + divider)
    print("Testing Search & Retrieval")
    print(divider)
    from pipeline.search_retrieval import search_and_retrieve
    print("Searching for SoTA research and evidence...")
    results = await search_and_retrieve(SAMPLE_PAPER_TITLE, SAMPLE_PAPER_ABSTRACT, critiques)
    # Show only a prefix of the (potentially long) SoTA text.
    print("\n--- SoTA Results (first 500 chars) ---")
    print(results.get('SoTA_Results', 'N/A')[:500])
    print("\n--- Combined Critiques ---")
    combined = results.get('Combined_Critiques', {})
    for cat, text in combined.items():
        print(f" {cat}: {len(text)} chars")
    print("\n--- Retrieved Evidence ---")
    retrieved = results.get('Retrieved_Evidence', {})
    for cat, evidence in retrieved.items():
        print(f" {cat}: {len(evidence)} chars")
    return results
async def test_disagreement_resolution(critiques, disagreements, search_results):
    """Run the disagreement-resolution stage and print each resolution's outline.

    Args:
        critiques: per-review critique dicts.
        disagreements: pairwise disagreement dicts from detect_disagreements().
        search_results: retrieval output from search_and_retrieve().

    Returns:
        The list of resolution dicts from resolve_disagreements().
    """
    divider = "=" * 60
    print("\n" + divider)
    print("Testing Disagreement Resolution")
    print(divider)
    from pipeline.disagreement_resolution import resolve_disagreements
    print(f"Resolving {len(disagreements)} disagreements...")
    resolutions = await resolve_disagreements(
        SAMPLE_PAPER_TITLE, SAMPLE_PAPER_ABSTRACT,
        disagreements, critiques, search_results,
    )
    for num, resolution in enumerate(resolutions, start=1):
        print(f"\n--- Resolution {num} ---")
        details = resolution.get('resolution_details', {})
        # Report which critique categories were accepted vs rejected.
        print(f" Accepted categories: {list(details.get('accepted_critique_points', {}).keys())}")
        print(f" Rejected categories: {list(details.get('rejected_critique_points', {}).keys())}")
        summary = details.get('final_resolution_summary', '')
        print(f" Summary: {summary[:200]}...")
    return resolutions
async def test_meta_review(resolutions, search_results):
    """Run the meta-review generation stage and print a preview.

    Args:
        resolutions: resolution dicts from resolve_disagreements().
        search_results: retrieval output from search_and_retrieve().

    Returns:
        The generated meta-review string.
    """
    divider = "=" * 60
    print("\n" + divider)
    print("Testing Meta-Review Generation")
    print(divider)
    from pipeline.meta_review import generate_meta_review
    print("Generating meta-review...")
    meta_review = await generate_meta_review(
        SAMPLE_PAPER_TITLE,
        SAMPLE_PAPER_ABSTRACT,
        resolutions,
        search_results,
    )
    # Only preview the first 1000 characters of the full review text.
    print("\n--- Meta-Review (first 1000 chars) ---")
    print(meta_review[:1000])
    print("...")
    return meta_review
async def run_full_pipeline():
    """Execute every pipeline stage end-to-end against the sample paper.

    Steps: critique extraction -> disagreement detection -> search/retrieval
    -> disagreement resolution -> meta-review generation. Any stage failure
    is caught, reported, and traced rather than propagated.
    """
    header = "#" * 60
    print("\n" + header)
    print("# MetaSearch API - Full Pipeline Test")
    print(header)
    # Every stage needs the OpenRouter key; bail out early if it is absent.
    if not os.getenv("OPENROUTER_API_KEY"):
        print("\n❌ ERROR: OPENROUTER_API_KEY not set!")
        print("Please set it in your .env file")
        return
    print("\n✅ OPENROUTER_API_KEY is set")
    try:
        critiques = await test_critique_extraction()
        disagreements = await test_disagreement_detection(critiques)
        # Search/retrieval can be slow but feeds the resolution stage.
        search_results = await test_search_retrieval(critiques)
        resolutions = await test_disagreement_resolution(
            critiques, disagreements, search_results
        )
        await test_meta_review(resolutions, search_results)
        print("\n" + header)
        print("# ✅ Full Pipeline Test Complete!")
        print(header)
    except Exception as e:
        # Top-level test boundary: report and dump the traceback, don't re-raise.
        print(f"\n❌ Pipeline failed with error: {e}")
        import traceback
        traceback.print_exc()
async def run_quick_test():
    """Run only the critique-extraction stage as a fast smoke test."""
    header = "#" * 60
    print("\n" + header)
    print("# MetaSearch API - Quick Test (Critique Extraction Only)")
    print(header)
    # The extraction stage needs the OpenRouter key; abort without it.
    if not os.getenv("OPENROUTER_API_KEY"):
        print("\n❌ ERROR: OPENROUTER_API_KEY not set!")
        return
    print("\n✅ OPENROUTER_API_KEY is set")
    try:
        await test_critique_extraction()
        print("\n✅ Quick test passed!")
    except Exception as e:
        # Report and trace rather than propagate - this is a test boundary.
        print(f"\n❌ Test failed: {e}")
        import traceback
        traceback.print_exc()
if __name__ == "__main__":
import sys
if len(sys.argv) > 1 and sys.argv[1] == "--quick":
asyncio.run(run_quick_test())
else:
asyncio.run(run_full_pipeline())