import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
import json
import os
import datetime
import urllib.parse

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    BENCHMARK_COLS,
    COLS,
    EVAL_COLS,
    EVAL_TYPES,
    AutoEvalColumn,
    ModelType,
    fields,
    WeightType,
    Precision,
)
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
from src.populate import get_evaluation_queue_df, get_leaderboard_df


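# Restart this Space so the leaderboard is rebuilt from the freshest data on disk.
# Called periodically by the APScheduler job configured at the bottom of this file.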
def restart_space():
    API.restart_space(repo_id=REPO_ID)


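# Submissions are handled manually rather than through an automated evaluation queue:
# the form contents are written to ./submissions/<model>_<timestamp>.json and the
# submitter is asked to email the details to the maintainers.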
def save_submission_and_notify(model_name, contact_email, weight_link, json_results, paper_link, description):
    """Save the submission to a local file and return instructions for emailing it."""
    try:
        # Validate the pasted results before saving anything.
        if json_results.strip():
            try:
                json.loads(json_results)
            except json.JSONDecodeError:
                return "❌ Invalid JSON format in results field"

        submission_data = {
            "timestamp": datetime.datetime.now().isoformat(),
            "model_name": model_name,
            "contact_email": contact_email,
            "weight_link": weight_link,
            "paper_link": paper_link,
            "description": description,
            "json_results": json_results,
        }

        os.makedirs("submissions", exist_ok=True)
        filename = (
            f"submissions/{model_name.replace('/', '_')}_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
        )
        with open(filename, "w") as f:
            json.dump(submission_data, f, indent=2)

        subject = f"SearchAgent Leaderboard Submission: {model_name}"
        body = f"""New model submission for SearchAgent Leaderboard:

Model Name: {model_name}
Contact Email: {contact_email}
Weight Link: {weight_link}
Paper Link: {paper_link}
Description: {description}

JSON Results:
{json_results}"""

        # Pre-filled mailto link; the body is truncated so the URL stays a reasonable length.
        mailto_link = (
            f"mailto:[email protected]?subject={urllib.parse.quote(subject)}&body={urllib.parse.quote(body[:500])}"
        )

        return f"""✅ Submission saved successfully!

📧 **Please send your submission to: [email protected]**

You can either:
1. Click here to open your email client: [Send Email]({mailto_link})
2. Or copy the submission details above and send manually

Your submission has been saved to: {filename}

We'll review your model and get back to you at {contact_email}."""

    except Exception as e:
        return f"❌ Failed to save submission: {str(e)}"


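# Make sure the local eval request/result directories exist before anything reads from
# them; failures are logged but non-fatal so the Space can still start.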
try:
    print(EVAL_REQUESTS_PATH)
    if not os.path.exists(EVAL_REQUESTS_PATH):
        os.makedirs(EVAL_REQUESTS_PATH, exist_ok=True)
except Exception as e:
    print(f"Could not setup eval requests path: {e}")

try:
    print(EVAL_RESULTS_PATH)
    if not os.path.exists(EVAL_RESULTS_PATH):
        os.makedirs(EVAL_RESULTS_PATH, exist_ok=True)
except Exception as e:
    print(f"Could not setup eval results path: {e}")


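# Lightweight stdout helpers used to inspect what the Space actually loads at startup.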
def _debug_print_dataframe(name: str, dataframe: pd.DataFrame) -> None:
    if dataframe is None:
        print(f"[debug] {name}: DataFrame is None")
        return
    print(f"[debug] {name}: shape={dataframe.shape}, columns={list(dataframe.columns)}")
    if not dataframe.empty:
        preview = dataframe.head().to_dict(orient="records")
        print(f"[debug] {name}: head={preview}")
    else:
        print(f"[debug] {name}: DataFrame is empty")


def _debug_list_dir(label: str, path: str, limit: int = 10) -> None:
    try:
        entries = os.listdir(path)
        print(f"[debug] {label}: path={path}, count={len(entries)}, preview={entries[:limit]}")
    except FileNotFoundError:
        print(f"[debug] {label}: path={path} not found")
    except Exception as exc:
        print(f"[debug] {label}: path={path} error={exc}")


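# Log the contents of the local results/queue directories, then load the leaderboard and
# evaluation-queue DataFrames once at module import time.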
_debug_list_dir("EVAL_RESULTS", EVAL_RESULTS_PATH) |
|
|
_debug_list_dir("EVAL_QUEUE", EVAL_REQUESTS_PATH) |
|
|
|
|
|
LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS) |
|
|
_debug_print_dataframe("LEADERBOARD", LEADERBOARD_DF) |
|
|
|
|
|
( |
|
|
finished_eval_queue_df, |
|
|
running_eval_queue_df, |
|
|
pending_eval_queue_df, |
|
|
) = get_evaluation_queue_df(EVAL_REQUESTS_PATH, EVAL_COLS) |
|
|
_debug_print_dataframe("EVAL_QUEUE_FINISHED", finished_eval_queue_df) |
|
|
_debug_print_dataframe("EVAL_QUEUE_RUNNING", running_eval_queue_df) |
|
|
_debug_print_dataframe("EVAL_QUEUE_PENDING", pending_eval_queue_df) |
|
|
|
|
|
|
|
|
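# Build the gradio_leaderboard component, deriving column types, default visibility and
# filters from the AutoEvalColumn schema.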
def init_leaderboard(dataframe):
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")
    return Leaderboard(
        value=dataframe,
        datatype=[c.type for c in fields(AutoEvalColumn)],
        select_columns=SelectColumns(
            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
            label="Select Columns to Display:",
        ),
        search_columns=[AutoEvalColumn.model.name],
        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
        filter_columns=[
            ColumnFilter(AutoEvalColumn.model_size.name, type="checkboxgroup", label="Model Size"),
        ],
        bool_checkboxgroup_label="Hide models",
        interactive=False,
    )


def create_demo():
    """Create the Gradio interface."""
    with gr.Blocks(css=custom_css) as demo:
        gr.HTML(TITLE)

        with gr.Tabs(elem_classes="tab-buttons") as tabs:
            print("[debug] Rendering leaderboard tab start")
            with gr.TabItem("🏅 SearchAgent Benchmark", elem_id="llm-benchmark-tab-table", id=0):
                leaderboard = init_leaderboard(LEADERBOARD_DF)
                gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
            print("[debug] Rendering leaderboard tab done")

            print("[debug] Rendering about tab start")
            with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
                gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
            print("[debug] Rendering about tab done")

            print("[debug] Rendering submit tab start")
            with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
                with gr.Column():
                    with gr.Row():
                        gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
            print("[debug] Rendering submit tab done")

        with gr.Row():
            print("[debug] Rendering citation start")
            with gr.Accordion("📙 Citation", open=False):
                gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    lines=20,
                    elem_id="citation-button",
                    show_copy_button=True,
                )
            print("[debug] Rendering citation done")

    return demo


demo = create_demo()

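# Restart the Space on a 30-minute interval (1800 seconds) so the data loaded at import
# time is periodically refreshed.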
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=1800)
scheduler.start()

demo.launch(show_error=True)