# ko-freshqa-leaderboard / src/submission_handler.py
from __future__ import annotations
import os
import time
import queue
from dataclasses import dataclass
from typing import Any, Optional, Dict, Tuple, Callable
import pandas as pd
import gradio as gr
from config import Config
from src.submission_tracker import get_submission_tracker, SubmissionTracker
from src.quick_csv_loader import quick_load_csv
from src.leaderboard_manager import append_to_leaderboard_data
from src.utils import get_current_datetime_str
from freshqa.fresheval_parallel import evaluate_dataframe_parallel
from freshqa.freshqa_acc import process_freshqa_dataframe, calculate_accuracy
from freshqa.merge_csv_with_model_response import merge_dataframe_with_model_response_df
# -------------------------
# Common return type (Result)
# -------------------------
@dataclass
class Result:
ok: bool
data: Optional[Any] = None
error: Optional[str] = None
meta: Optional[Dict] = None
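# Usage sketch (illustrative only, not called anywhere): the helpers below wrap success
# and failure in a Result instead of raising, so callers can branch on `ok` and surface
# `error` directly to the UI. `some_helper` is a hypothetical name.
#
#     r = some_helper()
#     if r.ok:
#         payload = r.data
#     else:
#         print(r.error)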
# -------------------------
# Core handler
# -------------------------
class SubmissionHandler:
"""
์ œ์ถœ ํŒŒ์ผ ์ฒ˜๋ฆฌ ๋ฐ FreshQA ํ‰๊ฐ€ ์˜ค์ผ€์ŠคํŠธ๋ ˆ์ด์…˜.
- Tracker/Config ์˜์กด์„ฑ ์ฃผ์ž…
- ๋‚ด๋ถ€ helper๋Š” Result/๋ช…ํ™•ํ•œ ํƒ€์ž… ๋ฐ˜ํ™˜
- ์‹ค์ œ ์ €์žฅ/ํ•œ๋„/์‚ฌ์šฉ์ž ID๋Š” tracker๊ฐ€ ์ฒ˜๋ฆฌ(ํ•ธ๋“ค๋Ÿฌ๋Š” ํ˜ธ์ถœ๋งŒ)
"""
def __init__(self, tracker: Optional[SubmissionTracker] = None, cfg: Optional[type] = None):
# Dependency Injection
self.tracker = tracker
self.cfg = cfg or Config
self.enable_limit = getattr(self.cfg, "ENABLE_SUBMISSION_LIMIT", False)
self.repo_id = getattr(self.cfg, "FRESHQA_DATA_REPO_ID", None)
self.filename = getattr(self.cfg, "FRESHQA_DATA_FILENAME", None)
        self.hf_token = getattr(self.cfg, "HF_TOKEN", None)
        # Check required settings
        if not self.repo_id:
            raise ValueError("❌ The FRESHQA_DATA_REPO_ID environment variable is not set.")
        if not self.filename:
            raise ValueError("❌ The FRESHQA_DATA_FILENAME environment variable is not set.")
        if not self.hf_token:
            raise ValueError("❌ The HF_TOKEN environment variable is not set.")
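    # Configuration sketch (an assumption for illustration, not the real config.py): only the
    # four attributes read above are required by this handler; values would normally come from
    # environment variables / Space secrets.
    #
    #     class Config:
    #         ENABLE_SUBMISSION_LIMIT = True
    #         FRESHQA_DATA_REPO_ID = "your-org/ko-freshqa-data"   # hypothetical repo id
    #         FRESHQA_DATA_FILENAME = "freshqa_ko.csv"            # hypothetical filename
    #         HF_TOKEN = os.environ.get("HF_TOKEN")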
    # --------- 1) Validate the submission file ----------
def _validate_submission_file(self, file) -> Result:
if file is None:
return Result(ok=False, error="โŒ CSV ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•ด์ฃผ์„ธ์š”.")
try:
df = pd.read_csv(file.name)
except Exception as e:
return Result(ok=False, error=f"โŒ CSV ๋กœ๋”ฉ ์‹คํŒจ: {e}")
required_columns = ["question", "model_response"]
for col in required_columns:
if col not in df.columns:
return Result(ok=False, error=f"โŒ CSV ํŒŒ์ผ์˜ ์ปฌ๋Ÿผ์— '{col}'์ด(๊ฐ€) ์—†์Šต๋‹ˆ๋‹ค.")
if len(df) == 0:
return Result(ok=False, error="โŒ CSV ํŒŒ์ผ์— ๋ฐ์ดํ„ฐ๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค.")
if df["question"].isnull().any() or df["model_response"].isnull().any():
return Result(ok=False, error="โŒ 'question' ๋˜๋Š” 'model_response' ์ปฌ๋Ÿผ์— ๋ˆ„๋ฝ๋œ ๊ฐ’์ด ์žˆ์Šต๋‹ˆ๋‹ค.")
return Result(ok=True)
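    # Expected upload format (sketch): a CSV with at least the two required columns and no
    # missing values in either of them. The rows below are placeholders, not real data.
    #
    #     question,model_response
    #     "Example question 1","Example model answer 1"
    #     "Example question 2","Example model answer 2"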
    # --------- 2) Quick load (reference data) ----------
def _load_submission_df(self, file) -> Result:
try:
df = quick_load_csv(self.repo_id, self.filename, self.hf_token)
except Exception as e:
return Result(ok=False, error=f"โŒ CSV ๋กœ๋”ฉ ์‹คํŒจ: {e}")
return Result(ok=True, data=df)
    # --------- 3) Merge ----------
def _merge_with_base(self, submission_df: pd.DataFrame, file_name: str) -> Result:
try:
merged_df = merge_dataframe_with_model_response_df(submission_df, file_name)
return Result(ok=True, data=merged_df)
except Exception as e:
return Result(ok=False, error=f"โŒ ๊ธฐ์ค€ ๋ฐ์ดํ„ฐ์™€ ๋ณ‘ํ•ฉ ์‹คํŒจ: {e}")
    # --------- 4) Evaluation ----------
def _evaluate_freshqa(
self,
merged_df: pd.DataFrame,
on_progress: Optional[Callable[[int, int, str], None]] = None,
) -> Result:
"""Relaxed/Strict ๋™์‹œ ์‹คํ–‰ + ํ ๊ธฐ๋ฐ˜ ์ง„ํ–‰๋ฅ  ๊ฐฑ์‹ """
q: "queue.Queue[Tuple[int, int, str]]" = queue.Queue()
# ๋‘ ๋ชจ๋“œ(Relaxed, Strict)๋ฅผ ๋ณ‘๋ ฌ๋กœ ์ฒ˜๋ฆฌํ•˜๋ฏ€๋กœ ์ด ์ง„ํ–‰ ๋‹จ์œ„๋Š” 2๋ฐฐ
total_items = len(merged_df) * 2
done_count = 0
def _drain_queue(block: bool = False):
nonlocal done_count
while True:
try:
item = q.get(block=block, timeout=0.05 if block else 0)
except Exception:
break
                try:
                    # As of the latest commit, progress_queue only receives integer increments of 1
if isinstance(item, int):
done_count += item
if on_progress:
remaining = max(total_items - done_count, 0)
desc_text = f"ํ‰๊ฐ€ ์ค‘... {done_count}/{total_items}"
on_progress(done_count, total_items, desc_text)
# ํ˜น์‹œ ๊ณผ๊ฑฐ ํฌ๋งท(tuple)์ด ๋“ค์–ด์˜ค๋”๋ผ๋„ ๋ฐฉ์–ด์ ์œผ๋กœ ์ฒ˜๋ฆฌ
elif isinstance(item, tuple) and len(item) == 3 and on_progress:
on_progress(item[0], item[1], item[2])
finally:
q.task_done()
from concurrent.futures import ThreadPoolExecutor
try:
with ThreadPoolExecutor(max_workers=2) as ex:
relaxed_f = ex.submit(
evaluate_dataframe_parallel,
df=merged_df,
mode="Relaxed",
on_item_done=None,
progress_queue=q,
)
strict_f = ex.submit(
evaluate_dataframe_parallel,
df=merged_df,
mode="Strict",
on_item_done=None,
progress_queue=q,
)
while True:
_drain_queue(block=False)
if relaxed_f.done() and strict_f.done():
break
time.sleep(0.05)
_drain_queue(block=True)
relaxed = relaxed_f.result()
strict = strict_f.result()
return Result(ok=True, data=(relaxed, strict))
except Exception as e:
return Result(ok=False, error=f"โŒ ํ‰๊ฐ€ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
    # --------- 5) Accuracy calculation ----------
def _calculate_accuracy(self, fresheval_df: pd.DataFrame) -> Result:
try:
processed = process_freshqa_dataframe(fresheval_df)
accs, counts = calculate_accuracy(processed)
return Result(ok=True, data=(processed, accs, counts))
except Exception as e:
return Result(ok=False, error=f"โŒ ๊ฒฐ๊ณผ ์ง‘๊ณ„ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {e}")
    # --------- 6) Summary ----------
def _build_summary(self, name: str, relaxed_accs: dict, strict_accs: dict) -> str:
submitter = name if name else "(์ด๋ฆ„ ๋ฏธ์ž…๋ ฅ)"
lines = []
lines.append(f"**์ œ์ถœ์ž**: {submitter}")
lines.append("")
lines.append("**์ •ํ™•๋„ (ํ…Œ์ŠคํŠธ์…‹ ๊ธฐ์ค€)**")
lines.append(f"- Relaxed: {relaxed_accs.get('acc_test', 0):.1f}%")
lines.append(f"- Strict: {strict_accs.get('acc_test', 0):.1f}%")
lines.append("")
lines.append("**์„ธ๋ถ€ ์ง€ํ‘œ (ํ…Œ์ŠคํŠธ์…‹)**")
lines.append(
f"- Fast Changing: R {relaxed_accs.get('acc_test_fast_changing', 0):.1f}% / "
f"S {strict_accs.get('acc_test_fast_changing', 0):.1f}%"
)
lines.append(
f"- Two-hop: R {relaxed_accs.get('acc_test_two_hop', 0):.1f}% / "
f"S {strict_accs.get('acc_test_two_hop', 0):.1f}%"
)
lines.append(
f"- Old: R {relaxed_accs.get('acc_test_old', 0):.1f}% / "
f"S {strict_accs.get('acc_test_old', 0):.1f}%"
)
lines.append(
f"- New: R {relaxed_accs.get('acc_test_new', 0):.1f}% / "
f"S {strict_accs.get('acc_test_new', 0):.1f}%"
)
lines.append(
f"- VP: R {relaxed_accs.get('acc_test_vp', 0):.1f}% / "
f"S {strict_accs.get('acc_test_vp', 0):.1f}%"
)
lines.append(
f"- FP: R {relaxed_accs.get('acc_test_fp', 0):.1f}% / "
f"S {strict_accs.get('acc_test_fp', 0):.1f}%"
)
return "\n".join(lines)
def _get_result_summary(
self,
file_name: str,
name: str,
relaxed_accs: dict,
strict_accs: dict,
relaxed_table: pd.DataFrame,
strict_table: pd.DataFrame,
) -> str:
display_file = os.path.basename(file_name) if file_name else ""
lines: list[str] = []
lines.append("โœ… ์ œ์ถœ ๋ฐ ํ‰๊ฐ€ ์™„๋ฃŒ")
lines.append("")
lines.append("[๊ธฐ๋ณธ ์ •๋ณด]")
lines.append(f"- ์ œ์ถœ ํŒŒ์ผ: {display_file}")
lines.append(f"- ํ‰๊ฐ€ ์‹œ์Šคํ…œ: Solar Pro API")
lines.append("")
lines.append("[๊ฒฐ๊ณผ ์š”์•ฝ]")
lines.append("- Relaxed ๋ชจ๋“œ")
lines.append(f" ยท ์ „์ฒด ์ •ํ™•๋„: {float(relaxed_accs.get('acc', 0)):.1f}%")
lines.append(
f" ยท Fast-changing: {float(relaxed_accs.get('acc_fast_changing', 0)):.1f}% | "
f"Slow-changing: {float(relaxed_accs.get('acc_slow_changing', 0)):.1f}% | "
f"Never-changing: {float(relaxed_accs.get('acc_never_changing', 0)):.1f}%"
)
lines.append(f" ยท False premise: {float(relaxed_accs.get('acc_fp', 0)):.1f}%")
lines.append("")
lines.append("- Strict ๋ชจ๋“œ")
lines.append(f" ยท ์ „์ฒด ์ •ํ™•๋„: {float(strict_accs.get('acc', 0)):.1f}%")
lines.append(
f" ยท Fast-changing: {float(strict_accs.get('acc_fast_changing', 0)):.1f}% | "
f"Slow-changing: {float(strict_accs.get('acc_slow_changing', 0)):.1f}% | "
f"Never-changing: {float(strict_accs.get('acc_never_changing', 0)):.1f}%"
)
lines.append(f" ยท False premise: {float(strict_accs.get('acc_fp', 0)):.1f}%")
lines.append("")
lines.append("[์ œ์ถœ ๋ฉ”ํƒ€]")
lines.append(f"- ์ œ์ถœ์ž: {name if name else 'Unknown'}")
lines.append(f"- ํ‰๊ฐ€ ์ผ์‹œ: {get_current_datetime_str()}")
lines.append(f"- ๋น„๊ณ : Relaxed/Strict ๊ฒฐ๊ณผ๊ฐ€ ๋ฆฌ๋”๋ณด๋“œ์— ๋ฐ˜์˜๋˜์—ˆ์Šต๋‹ˆ๋‹ค.")
lines.append("")
sep = "-" * 60
lines.append(sep)
lines.append("์ƒ์„ธ ๊ฒฐ๊ณผ ํ…Œ์ด๋ธ” (Relaxed)")
lines.append(sep)
lines.append(relaxed_table.to_string(index=False))
lines.append("")
lines.append(sep)
lines.append("์ƒ์„ธ ๊ฒฐ๊ณผ ํ…Œ์ด๋ธ” (Strict)")
lines.append(sep)
lines.append(strict_table.to_string(index=False))
return "\n".join(lines)
    # --------- 7) Accuracy table ----------
def _create_detailed_results_table(self, accs: dict, counts: dict) -> pd.DataFrame:
        table_data = []
        # Overall accuracy
        table_data.append({
            'Category': 'Overall accuracy',
            'Overall': f"{accs.get('acc', 0):.1f}% ({counts.get('acc', 0)} items)",
            'Test': f"{accs.get('acc_test', 0):.1f}% ({counts.get('acc_test', 0)} items)",
            'Dev': f"{accs.get('acc_dev', 0):.1f}% ({counts.get('acc_dev', 0)} items)"
        })
        # Accuracy by fact type
        fact_types = {
            'fast_changing': 'Fast-changing facts',
            'slow_changing': 'Slow-changing facts',
            'never_changing': 'Never-changing facts'
        }
for key, name in fact_types.items():
            table_data.append({
                'Category': name,
                'Overall': f"{accs.get(f'acc_{key}', 0):.1f}% ({counts.get(f'acc_{key}', 0)} items)",
                'Test': f"{accs.get(f'acc_test_{key}', 0):.1f}% ({counts.get(f'acc_test_{key}', 0)} items)",
                'Dev': f"{accs.get(f'acc_dev_{key}', 0):.1f}% ({counts.get(f'acc_dev_{key}', 0)} items)"
            })
        # Accuracy by question type
        question_types = {
            'vp': 'Valid premise (VP)',
            'fp': 'False premise (FP)'
        }
for key, name in question_types.items():
            table_data.append({
                'Category': name,
                'Overall': f"{accs.get(f'acc_{key}', 0):.1f}% ({counts.get(f'acc_{key}', 0)} items)",
                'Test': f"{accs.get(f'acc_test_{key}', 0):.1f}% ({counts.get(f'acc_test_{key}', 0)} items)",
                'Dev': f"{accs.get(f'acc_dev_{key}', 0):.1f}% ({counts.get(f'acc_dev_{key}', 0)} items)"
            })
            # Accuracy by hop count
table_data.append({
                'Category': f" └ {name} (single-hop)",
                'Overall': f"{accs.get(f'acc_{key}_one_hop', 0):.1f}% ({counts.get(f'acc_{key}_one_hop', 0)} items)",
                'Test': f"{accs.get(f'acc_test_{key}_one_hop', 0):.1f}% ({counts.get(f'acc_test_{key}_one_hop', 0)} items)",
                'Dev': f"{accs.get(f'acc_dev_{key}_one_hop', 0):.1f}% ({counts.get(f'acc_dev_{key}_one_hop', 0)} items)"
})
table_data.append({
                'Category': f" └ {name} (multi-hop)",
                'Overall': f"{accs.get(f'acc_{key}_two_hop', 0):.1f}% ({counts.get(f'acc_{key}_two_hop', 0)} items)",
                'Test': f"{accs.get(f'acc_test_{key}_two_hop', 0):.1f}% ({counts.get(f'acc_test_{key}_two_hop', 0)} items)",
                'Dev': f"{accs.get(f'acc_dev_{key}_two_hop', 0):.1f}% ({counts.get(f'acc_dev_{key}_two_hop', 0)} items)"
})
            # Accuracy by data recency (old/new)
table_data.append({
                'Category': f" └ {name} (old data)",
                'Overall': f"{accs.get(f'acc_{key}_old', 0):.1f}% ({counts.get(f'acc_{key}_old', 0)} items)",
                'Test': f"{accs.get(f'acc_test_{key}_old', 0):.1f}% ({counts.get(f'acc_test_{key}_old', 0)} items)",
                'Dev': f"{accs.get(f'acc_dev_{key}_old', 0):.1f}% ({counts.get(f'acc_dev_{key}_old', 0)} items)"
})
table_data.append({
                'Category': f" └ {name} (recent data)",
                'Overall': f"{accs.get(f'acc_{key}_new', 0):.1f}% ({counts.get(f'acc_{key}_new', 0)} items)",
                'Test': f"{accs.get(f'acc_test_{key}_new', 0):.1f}% ({counts.get(f'acc_test_{key}_new', 0)} items)",
                'Dev': f"{accs.get(f'acc_dev_{key}_new', 0):.1f}% ({counts.get(f'acc_dev_{key}_new', 0)} items)"
})
return pd.DataFrame(table_data)
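    # Resulting DataFrame shape (values below are illustrative placeholders, not real results):
    #
    #     Category               Overall            Test               Dev
    #     Overall accuracy       62.0% (500 items)  61.5% (400 items)  64.0% (100 items)
    #     Fast-changing facts    41.0% (120 items)  40.2% (95 items)   44.0% (25 items)
    #     ...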
    # --------- 8) Build leaderboard rows ----------
def _build_leaderboard_rows(
self,
name: str,
submit_model: str,
submit_description: Optional[str],
mode: str,
accs: dict
):
submitter_id = f"{name}".strip()
result = {
'id': submitter_id if submitter_id else "Unknown",
'model': submit_model,
'description': submit_description,
'accuracy': float(accs.get('acc_test', 0)),
'fast_changing_accuracy': float(accs.get('acc_test_fast_changing', 0)),
'slow_changing_accuracy': float(accs.get('acc_test_slow_changing', 0)),
'never_changing_accuracy': float(accs.get('acc_test_never_changing', 0)),
'acc_vp': float(accs.get('acc_test_vp', 0)),
'acc_fp': float(accs.get('acc_test_fp', 0)),
'acc_vp_one_hop': float(accs.get('acc_test_vp_one_hop', 0)),
'acc_vp_two_hop': float(accs.get('acc_test_vp_two_hop', 0)),
'acc_fp_one_hop': float(accs.get('acc_test_fp_one_hop', 0)),
'acc_fp_two_hop': float(accs.get('acc_test_fp_two_hop', 0)),
'acc_vp_old': float(accs.get('acc_test_vp_old', 0)),
'acc_vp_new': float(accs.get('acc_test_vp_new', 0)),
'acc_fp_old': float(accs.get('acc_test_fp_old', 0)),
'acc_fp_new': float(accs.get('acc_test_fp_new', 0)),
            # Per-domain accuracy (test-set results only)
'acc_politics': float(accs.get('acc_test_politics', 0)),
'acc_sports': float(accs.get('acc_test_sports', 0)),
'acc_entertainment': float(accs.get('acc_test_entertainment', 0)),
'acc_weather': float(accs.get('acc_test_weather', 0)),
'acc_world': float(accs.get('acc_test_world', 0)),
'acc_economy': float(accs.get('acc_test_economy', 0)),
'acc_society': float(accs.get('acc_test_society', 0)),
'acc_it_science': float(accs.get('acc_test_it_science', 0)),
'acc_life_culture': float(accs.get('acc_test_life_culture', 0)),
'acc_unknown': float(accs.get('acc_test_unknown', 0)),
'total_questions': int(accs.get('acc_test', 0)),
'evaluation_date': get_current_datetime_str(),
'evaluation_mode': mode
}
return result
def _save_leaderboard(
self,
name: str,
submit_model: str,
submit_description: Optional[str],
relaxed_accs: dict,
strict_accs: dict
):
rows = [
self._build_leaderboard_rows(name, submit_model, submit_description, 'Relaxed', relaxed_accs),
self._build_leaderboard_rows(name, submit_model, submit_description, 'Strict', strict_accs),
]
try:
append_to_leaderboard_data(rows)
except Exception as e:
print(f"โš ๏ธ ๋ฆฌ๋”๋ณด๋“œ ์ €์žฅ ์‹คํŒจ: {e}")
    # --------- 9) Public entry point (core) ----------
def process_submission(
self,
file,
name: str,
submit_model: str,
submit_description: str,
user_id: Optional[str] = None,
progress: gr.Progress = gr.Progress(),
) -> str:
"""
์ œ์ถœ ํŒŒ์ผ ์ฒ˜๋ฆฌ ๋ฐ ํ‰๊ฐ€
- ๋‚ด๋ถ€ helper๋Š” Result ๊ธฐ๋ฐ˜์œผ๋กœ ๋ฆฌํ„ด
- ์ตœ์ข… Gradio ์ถœ๋ ฅ์€ ๋ฌธ์ž์—ด(๊ธฐ์กด ํ˜ธํ™˜)
"""
start = time.time()
normalized_model = (submit_model or "").strip() or "Anonymous Model"
normalized_description_raw = (submit_description or "").strip()
normalized_description = normalized_description_raw if normalized_description_raw else None
        # 1) Check submission limits
tracker: Optional[SubmissionTracker] = None
        if self.enable_limit:
            # Login-based submission restriction: fail fast if user_id is missing
            if not user_id:
                return "❌ Submissions are only allowed while logged in to HuggingFace. Please log in and try again."
tracker = self.tracker or get_submission_tracker()
if tracker is not None:
self.tracker = tracker
if self.enable_limit and tracker:
try:
can_submit, message, remaining = tracker.can_submit(user_id=user_id)
if not can_submit:
return f"โŒ ์ œ์ถœ ์ œํ•œ: {message}"
except Exception as e:
return f"โŒ ์ œ์ถœ ์ œํ•œ ํ™•์ธ ์‹คํŒจ: {e}"
        # 2) Validate the file
        progress(0.05, desc="Validating submission file...")
v = self._validate_submission_file(file)
if not v.ok:
return v.error or "โŒ ์ œ์ถœ ํŒŒ์ผ ๊ฒ€์ฆ ์‹คํŒจ"
        # 3) Load
        progress(0.1, desc="Loading reference data...")
loaded = self._load_submission_df(file)
if not loaded.ok:
return loaded.error or "โŒ CSV ๋กœ๋”ฉ ์‹คํŒจ"
submission_df: pd.DataFrame = loaded.data
        # 4) Merge
        progress(0.15, desc="Merging with reference data...")
mg = self._merge_with_base(submission_df, file.name)
if not mg.ok:
return mg.error or "โŒ ๊ธฐ์ค€ ๋ฐ์ดํ„ฐ ๋ณ‘ํ•ฉ ์‹คํŒจ"
merged_df: pd.DataFrame = mg.data
        # 5) Evaluation (progress mapped to the 0.15-0.9 range)
        progress(0.15, desc="Preparing FreshQA evaluation...")
def on_inner_progress(done: int, total: int, desc: str):
frac = 0.15 + 0.75 * (done / max(total, 1))
progress(frac, desc=desc)
ev = self._evaluate_freshqa(merged_df, on_progress=on_inner_progress)
        if not ev.ok:
            # Record the failure
if self.enable_limit and tracker and user_id:
try:
tracker.record_submission(
user_id=user_id,
submitter_name=name,
file_name=os.path.basename(file.name),
success=False,
error_message=ev.error or "ํ‰๊ฐ€ ์‹คํŒจ",
submit_model=normalized_model,
submit_description=normalized_description,
)
except Exception:
pass
return ev.error or "โŒ ํ‰๊ฐ€ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค"
relaxed_df, strict_df = ev.data # type: ignore[assignment]
        # 6) Aggregate results
        progress(0.8, desc="Analyzing evaluation results...")
r = self._calculate_accuracy(relaxed_df)
if not r.ok:
if self.enable_limit and tracker and user_id:
try:
tracker.record_submission(
user_id=user_id,
submitter_name=name,
file_name=os.path.basename(file.name),
success=False,
error_message=r.error or "์ง‘๊ณ„ ์‹คํŒจ",
submit_model=normalized_model,
submit_description=normalized_description,
)
except Exception:
pass
return r.error or "โŒ ๊ฒฐ๊ณผ ์ง‘๊ณ„ ์‹คํŒจ"
s = self._calculate_accuracy(strict_df)
if not s.ok:
if self.enable_limit and tracker and user_id:
try:
tracker.record_submission(
user_id=user_id,
submitter_name=name,
file_name=os.path.basename(file.name),
success=False,
error_message=s.error or "์ง‘๊ณ„ ์‹คํŒจ",
submit_model=normalized_model,
submit_description=normalized_description,
)
except Exception:
pass
return s.error or "โŒ ๊ฒฐ๊ณผ ์ง‘๊ณ„ ์‹คํŒจ"
relaxed_processed, relaxed_accs, relaxed_counts = r.data # type: ignore[misc]
strict_processed, strict_accs, strict_counts = s.data # type: ignore[misc]
        # 7) Summary and tables
relaxed_table = self._create_detailed_results_table(relaxed_accs, relaxed_counts)
strict_table = self._create_detailed_results_table(strict_accs, strict_counts)
result_summary = self._get_result_summary(
file_name=file.name if file else "",
name=name,
relaxed_accs=relaxed_accs,
strict_accs=strict_accs,
relaxed_table=relaxed_table,
strict_table=strict_table,
)
        # 8) Record the successful submission and save to the leaderboard
if self.enable_limit and tracker and user_id:
progress(0.85, desc="์ œ์ถœ ๋‚ด์—ญ ์ €์žฅ ์ค‘...")
save_ok = tracker.record_submission(
user_id=user_id,
submitter_name=name,
file_name=os.path.basename(file.name),
success=True,
submit_model=normalized_model,
submit_description=normalized_description,
)
            # Even if save_ok is False, the evaluation results are still pushed to the leaderboard
            progress(0.9, desc="Updating leaderboard...")
self._save_leaderboard(name, normalized_model, normalized_description, relaxed_accs, strict_accs)
else:
self._save_leaderboard(name, normalized_model, normalized_description, relaxed_accs, strict_accs)
        # 9) Build the result string
        progress(1.0, desc="Done")
return result_summary
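# Dependency-injection sketch (illustrative, not part of the production flow): because the
# tracker and config are injected, SubmissionHandler can be exercised in tests with stand-ins.
# `DummyTracker` is a hypothetical name; a minimal Config shape is sketched after __init__.
#
#     class DummyTracker:
#         def can_submit(self, user_id):
#             return True, "", 99
#         def record_submission(self, **kwargs):
#             return True
#
#     handler = SubmissionHandler(tracker=DummyTracker(), cfg=Config)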
# -------------------------
# Module-level entry point (kept for compatibility with the existing UI)
# -------------------------
def process_submission(
file,
name: str,
submit_model: str,
submit_description: str,
user_id: Optional[str] = None,
progress: gr.Progress = gr.Progress(),
) -> str:
"""
Gradio์—์„œ ์ง์ ‘ ํ˜ธ์ถœํ•˜๋Š” ์—”ํŠธ๋ฆฌํฌ์ธํŠธ.
๋‚ด๋ถ€์ ์œผ๋กœ DI๋ฅผ ์ ์šฉํ•œ SubmissionHandler๋ฅผ ์ƒ์„ฑํ•ด ํ˜ธ์ถœํ•œ๋‹ค.
"""
tracker = get_submission_tracker() if Config.ENABLE_SUBMISSION_LIMIT else None
handler = SubmissionHandler(tracker=tracker, cfg=Config)
    start = time.time()
    try:
return handler.process_submission(
file=file,
name=name,
submit_model=submit_model,
submit_description=submit_description,
user_id=user_id,
progress=progress,
)
    except Exception as e:
        # Top-level safety net: surface even unexpected exceptions in a user-friendly message
try:
if handler.enable_limit and handler.tracker and user_id:
handler.tracker.record_submission(
user_id=user_id,
submitter_name=name,
file_name=os.path.basename(file.name) if file else "(unknown)",
success=False,
error_message=str(e),
submit_model=(submit_model or "").strip() or "Anonymous Model",
submit_description=(submit_description or "").strip() or None,
)
        except Exception:
            # Silently ignore failures to record the submission
            pass
        total_time = time.time() - start  # elapsed time since just before the handler call
        error_message = str(e)
        return (
            "❌ Evaluation failed\n\n"
            "Error details:\n"
            f"{error_message}\n\n"
            f"Elapsed time: {total_time:.2f} seconds ({total_time/60:.2f} minutes)\n\n"
            "The submission itself was processed, but an error occurred during evaluation.\n"
            "The submission record has been saved."
)
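# Minimal local smoke test (a sketch, not the production UI: the hosted Space is assumed to
# wire its own interface and HF login flow elsewhere). The `_demo_submit` helper and the
# component labels are illustrative; user_id stays None, so run this with
# ENABLE_SUBMISSION_LIMIT disabled and the FreshQA/HF environment variables set.
if __name__ == "__main__":
    from types import SimpleNamespace

    def _demo_submit(file, name, submit_model, submit_description, progress=gr.Progress()):
        # The handler reads `file.name`, so wrap a plain filepath if the File component
        # passes one (depends on the installed Gradio version / `type=` setting).
        file_obj = file if (file is None or hasattr(file, "name")) else SimpleNamespace(name=file)
        return process_submission(file_obj, name, submit_model, submit_description,
                                  user_id=None, progress=progress)

    demo = gr.Interface(
        fn=_demo_submit,
        inputs=[
            gr.File(label="Submission CSV (question, model_response)"),
            gr.Textbox(label="Submitter name"),
            gr.Textbox(label="Model name"),
            gr.Textbox(label="Model description"),
        ],
        outputs=gr.Textbox(label="Evaluation report", lines=30),
        title="ko-freshqa-leaderboard: local submission smoke test",
    )
    demo.launch()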