Spaces:
Running
Running
| from __future__ import annotations | |
| import os | |
| import time | |
| import queue | |
| from dataclasses import dataclass | |
| from typing import Any, Optional, Dict, Tuple, Callable | |
| import pandas as pd | |
| import gradio as gr | |
| from config import Config | |
| from src.submission_tracker import get_submission_tracker, SubmissionTracker | |
| from src.quick_csv_loader import quick_load_csv | |
| from src.leaderboard_manager import append_to_leaderboard_data | |
| from src.utils import get_current_datetime_str | |
| from freshqa.fresheval_parallel import evaluate_dataframe_parallel | |
| from freshqa.freshqa_acc import process_freshqa_dataframe, calculate_accuracy | |
| from freshqa.merge_csv_with_model_response import merge_dataframe_with_model_response_df | |
# -------------------------
# Common return type (Result)
# -------------------------
@dataclass
class Result:
    """Uniform return value for the internal helper steps.

    Helpers return ``Result(ok=True, data=...)`` on success and
    ``Result(ok=False, error=...)`` on failure instead of raising, so the
    caller can turn a failure into a user-facing message.

    The ``@dataclass`` decorator is required: call sites construct this
    class with keyword arguments, which a plain class with bare annotations
    does not accept.
    """

    # True when the step succeeded.
    ok: bool
    # Payload produced by a successful step (type depends on the step).
    data: Optional[Any] = None
    # Human-readable error message for a failed step.
    error: Optional[str] = None
    # Optional free-form extra information; not read by the visible code.
    meta: Optional[Dict] = None
# -------------------------
# Core handler
# -------------------------
class SubmissionHandler:
    """Orchestrate submission-file processing and FreshQA evaluation.

    - The tracker and the config object are injected (dependency injection).
    - Internal helpers return ``Result`` or another explicit type.
    - Storage, limit decisions and user-id handling are delegated to the
      tracker; this handler only calls it.
    """

    def __init__(self, tracker: Optional[SubmissionTracker] = None, cfg: Optional[type] = None):
        """Store the injected dependencies and check the required settings.

        Args:
            tracker: Submission tracker to use; may be ``None`` and resolved
                lazily in ``process_submission``.
            cfg: Config object to read settings from; defaults to ``Config``.

        Raises:
            ValueError: If ``FRESHQA_DATA_REPO_ID``, ``FRESHQA_DATA_FILENAME``
                or ``HF_TOKEN`` is missing or empty on the config object.
        """
        # Dependency injection
        self.tracker = tracker
        self.cfg = cfg or Config
        self.enable_limit = getattr(self.cfg, "ENABLE_SUBMISSION_LIMIT", False)
        self.repo_id = getattr(self.cfg, "FRESHQA_DATA_REPO_ID", None)
        self.filename = getattr(self.cfg, "FRESHQA_DATA_FILENAME", None)
        self.hf_token = getattr(self.cfg, "HF_TOKEN", None)

        # Required settings check
        if not self.repo_id:
            raise ValueError("โ FRESHQA_DATA_REPO_ID ํ๊ฒฝ ๋ณ์๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค.")
        if not self.filename:
            raise ValueError("โ FRESHQA_DATA_FILENAME ํ๊ฒฝ ๋ณ์๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค.")
        if not self.hf_token:
            raise ValueError("โ HF_TOKEN ํ๊ฒฝ ๋ณ์๊ฐ ์ค์ ๋์ง ์์์ต๋๋ค.")

    # --------- 1) Submission file validation ----------
    def _validate_submission_file(self, file) -> Result:
        """Check that the uploaded CSV is readable and has the required data.

        Args:
            file: Uploaded file object exposing a ``name`` path, or ``None``.

        Returns:
            ``Result(ok=True)`` when the file has non-empty ``question`` and
            ``model_response`` columns without missing values; otherwise a
            failed ``Result`` carrying the error message.
        """
        if file is None:
            return Result(ok=False, error="โ CSV ํ์ผ์ ์ ๋ก๋ํด์ฃผ์ธ์.")

        try:
            df = pd.read_csv(file.name)
        except Exception as e:
            return Result(ok=False, error=f"โ CSV ๋ก๋ฉ ์คํจ: {e}")

        # Report the first missing required column.
        for col in ("question", "model_response"):
            if col not in df.columns:
                return Result(ok=False, error=f"โ CSV ํ์ผ์ ์ปฌ๋ผ์ '{col}'์ด(๊ฐ) ์์ต๋๋ค.")

        if df.empty:
            return Result(ok=False, error="โ CSV ํ์ผ์ ๋ฐ์ดํฐ๊ฐ ์์ต๋๋ค.")

        if df["question"].isnull().any() or df["model_response"].isnull().any():
            return Result(ok=False, error="โ 'question' ๋๋ 'model_response' ์ปฌ๋ผ์ ๋๋ฝ๋ ๊ฐ์ด ์์ต๋๋ค.")

        return Result(ok=True)

    # --------- 2) Quick loading ----------
    def _load_submission_df(self, file) -> Result:
        """Load the configured dataset through ``quick_load_csv``.

        NOTE(review): despite the method name, ``file`` is not read here; the
        frame comes from ``repo_id``/``filename`` on the config. The caller's
        progress text describes it as the reference data - confirm.

        Args:
            file: Unused; kept for interface compatibility.

        Returns:
            ``Result`` whose ``data`` is the loaded DataFrame, or a failed
            ``Result`` with the error message.
        """
        try:
            df = quick_load_csv(self.repo_id, self.filename, self.hf_token)
        except Exception as e:
            return Result(ok=False, error=f"โ CSV ๋ก๋ฉ ์คํจ: {e}")
        return Result(ok=True, data=df)

    # --------- 3) Merge ----------
    def _merge_with_base(self, submission_df: pd.DataFrame, file_name: str) -> Result:
        """Merge the loaded frame with the model responses in ``file_name``.

        Delegates to ``merge_dataframe_with_model_response_df``; any exception
        is converted into a failed ``Result``.
        """
        try:
            merged_df = merge_dataframe_with_model_response_df(submission_df, file_name)
        except Exception as e:
            return Result(ok=False, error=f"โ ๊ธฐ์ค ๋ฐ์ดํฐ์ ๋ณํฉ ์คํจ: {e}")
        return Result(ok=True, data=merged_df)

    # --------- 4) Evaluation ----------
    def _evaluate_freshqa(
        self,
        merged_df: pd.DataFrame,
        on_progress: Optional[Callable[[int, int, str], None]] = None,
    ) -> Result:
        """Run Relaxed and Strict evaluation concurrently with queue-based progress.

        Args:
            merged_df: Frame handed to ``evaluate_dataframe_parallel``.
            on_progress: Optional callback ``(done, total, description)``
                invoked from the calling thread as progress items arrive.

        Returns:
            ``Result`` whose ``data`` is ``(relaxed, strict)``, or a failed
            ``Result`` if evaluation (or the progress callback) raised.
        """
        from concurrent.futures import ThreadPoolExecutor

        progress_q: "queue.Queue[Any]" = queue.Queue()
        # Both modes process every row, so the total number of units is doubled.
        total_items = len(merged_df) * 2
        done_count = 0

        def _drain_queue(block: bool = False) -> None:
            """Consume all queued progress items and forward them."""
            nonlocal done_count
            while True:
                try:
                    if block:
                        item = progress_q.get(timeout=0.05)
                    else:
                        item = progress_q.get_nowait()
                except queue.Empty:
                    return
                try:
                    # Current format: integer increments.
                    if isinstance(item, int):
                        done_count += item
                        if on_progress:
                            desc_text = f"ํ๊ฐ ์ค... {done_count}/{total_items}"
                            on_progress(done_count, total_items, desc_text)
                    # Defensive handling of a legacy (done, total, desc) tuple.
                    elif isinstance(item, tuple) and len(item) == 3 and on_progress:
                        on_progress(item[0], item[1], item[2])
                finally:
                    progress_q.task_done()

        try:
            with ThreadPoolExecutor(max_workers=2) as ex:
                relaxed_f = ex.submit(
                    evaluate_dataframe_parallel,
                    df=merged_df,
                    mode="Relaxed",
                    on_item_done=None,
                    progress_queue=progress_q,
                )
                strict_f = ex.submit(
                    evaluate_dataframe_parallel,
                    df=merged_df,
                    mode="Strict",
                    on_item_done=None,
                    progress_queue=progress_q,
                )
                # Poll so progress is reported from this thread while workers run.
                while True:
                    _drain_queue(block=False)
                    if relaxed_f.done() and strict_f.done():
                        break
                    time.sleep(0.05)
                # Pick up anything enqueued just before the workers finished.
                _drain_queue(block=True)
                relaxed = relaxed_f.result()
                strict = strict_f.result()
            return Result(ok=True, data=(relaxed, strict))
        except Exception as e:
            return Result(ok=False, error=f"โ ํ๊ฐ ์ค ์ค๋ฅ ๋ฐ์: {e}")

    # --------- 5) Accuracy calculation ----------
    def _calculate_accuracy(self, fresheval_df: pd.DataFrame) -> Result:
        """Post-process an evaluation frame and compute accuracies.

        Returns:
            ``Result`` whose ``data`` is ``(processed, accs, counts)``, or a
            failed ``Result`` with the error message.
        """
        try:
            processed = process_freshqa_dataframe(fresheval_df)
            accs, counts = calculate_accuracy(processed)
        except Exception as e:
            return Result(ok=False, error=f"โ ๊ฒฐ๊ณผ ์ง๊ณ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค: {e}")
        return Result(ok=True, data=(processed, accs, counts))

    # --------- 6) Summary ----------
    def _build_summary(self, name: str, relaxed_accs: dict, strict_accs: dict) -> str:
        """Build a short Markdown summary of the test-split accuracies.

        Missing accuracy keys are rendered as ``0.0%``.
        """
        submitter = name if name else "(์ด๋ฆ ๋ฏธ์ ๋ ฅ)"
        lines = [
            f"**์ ์ถ์**: {submitter}",
            "",
            "**์ ํ๋ (ํ ์คํธ์ ๊ธฐ์ค)**",
            f"- Relaxed: {relaxed_accs.get('acc_test', 0):.1f}%",
            f"- Strict: {strict_accs.get('acc_test', 0):.1f}%",
            "",
            "**์ธ๋ถ ์งํ (ํ ์คํธ์ )**",
        ]
        breakdown = (
            ("Fast Changing", "acc_test_fast_changing"),
            ("Two-hop", "acc_test_two_hop"),
            ("Old", "acc_test_old"),
            ("New", "acc_test_new"),
            ("VP", "acc_test_vp"),
            ("FP", "acc_test_fp"),
        )
        for label, key in breakdown:
            lines.append(
                f"- {label}: R {relaxed_accs.get(key, 0):.1f}% / "
                f"S {strict_accs.get(key, 0):.1f}%"
            )
        return "\n".join(lines)

    def _get_result_summary(
        self,
        file_name: str,
        name: str,
        relaxed_accs: dict,
        strict_accs: dict,
        relaxed_table: pd.DataFrame,
        strict_table: pd.DataFrame,
    ) -> str:
        """Build the full plain-text report returned to the UI.

        The report contains basic info, a per-mode accuracy summary,
        submission metadata, and both detailed tables rendered with
        ``DataFrame.to_string``.
        """
        display_file = os.path.basename(file_name) if file_name else ""
        lines: list[str] = [
            "โ ์ ์ถ ๋ฐ ํ๊ฐ ์๋ฃ",
            "",
            "[๊ธฐ๋ณธ ์ ๋ณด]",
            f"- ์ ์ถ ํ์ผ: {display_file}",
            f"- ํ๊ฐ ์์คํ : Solar Pro API",
            "",
            "[๊ฒฐ๊ณผ ์์ฝ]",
        ]

        # The Relaxed and Strict sections share the same layout.
        for mode, accs in (("Relaxed", relaxed_accs), ("Strict", strict_accs)):
            lines.append(f"- {mode} ๋ชจ๋")
            lines.append(f" ยท ์ ์ฒด ์ ํ๋: {float(accs.get('acc', 0)):.1f}%")
            lines.append(
                f" ยท Fast-changing: {float(accs.get('acc_fast_changing', 0)):.1f}% | "
                f"Slow-changing: {float(accs.get('acc_slow_changing', 0)):.1f}% | "
                f"Never-changing: {float(accs.get('acc_never_changing', 0)):.1f}%"
            )
            lines.append(f" ยท False premise: {float(accs.get('acc_fp', 0)):.1f}%")
            lines.append("")

        lines.append("[์ ์ถ ๋ฉํ]")
        lines.append(f"- ์ ์ถ์: {name if name else 'Unknown'}")
        lines.append(f"- ํ๊ฐ ์ผ์: {get_current_datetime_str()}")
        lines.append(f"- ๋น๊ณ : Relaxed/Strict ๊ฒฐ๊ณผ๊ฐ ๋ฆฌ๋๋ณด๋์ ๋ฐ์๋์์ต๋๋ค.")
        lines.append("")

        sep = "-" * 60
        lines.append(sep)
        lines.append("์์ธ ๊ฒฐ๊ณผ ํ ์ด๋ธ (Relaxed)")
        lines.append(sep)
        lines.append(relaxed_table.to_string(index=False))
        lines.append("")
        lines.append(sep)
        lines.append("์์ธ ๊ฒฐ๊ณผ ํ ์ด๋ธ (Strict)")
        lines.append(sep)
        lines.append(strict_table.to_string(index=False))
        return "\n".join(lines)

    # --------- 7) Accuracy table ----------
    def _create_detailed_results_table(self, accs: dict, counts: dict) -> pd.DataFrame:
        """Build the detailed accuracy table (overall / test / dev columns).

        Each cell shows the accuracy and the matching count; keys missing from
        ``accs`` or ``counts`` are rendered as 0.

        NOTE(review): indentation was lost in the reviewed source; the hop and
        data-age sub-rows are assumed to be emitted per question type (inside
        the loop), since their labels and keys use the loop variables.
        """

        def _cell(metric_key: str) -> str:
            return f"{accs.get(metric_key, 0):.1f}% ({counts.get(metric_key, 0)}๊ฐ)"

        def _row(label: str, suffix: str) -> dict:
            # suffix is "" for the overall row, or "_<category>" otherwise.
            return {
                '์นดํ ๊ณ ๋ฆฌ': label,
                '์ ์ฒด': _cell(f"acc{suffix}"),
                'ํ ์คํธ': _cell(f"acc_test{suffix}"),
                '๊ฐ๋ฐ': _cell(f"acc_dev{suffix}"),
            }

        # Overall accuracy
        table_data = [_row('์ ์ฒด ์ ํ๋', "")]

        # Accuracy by fact type
        fact_types = {
            'fast_changing': '๋น ๋ฅด๊ฒ ๋ณํ๋ ์ฌ์ค',
            'slow_changing': '์ฒ์ฒํ ๋ณํ๋ ์ฌ์ค',
            'never_changing': '๋ณํ์ง ์๋ ์ฌ์ค',
        }
        for key, name in fact_types.items():
            table_data.append(_row(name, f"_{key}"))

        # Accuracy by question type, each followed by hop-count and data-age sub-rows
        question_types = {
            'vp': '์ ํจํ ์ ์ (Valid Premise)',
            'fp': '์๋ชป๋ ์ ์ (False Premise)',
        }
        sub_rows = (
            ("one_hop", "๋จ์ผ ํ"),
            ("two_hop", "๋ค์ค ํ"),
            ("old", "์ค๋๋ ๋ฐ์ดํฐ"),
            ("new", "์ต์ ๋ฐ์ดํฐ"),
        )
        for key, name in question_types.items():
            table_data.append(_row(name, f"_{key}"))
            for sub_key, sub_label in sub_rows:
                table_data.append(_row(f" โ {name} ({sub_label})", f"_{key}_{sub_key}"))

        return pd.DataFrame(table_data)

    # --------- 8) Leaderboard row creation ----------
    def _build_leaderboard_rows(
        self,
        name: str,
        submit_model: str,
        submit_description: Optional[str],
        mode: str,
        accs: dict,
        counts: Optional[dict] = None,
    ):
        """Build one leaderboard row (a dict) for a single evaluation mode.

        Only test-split accuracies are used.

        Args:
            name: Submitter name; blank names become ``"Unknown"``.
            submit_model: Model name to show.
            submit_description: Optional description.
            mode: Evaluation mode label stored in ``evaluation_mode``.
            accs: Accuracy dict; missing keys default to 0.
            counts: Optional count dict. When given, ``total_questions`` is
                taken from ``counts['acc_test']``. When omitted, the legacy
                value (the truncated accuracy) is kept for backward
                compatibility, although it is not a question count.

        Returns:
            The row dict.
        """
        submitter_id = f"{name}".strip()
        result = {
            'id': submitter_id if submitter_id else "Unknown",
            'model': submit_model,
            'description': submit_description,
            'accuracy': float(accs.get('acc_test', 0)),
            'fast_changing_accuracy': float(accs.get('acc_test_fast_changing', 0)),
            'slow_changing_accuracy': float(accs.get('acc_test_slow_changing', 0)),
            'never_changing_accuracy': float(accs.get('acc_test_never_changing', 0)),
        }

        # Premise / hop / data-age breakdown; insertion order is preserved.
        breakdown_keys = (
            'vp', 'fp',
            'vp_one_hop', 'vp_two_hop', 'fp_one_hop', 'fp_two_hop',
            'vp_old', 'vp_new', 'fp_old', 'fp_new',
        )
        # Per-domain accuracy (test results only)
        domain_keys = (
            'politics', 'sports', 'entertainment', 'weather', 'world',
            'economy', 'society', 'it_science', 'life_culture', 'unknown',
        )
        for key in breakdown_keys + domain_keys:
            result[f'acc_{key}'] = float(accs.get(f'acc_test_{key}', 0))

        # The question count lives in `counts`; `accs['acc_test']` is a percentage.
        count_source = counts if counts is not None else accs
        result['total_questions'] = int(count_source.get('acc_test', 0))
        result['evaluation_date'] = get_current_datetime_str()
        result['evaluation_mode'] = mode
        return result

    def _save_leaderboard(
        self,
        name: str,
        submit_model: str,
        submit_description: Optional[str],
        relaxed_accs: dict,
        strict_accs: dict,
        relaxed_counts: Optional[dict] = None,
        strict_counts: Optional[dict] = None,
    ):
        """Append the Relaxed and Strict rows to the leaderboard (best effort).

        A failure in ``append_to_leaderboard_data`` is printed and not raised.
        """
        rows = [
            self._build_leaderboard_rows(
                name, submit_model, submit_description, 'Relaxed', relaxed_accs, relaxed_counts
            ),
            self._build_leaderboard_rows(
                name, submit_model, submit_description, 'Strict', strict_accs, strict_counts
            ),
        ]
        try:
            append_to_leaderboard_data(rows)
        except Exception as e:
            print(f"โ ๏ธ ๋ฆฌ๋๋ณด๋ ์ ์ฅ ์คํจ: {e}")

    def _record_failure(
        self,
        tracker: Optional[SubmissionTracker],
        user_id: Optional[str],
        name: str,
        file,
        error_message: str,
        submit_model: str,
        submit_description: Optional[str],
    ) -> None:
        """Record a failed submission with the tracker (best effort).

        Does nothing unless the submission limit is enabled and both a tracker
        and a user id are available. Tracker errors are reported and swallowed
        so they never mask the original failure message.
        """
        if not (self.enable_limit and tracker and user_id):
            return
        try:
            tracker.record_submission(
                user_id=user_id,
                submitter_name=name,
                file_name=os.path.basename(file.name),
                success=False,
                error_message=error_message,
                submit_model=submit_model,
                submit_description=submit_description,
            )
        except Exception as e:
            print(f"WARNING: failed to record the failed submission: {e}")

    # --------- 9) Public endpoint (core) ----------
    def process_submission(
        self,
        file,
        name: str,
        submit_model: str,
        submit_description: str,
        user_id: Optional[str] = None,
        progress: gr.Progress = gr.Progress(),
    ) -> str:
        """Process and evaluate a submission file.

        Internal helpers return ``Result``; the final output handed to Gradio
        is a string (kept for compatibility with the existing UI).

        Args:
            file: Uploaded file object exposing a ``name`` path.
            name: Submitter name.
            submit_model: Model name; blank becomes ``"Anonymous Model"``.
            submit_description: Optional description; blank becomes ``None``.
            user_id: Logged-in user id; required when the limit is enabled.
            progress: Gradio progress reporter.

        Returns:
            The result report on success, or an error message string.
        """
        normalized_model = (submit_model or "").strip() or "Anonymous Model"
        normalized_description = (submit_description or "").strip() or None

        # 1) Check the submission limit
        tracker: Optional[SubmissionTracker] = None
        if self.enable_limit:
            # Login-based restriction: fail immediately without a user id.
            if not user_id:
                return "โ HuggingFace ๋ก๊ทธ์ธ ์ํ์์๋ง ์ ์ถ ๊ฐ๋ฅํฉ๋๋ค. ๋จผ์ ๋ก๊ทธ์ธ ํ ๋ค์ ์๋ํด์ฃผ์ธ์."
            tracker = self.tracker or get_submission_tracker()
            if tracker is not None:
                self.tracker = tracker
        if self.enable_limit and tracker:
            try:
                can_submit, message, _remaining = tracker.can_submit(user_id=user_id)
            except Exception as e:
                return f"โ ์ ์ถ ์ ํ ํ์ธ ์คํจ: {e}"
            if not can_submit:
                return f"โ ์ ์ถ ์ ํ: {message}"

        # 2) Validate the file
        progress(0.05, desc="์ ์ถ ํ์ผ ๊ฒ์ฆ ์ค...")
        validation = self._validate_submission_file(file)
        if not validation.ok:
            return validation.error or "โ ์ ์ถ ํ์ผ ๊ฒ์ฆ ์คํจ"

        # 3) Load
        progress(0.1, desc="๊ธฐ์ค ๋ฐ์ดํฐ ๋ก๋ ์ค...")
        loaded = self._load_submission_df(file)
        if not loaded.ok:
            return loaded.error or "โ CSV ๋ก๋ฉ ์คํจ"
        loaded_df: pd.DataFrame = loaded.data

        # 4) Merge
        progress(0.15, desc="๊ธฐ์ค ๋ฐ์ดํฐ์ ๋ณํฉ ์ค...")
        merged = self._merge_with_base(loaded_df, file.name)
        if not merged.ok:
            return merged.error or "โ ๊ธฐ์ค ๋ฐ์ดํฐ ๋ณํฉ ์คํจ"
        merged_df: pd.DataFrame = merged.data

        # 5) Evaluate. Progress is mapped onto 0.15-0.8 so that the later
        #    steps (0.8, 0.85, 0.9, 1.0) never move the bar backwards.
        progress(0.15, desc="FreshQA ํ๊ฐ ์ค๋น ์ค...")

        def on_inner_progress(done: int, total: int, desc: str):
            frac = 0.15 + 0.65 * (done / max(total, 1))
            progress(frac, desc=desc)

        def fail(result: Result, record_default: str, return_default: str) -> str:
            # Record the failure (best effort) and return the user-facing message.
            self._record_failure(
                tracker,
                user_id,
                name,
                file,
                result.error or record_default,
                normalized_model,
                normalized_description,
            )
            return result.error or return_default

        ev = self._evaluate_freshqa(merged_df, on_progress=on_inner_progress)
        if not ev.ok:
            return fail(ev, "ํ๊ฐ ์คํจ", "โ ํ๊ฐ ์ค ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค")
        relaxed_df, strict_df = ev.data  # type: ignore[assignment]

        # 6) Aggregate results
        progress(0.8, desc="ํ๊ฐ ๊ฒฐ๊ณผ ๋ถ์ ์ค...")
        relaxed_res = self._calculate_accuracy(relaxed_df)
        if not relaxed_res.ok:
            return fail(relaxed_res, "์ง๊ณ ์คํจ", "โ ๊ฒฐ๊ณผ ์ง๊ณ ์คํจ")
        strict_res = self._calculate_accuracy(strict_df)
        if not strict_res.ok:
            return fail(strict_res, "์ง๊ณ ์คํจ", "โ ๊ฒฐ๊ณผ ์ง๊ณ ์คํจ")
        _relaxed_processed, relaxed_accs, relaxed_counts = relaxed_res.data  # type: ignore[misc]
        _strict_processed, strict_accs, strict_counts = strict_res.data  # type: ignore[misc]

        # 7) Summary / tables
        relaxed_table = self._create_detailed_results_table(relaxed_accs, relaxed_counts)
        strict_table = self._create_detailed_results_table(strict_accs, strict_counts)
        result_summary = self._get_result_summary(
            file_name=file.name if file else "",
            name=name,
            relaxed_accs=relaxed_accs,
            strict_accs=strict_accs,
            relaxed_table=relaxed_table,
            strict_table=strict_table,
        )

        # 8) Record the successful submission and save to the leaderboard
        if self.enable_limit and tracker and user_id:
            progress(0.85, desc="์ ์ถ ๋ด์ญ ์ ์ฅ ์ค...")
            try:
                tracker.record_submission(
                    user_id=user_id,
                    submitter_name=name,
                    file_name=os.path.basename(file.name),
                    success=True,
                    submit_model=normalized_model,
                    submit_description=normalized_description,
                )
            except Exception as e:
                # Even if recording fails, the results still go to the leaderboard.
                print(f"WARNING: failed to record the successful submission: {e}")
        progress(0.9, desc="๋ฆฌ๋๋ณด๋ ์ ๋ฐ์ดํธ ์ค...")
        self._save_leaderboard(
            name,
            normalized_model,
            normalized_description,
            relaxed_accs,
            strict_accs,
            relaxed_counts,
            strict_counts,
        )

        # 9) Return the result string
        progress(1.0, desc="์๋ฃ")
        return result_summary
# -------------------------
# Module-level entry point (compatible with the existing UI)
# -------------------------
def process_submission(
    file,
    name: str,
    submit_model: str,
    submit_description: str,
    user_id: Optional[str] = None,
    progress: gr.Progress = gr.Progress(),
) -> str:
    """Entry point called directly from Gradio.

    Creates a ``SubmissionHandler`` with its dependencies injected and
    delegates to ``SubmissionHandler.process_submission``. Any unexpected
    exception from the handler call is converted into a user-facing error
    string; a failure record is attempted on a best-effort basis.

    Args:
        file: Uploaded file object exposing a ``name`` path, or ``None``.
        name: Submitter name.
        submit_model: Model name.
        submit_description: Optional description.
        user_id: Logged-in user id, if any.
        progress: Gradio progress reporter.

    Returns:
        The handler's result string, or an error report on unexpected failure.

    Raises:
        ValueError: Propagated from ``SubmissionHandler.__init__`` when a
            required setting is missing (construction is outside the guard).
    """
    # Measured here so the error report can show a real elapsed time.
    started_at = time.monotonic()

    # Read the flag the same way the handler does (missing attribute -> disabled).
    limit_enabled = getattr(Config, "ENABLE_SUBMISSION_LIMIT", False)
    tracker = get_submission_tracker() if limit_enabled else None
    handler = SubmissionHandler(tracker=tracker, cfg=Config)

    try:
        return handler.process_submission(
            file=file,
            name=name,
            submit_model=submit_model,
            submit_description=submit_description,
            user_id=user_id,
            progress=progress,
        )
    except Exception as e:
        # Top-level safety net: unexpected exceptions are returned as a
        # user-friendly message instead of propagating to the UI.
        try:
            if handler.enable_limit and handler.tracker and user_id:
                handler.tracker.record_submission(
                    user_id=user_id,
                    submitter_name=name,
                    file_name=os.path.basename(file.name) if file else "(unknown)",
                    success=False,
                    error_message=str(e),
                    submit_model=(submit_model or "").strip() or "Anonymous Model",
                    submit_description=(submit_description or "").strip() or None,
                )
        except Exception:
            # A failure to record is deliberately ignored (best effort).
            pass

        total_time = time.monotonic() - started_at
        error_message = str(e)
        return (
            "โ ํ๊ฐ ์คํจ\n\n"
            "์ค๋ฅ ๋ด์ฉ:\n"
            f"{error_message}\n\n"
            f"์์ ์๊ฐ: {total_time:.2f}์ด ({total_time/60:.2f}๋ถ)\n\n"
            "์ ์ถ์ ์ ์์ ์ผ๋ก ์ฒ๋ฆฌ๋์์ง๋ง, ํ๊ฐ ๊ณผ์ ์์ ์ค๋ฅ๊ฐ ๋ฐ์ํ์ต๋๋ค.\n"
            "์ ์ถ ๊ธฐ๋ก์ ์ ์ฅ๋์์ต๋๋ค."
        )