"""
๋ฆฌ๋”๋ณด๋“œ ๊ด€๋ฆฌ ๋ชจ๋“ˆ
๋ฆฌ๋”๋ณด๋“œ ๋ฐ์ดํ„ฐ์˜ ๋กœ๋“œ, ์ €์žฅ, ํ‘œ์‹œ ์ค€๋น„๋ฅผ ๋‹ด๋‹นํ•ฉ๋‹ˆ๋‹ค.
- ๋กœ์ปฌ CSV: ํ”„๋กœ์ ํŠธ ๋ฃจํŠธ์˜ data/leaderboard_results.csv
- ์„ ํƒ์  HF ์—ฐ๋™:
- repo_id: Config.FRESHQA_DATA_REPO_ID
- token : Config.HF_TOKEN
- ํŒŒ์ผ๋ช… : leaderboard_results.csv (repo ๋ฃจํŠธ)
- Config.UPLOAD_LEADERBOARD_TO_HF == True ์ผ ๋•Œ๋งŒ HF๋ฅผ ์ฝ๊ณ /์“ด๋‹ค.
"""
import os
import time
import tempfile
from typing import Optional
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download
from config import Config
from src.utils import file_lock
# -------------------------
# Constants and settings
# -------------------------
HF_LEADERBOARD_FILENAME = "leaderboard_results.csv"  # filename inside the HF dataset (repo root)
LOCAL_LEADERBOARD_FILENAME = "leaderboard_results.csv"  # filename inside the local data folder (unchanged)
HF_REPO_ID = Config.FRESHQA_DATA_REPO_ID
HF_ADMIN_TOKEN = Config.HF_TOKEN
UPLOAD_LEADERBOARD_TO_HF = Config.UPLOAD_LEADERBOARD_TO_HF
hf_api = HfApi()
# -------------------------
# Path / initial schema / normalization helpers
# -------------------------
def _get_local_leaderboard_path() -> str:
"""ํ”„๋กœ์ ํŠธ ๋ฃจํŠธ ๊ธฐ์ค€ ๋กœ์ปฌ ๋ฆฌ๋”๋ณด๋“œ CSV ๊ฒฝ๋กœ ๋ฐ˜ํ™˜."""
current_dir = os.path.dirname(os.path.abspath(__file__)) # src/ ํด๋”
project_root = os.path.dirname(current_dir) # ํ”„๋กœ์ ํŠธ ๋ฃจํŠธ
return os.path.join(project_root, "data", LOCAL_LEADERBOARD_FILENAME)
def _init_empty_leaderboard_df() -> pd.DataFrame:
"""์ดˆ๊ธฐ ๋นˆ ๋ฆฌ๋”๋ณด๋“œ ์Šคํ‚ค๋งˆ DataFrame."""
return pd.DataFrame({
"id": [],
"model": [],
"description": [],
"accuracy": [],
"fast_changing_accuracy": [],
"slow_changing_accuracy": [],
"never_changing_accuracy": [],
"acc_vp": [],
"acc_fp": [],
"acc_vp_one_hop": [],
"acc_vp_two_hop": [],
"acc_fp_one_hop": [],
"acc_fp_two_hop": [],
"acc_vp_old": [],
"acc_vp_new": [],
"acc_fp_old": [],
"acc_fp_new": [],
"acc_politics": [],
"acc_sports": [],
"acc_entertainment": [],
"acc_weather": [],
"acc_world": [],
"acc_economy": [],
"acc_society": [],
"acc_it_science": [],
"acc_life_culture": [],
"acc_unknown": [],
"total_questions": [],
"evaluation_date": [],
"evaluation_mode": [],
})
def _normalize_leaderboard_df(df: pd.DataFrame) -> pd.DataFrame:
    """
    Normalize a leaderboard DataFrame to the expected schema, sort order, and column order.
    (Logic split out of the original load_leaderboard_data.)
    """
if df is None or df.empty:
return _init_empty_leaderboard_df()
df = df.copy()
    # Add evaluation_mode if it is missing
if "evaluation_mode" not in df.columns:
df["evaluation_mode"] = "Unknown"
    # Make sure the text columns exist
text_columns = ["model", "description"]
for col in text_columns:
if col not in df.columns:
df[col] = pd.Series(dtype="object")
    # Add the detailed-analysis columns if they are missing
detailed_columns = [
"acc_test", "acc_dev", "acc_vp", "acc_fp", "acc_vp_one_hop", "acc_vp_two_hop",
"acc_fp_one_hop", "acc_fp_two_hop", "acc_vp_old", "acc_vp_new", "acc_fp_old", "acc_fp_new",
]
for col in detailed_columns:
if col not in df.columns:
df[col] = 0.0
    # Add the per-domain accuracy columns if they are missing
domain_columns = [
"acc_politics", "acc_sports", "acc_entertainment",
"acc_weather", "acc_world", "acc_economy",
"acc_society", "acc_it_science", "acc_life_culture", "acc_unknown",
]
for col in domain_columns:
if col not in df.columns:
df[col] = 0.0
    # Sort by accuracy (descending)
if "accuracy" in df.columns and not df.empty:
df = df.sort_values("accuracy", ascending=False).reset_index(drop=True)
    # Order the columns (rank excluded)
column_order = [
"id", "model", "description", "accuracy", "fast_changing_accuracy",
"slow_changing_accuracy", "never_changing_accuracy", "acc_vp", "acc_fp",
"acc_vp_one_hop", "acc_vp_two_hop", "acc_fp_one_hop", "acc_fp_two_hop",
"acc_vp_old", "acc_vp_new", "acc_fp_old", "acc_fp_new",
"acc_politics", "acc_sports", "acc_entertainment", "acc_weather",
"acc_world", "acc_economy", "acc_society", "acc_it_science",
"acc_life_culture", "acc_unknown", "total_questions",
"evaluation_date", "evaluation_mode",
]
available_columns = [col for col in column_order if col in df.columns]
df = df[available_columns]
return df
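# Example of the normalization above (illustrative, not executed at import time):
# a minimal frame such as
#
#     pd.DataFrame([{"model": "demo-model", "accuracy": 0.5}])
#
# comes back with evaluation_mode="Unknown", an empty description column, the
# detailed/domain accuracy columns filled with 0.0, rows sorted by accuracy, and
# only the columns from column_order that are present, in that order.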
def _load_local_leaderboard_df() -> pd.DataFrame:
"""๋กœ์ปฌ CSV์—์„œ ๋ฆฌ๋”๋ณด๋“œ ๋กœ๋“œ (์—†์œผ๋ฉด ๋นˆ ์Šคํ‚ค๋งˆ)."""
data_path = _get_local_leaderboard_path()
try:
df = pd.read_csv(data_path)
return _normalize_leaderboard_df(df)
except FileNotFoundError:
return _init_empty_leaderboard_df()
except Exception as e:
print(f"โš ๏ธ ๋กœ์ปฌ ๋ฆฌ๋”๋ณด๋“œ ๋กœ๋“œ ์‹คํŒจ: {e}")
return _init_empty_leaderboard_df()
# -------------------------
# HF sync helpers
# -------------------------
def _can_use_hf() -> bool:
"""HF ์—ฐ๋™์ด ๊ฐ€๋Šฅํ•œ ์ƒํƒœ์ธ์ง€ ์—ฌ๋ถ€ (Config ๊ธฐ๋ฐ˜)."""
if not UPLOAD_LEADERBOARD_TO_HF:
return False
if not HF_REPO_ID or not HF_ADMIN_TOKEN:
        # Skip HF entirely when the settings are missing
return False
return True
def _load_leaderboard_from_hf(retries: int = 3, delay: float = 1.0) -> Optional[pd.DataFrame]:
"""
    Download the leaderboard CSV from the HF dataset and return it as a DataFrame.
    Returns None on failure. Includes retry logic.
"""
if not _can_use_hf():
return None
last_err: Optional[Exception] = None
for attempt in range(1, retries + 1):
try:
with tempfile.TemporaryDirectory() as tmpdir:
file_path = hf_hub_download(
repo_id=HF_REPO_ID,
filename=HF_LEADERBOARD_FILENAME,
repo_type="dataset",
local_dir=tmpdir,
token=HF_ADMIN_TOKEN,
)
df = pd.read_csv(file_path)
return _normalize_leaderboard_df(df)
except Exception as e:
last_err = e
print(f"โš ๏ธ HF ๋ฆฌ๋”๋ณด๋“œ ๋กœ๋“œ ์‹คํŒจ (์‹œ๋„ {attempt}/{retries}): {e}")
if attempt < retries:
time.sleep(delay)
delay *= 2
print("โŒ HF ๋ฆฌ๋”๋ณด๋“œ ๋กœ๋“œ ์žฌ์‹œ๋„ ๋ชจ๋‘ ์‹คํŒจ")
return None
def _save_leaderboard_to_hf(df: pd.DataFrame, retries: int = 3, delay: float = 1.0) -> bool:
"""
    Upload the leaderboard CSV to the HF dataset.
    Returns False on failure. Includes retry logic.
"""
if not _can_use_hf():
return False
df = _normalize_leaderboard_df(df)
last_err: Optional[Exception] = None
for attempt in range(1, retries + 1):
        tmp_path: Optional[str] = None
        try:
            # Write the CSV to a temporary file first, then upload that file to the dataset repo.
            with tempfile.NamedTemporaryFile(
                mode="w",
                encoding="utf-8",
                suffix=".csv",
                delete=False,
            ) as tmpfile:
                tmp_path = tmpfile.name
            df.to_csv(tmp_path, index=False)
            hf_api.upload_file(
                path_or_fileobj=tmp_path,
                path_in_repo=HF_LEADERBOARD_FILENAME,
                repo_id=HF_REPO_ID,
                repo_type="dataset",
                token=HF_ADMIN_TOKEN,
                commit_message="Update leaderboard results",
            )
            return True
        except Exception as e:
            last_err = e
            print(f"⚠️ Failed to upload the leaderboard to HF (attempt {attempt}/{retries}): {e}")
            if attempt < retries:
                time.sleep(delay)
                delay *= 2
        finally:
            # Remove the temp file whether or not the upload succeeded, so retries do not leak files.
            if tmp_path and os.path.exists(tmp_path):
                os.unlink(tmp_path)
print(f"โŒ HF ๋ฆฌ๋”๋ณด๋“œ ์—…๋กœ๋“œ ์žฌ์‹œ๋„ ๋ชจ๋‘ ์‹คํŒจ: {last_err}")
return False
# -------------------------
# Public API: load / append
# -------------------------
def load_leaderboard_data() -> pd.DataFrame:
    """
    Load the leaderboard data.
    Priority:
        1) Config.UPLOAD_LEADERBOARD_TO_HF == True and the HF settings are valid:
            - Try to load the latest leaderboard from HF.
            - On success: overwrite the local CSV with it, then return it.
            - On failure: fall back to the local CSV (empty schema if missing).
        2) Otherwise:
            - Use the local CSV only (empty schema if missing).
    """
data_path = _get_local_leaderboard_path()
lock_path = data_path + ".lock"
    # Try HF first, but only when HF sync is available
if _can_use_hf():
with file_lock(lock_path):
hf_df = _load_leaderboard_from_hf()
if hf_df is not None:
                # HF is the source of truth: sync the local CSV to match it
try:
os.makedirs(os.path.dirname(data_path), exist_ok=True)
hf_df.to_csv(data_path, index=False)
except Exception as e:
print(f"โš ๏ธ ๋กœ์ปฌ ๋ฆฌ๋”๋ณด๋“œ ๋™๊ธฐํ™” ์‹คํŒจ: {e}")
return hf_df
            # Fall back to the local CSV if HF cannot be read
local_df = _load_local_leaderboard_df()
return local_df
    # HF not in use: local only
return _load_local_leaderboard_df()
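# Usage sketch for the load path (hypothetical caller, e.g. a UI refresh handler;
# the function below is illustrative and not defined anywhere in this project):
#
#     def refresh_leaderboard_view():
#         df = load_leaderboard_data()
#         # Headline columns only; the full frame keeps every metric column.
#         return df[["model", "accuracy", "evaluation_date"]]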
def append_to_leaderboard_data(new_data_list: list) -> pd.DataFrame:
    """
    Append new results to the leaderboard data (uses a file lock).
    - Always updates the local CSV.
    - If Config.UPLOAD_LEADERBOARD_TO_HF == True and the HF settings are valid,
      also uploads the updated full DataFrame to HF (with retries).
    """
data_path = _get_local_leaderboard_path()
lock_path = data_path + ".lock"
with file_lock(lock_path):
        # 1) Load the existing local data
if os.path.exists(data_path):
try:
existing_df = pd.read_csv(data_path)
except Exception as e:
print(f"โš ๏ธ ๋กœ์ปฌ ๋ฆฌ๋”๋ณด๋“œ ์ฝ๊ธฐ ์‹คํŒจ, ๋นˆ ์Šคํ‚ค๋งˆ๋กœ ์ง„ํ–‰: {e}")
existing_df = _init_empty_leaderboard_df()
else:
existing_df = _init_empty_leaderboard_df()
existing_df = _normalize_leaderboard_df(existing_df)
        # 2) Append the new data
new_df = pd.DataFrame(new_data_list)
if not new_df.empty:
new_df = _normalize_leaderboard_df(new_df)
frames_to_concat = []
if not existing_df.empty:
frames_to_concat.append(existing_df)
if not new_df.empty:
frames_to_concat.append(new_df)
if len(frames_to_concat) == 0:
combined_df = existing_df.copy()
elif len(frames_to_concat) == 1:
combined_df = frames_to_concat[0].copy()
else:
combined_df = pd.concat(frames_to_concat, ignore_index=True)
combined_df = _normalize_leaderboard_df(combined_df)
        # 3) Save locally
try:
os.makedirs(os.path.dirname(data_path), exist_ok=True)
combined_df.to_csv(data_path, index=False)
except Exception as e:
print(f"โŒ ๋กœ์ปฌ ๋ฆฌ๋”๋ณด๋“œ ์ €์žฅ ์‹คํŒจ: {e}")
        # 4) Also upload to HF (optional)
if _can_use_hf():
ok = _save_leaderboard_to_hf(combined_df)
if not ok:
print("โš ๏ธ ๋ฆฌ๋”๋ณด๋“œ HF ์—…๋กœ๋“œ ์‹คํŒจ (๋กœ์ปฌ์—๋Š” ์ €์žฅ๋จ)")
return combined_df
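

# Minimal manual smoke test for the load path (a sketch, not used by the app itself).
# With UPLOAD_LEADERBOARD_TO_HF disabled this only reads data/leaderboard_results.csv;
# with it enabled, load_leaderboard_data() may also sync the local CSV from HF.
if __name__ == "__main__":
    current = load_leaderboard_data()
    print(f"Loaded {len(current)} leaderboard rows from {_get_local_leaderboard_path()}")
    if not current.empty:
        # Show a few headline columns; the full frame keeps every metric column.
        print(current[["model", "accuracy", "evaluation_mode"]].head())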