Commit c797bf2 · Parent: 3f46491
Add 'Reasoning' tag for model names

Files changed:
- app.py +3 -3
- results/parse.py +56 -7
- results/results_icarus.json +0 -0
- results/results_verilator.json +0 -0
- utils.py +27 -9
app.py CHANGED

@@ -291,14 +291,14 @@ with gr.Blocks(
             show_row_numbers=True,
             wrap=True,
             datatype=[
-                "
+                "html",
                 "html",
             ],
             interactive=False,
             column_widths=[
                 "7%",
-                "
-                "
+                "28%",
+                "13%",
                 "10%",
                 "13%",
                 "10%",
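For context, a minimal sketch of how these lists fit together in the leaderboard table, assuming Gradio's gr.Dataframe API; the header names and the non-HTML datatypes here are illustrative, not taken from app.py. Both leading columns are rendered as HTML so the model cell can carry the hyperlink-plus-badge markup, and the second column (likely the Model column) is widened to 28%.

import gradio as gr

# Illustrative column set; the real headers live elsewhere in app.py.
headers = ["Type", "Model", "Params", "Release", "Benchmark A", "Benchmark B"]

leaderboard = gr.Dataframe(
    headers=headers,
    show_row_numbers=True,
    wrap=True,
    # Columns rendered as raw HTML; the Model cell holds the hyperlink + badge markup.
    datatype=["html", "html", "number", "str", "number", "number"],
    interactive=False,
    # Widths from the hunk above: the second column grows to 28%
    # so the "new" / "(reasoning)" badges fit on one line.
    column_widths=["7%", "28%", "13%", "10%", "13%", "10%"],
)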
results/parse.py CHANGED

@@ -13,177 +13,224 @@ model_details = {
         685,
         "General",
         "V2",
+        "Reasoning",  # "Dense" or "Reasoning"
     ),
     "DeepSeek R1": (
         "https://huggingface.co/deepseek-ai/DeepSeek-R1",
         685,
         "General",
         "V1",
+        "Reasoning",
     ),
     "Llama 3.1 405B": (
         "https://huggingface.co/RedHatAI/Meta-Llama-3.1-405B-FP8",
         406,
         "General",
         "V1",
+        "Dense",
     ),
     "Qwen3 236B A22B": (
         "https://huggingface.co/Qwen/Qwen3-235B-A22B",
         235,
         "General",
         "V2",
+        "Reasoning",
     ),
     "Llama 3.(1-3) 70B": (
         "https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct",
         70.6,
         "General",
         "V1",
+        "Dense",
     ),
     "Qwen2.5 72B": (
         "https://huggingface.co/Qwen/Qwen2.5-72B-Instruct",
         72.7,
         "General",
         "V1",
+        "Dense",
+    ),
+    "QwQ 32B": (
+        "https://huggingface.co/Qwen/QwQ-32B",
+        32.8,
+        "General",
+        "V2",
+        "Reasoning",
+    ),
+    "Qwen2.5 32B": (
+        "https://huggingface.co/Qwen/Qwen2.5-32B",
+        32.5,
+        "General",
+        "V1",
+        "Dense",
-    "QwQ 32B": ("https://huggingface.co/Qwen/QwQ-32B", 32.8, "General", "V2"),
-    "Qwen2.5 32B": ("https://huggingface.co/Qwen/Qwen2.5-32B", 32.5, "General", "V1"),
     ),
     "StarChat2 15B v0.1": (
         "https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1",
         16,
         "General",
         "V1",
+        "Dense",
     ),
     "DeepSeek R1 Distill Qwen 14B": (
         "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
         14.8,
         "General",
         "V1",
+        "Reasoning",
     ),
     "CodeLlama 70B": (
         "https://huggingface.co/codellama/CodeLlama-70b-hf",
         69,
         "Coding",
         "V1",
+        "Dense",
     ),
     "QwenCoder 2.5 32B": (
         "https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct",
         32.5,
         "Coding",
         "V1",
+        "Dense",
     ),
     "DeepSeek Coder 33B": (
         "https://huggingface.co/deepseek-ai/deepseek-coder-33b-instruct",
         33.3,
         "Coding",
         "V1",
+        "Dense",
     ),
     "QwenCoder 2.5 14B": (
         "https://huggingface.co/Qwen/Qwen2.5-Coder-14B-Instruct",
         14.7,
         "Coding",
         "V1",
+        "Dense",
     ),
     "DeepCoder 14B": (
         "https://huggingface.co/agentica-org/DeepCoder-14B-Preview",
         14.8,
         "Coding",
         "V2",
+        "Reasoning",
     ),
     "OpenCoder 8B": (
         "https://huggingface.co/infly/OpenCoder-8B-Instruct",
         7.77,
         "Coding",
         "V1",
+        "Dense",
     ),
     "SeedCoder 8B": (
         "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Instruct",
         8.25,
         "Coding",
         "V2",
+        "Dense",
     ),
     "SeedCoder 8B Reasoning": (
         "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Reasoning-bf16",
         8.25,
         "Coding",
         "V2",
+        "Reasoning",
     ),
     "QwenCoder 2.5 7B": (
         "https://huggingface.co/Qwen/Qwen2.5-Coder-7B-Instruct",
         7.61,
         "Coding",
         "V1",
+        "Dense",
     ),
     "DeepSeek Coder 6.7B": (
         "https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct",
         6.74,
         "Coding",
         "V1",
+        "Dense",
     ),
     "HaVen-CodeQwen": (
         "https://huggingface.co/yangyiyao/HaVen-CodeQwen",
         7.25,
         "RTL-Specific",
         "V1",
+        "Dense",
     ),
     "CodeV R1 Distill Qwen 7B": (
         "https://huggingface.co/zhuyaoyu/CodeV-R1-Distill-Qwen-7B",
         7.62,
         "RTL-Specific",
         "V2",
+        "Reasoning",
     ),
     "CodeV-CL-7B": (
         "https://huggingface.co/yang-z/CodeV-CL-7B",
         6.74,
         "RTL-Specific",
         "V1",
+        "Dense",
     ),
     "CodeV-QW-7B": (
         "https://huggingface.co/yang-z/CodeV-QW-7B",
         7.25,
         "RTL-Specific",
         "V1",
+        "Dense",
     ),
     "CodeV-DS-6.7B": (
         "https://huggingface.co/yang-z/CodeV-DS-6.7B",
         6.74,
         "RTL-Specific",
         "V1",
+        "Dense",
     ),
     "RTLCoder Mistral": (
         "https://huggingface.co/ishorn5/RTLCoder-v1.1",
         7.24,
         "RTL-Specific",
         "V1",
+        "Dense",
     ),
     "RTLCoder DeepSeek": (
         "https://huggingface.co/ishorn5/RTLCoder-Deepseek-v1.1",
         6.74,
         "RTL-Specific",
         "V1",
+        "Dense",
+    ),
+    "OriGen": (
+        "https://huggingface.co/henryen/OriGen",
+        6.74,
+        "RTL-Specific",
+        "V1",
+        "Dense",
     ),
-    "OriGen": ("https://huggingface.co/henryen/OriGen", 6.74, "RTL-Specific", "V1"),
     "Qwen3 Coder 480B A35B": (
         "https://huggingface.co/Qwen/Qwen3-Coder-480B-A35B-Instruct",
         480,
         "Coding",
         "V2",
+        "Dense",
     ),
     "Magistral Small 2506": (
         "https://huggingface.co/mistralai/Magistral-Small-2506",
         23.6,
         "General",
         "V2",
+        "Reasoning",
     ),
     "gpt-oss-20b": (
         "https://huggingface.co/openai/gpt-oss-20b",
         21.5,
         "General",
         "V2",
+        "Reasoning",
     ),
     "gpt-oss-120b": (
         "https://huggingface.co/openai/gpt-oss-120b",
         120,
         "General",
         "V2",
+        "Reasoning",
     ),
 }
 

@@ -201,14 +248,15 @@ def get_headers(reader, agg=False) -> Union[list, list]:
     return metrics, benchs
 
 
-def get_model_params_and_url(model) -> Union[str, str, float, str]:
+def get_model_params_and_url(model) -> Union[str, str, float, str, str]:
     if model not in model_details:
-        return "-", 0.0, "
+        return "-", 0.0, "-", "-", "-"
     url = model_details[model][0]
     params = model_details[model][1]
     type = model_details[model][2]
     release = model_details[model][3]
-
+    reasoning = model_details[model][4]
+    return url, params, type, release, reasoning
 
 
 def parse_results(csv_path: str) -> list[dict]:

@@ -227,7 +275,7 @@ def parse_results(csv_path: str) -> list[dict]:
        model = row[0]
        if not model:
            continue
-       url, params, type, release = get_model_params_and_url(model)
+       url, params, type, release, reasoning = get_model_params_and_url(model)
        models.append(model)
        row = row[1:]
        ctr = 0

@@ -243,6 +291,7 @@ def parse_results(csv_path: str) -> list[dict]:
            record["Model URL"] = url
            record["Params"] = params
            record["Release"] = release
+           record["Thinking"] = reasoning
            dataset.append(record)
            ctr += 1
    print(models)
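A minimal, self-contained sketch of the new data flow, using one entry copied from model_details above; the tuple unpacking is equivalent to the index-based lookups in get_model_params_and_url, and the CSV-parsing loop is omitted.

from typing import Union

# One entry copied from model_details above; the fifth field is the new tag.
model_details = {
    "gpt-oss-120b": (
        "https://huggingface.co/openai/gpt-oss-120b",
        120,
        "General",
        "V2",
        "Reasoning",  # "Dense" or "Reasoning"
    ),
}


def get_model_params_and_url(model) -> Union[str, str, float, str, str]:
    # Unknown models fall back to placeholder values, now five of them.
    if model not in model_details:
        return "-", 0.0, "-", "-", "-"
    # Equivalent to the index-based version in parse.py.
    url, params, type_, release, reasoning = model_details[model]
    return url, params, type_, release, reasoning


url, params, type_, release, reasoning = get_model_params_and_url("gpt-oss-120b")
record = {"Model URL": url, "Params": params, "Release": release, "Thinking": reasoning}
print(record)
# {'Model URL': 'https://huggingface.co/openai/gpt-oss-120b',
#  'Params': 120, 'Release': 'V2', 'Thinking': 'Reasoning'}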
results/results_icarus.json CHANGED
The diff for this file is too large to render. See raw diff.

results/results_verilator.json CHANGED
The diff for this file is too large to render. See raw diff.
utils.py CHANGED

@@ -15,11 +15,20 @@ type_emoji = {
 # fmt: on
 
 
-def model_hyperlink(link, model_name, release):
+def model_hyperlink(link, model_name, release, thinking=False):
+    ret = f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
     if release == "V1":
-        return
+        return ret
+    elif thinking == False:
+        return (
+            ret
+            + f' <span style="opacity: 0.7; font-variant: all-small-caps; font-weight: 600">new</span>'
+        )
     else:
-        return
+        return (
+            ret
+            + f' <span style="opacity: 0.7; font-variant: all-small-caps; font-weight: 600">new</span> <span style="opacity: 0.9; font-variant: all-small-caps; font-weight: 600; color: #5C6BC0">(reasoning)</span>'
+        )
 
 
 def handle_special_cases(benchmark, metric):

@@ -33,14 +42,18 @@ def handle_special_cases(benchmark, metric):
 def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
     subset = subset.drop(subset[subset.Score < 0.0].index)
     details = subset[
-        ["Model", "Model URL", "Model Type", "Params", "Release"]
+        ["Model", "Model URL", "Model Type", "Params", "Release", "Thinking"]
     ].drop_duplicates("Model")
     filtered_df = subset[["Model", "Score"]].rename(
         columns={"Score": "Exact Matching (EM)"}
     )
     filtered_df = pd.merge(filtered_df, details, on="Model", how="left")
     filtered_df["Model"] = filtered_df.apply(
-        lambda row: model_hyperlink(
+        lambda row: model_hyperlink(
+            row["Model URL"],
+            row["Model"],
+            row["Release"],
+        ),
         axis=1,
     )
     filtered_df["Type"] = filtered_df["Model Type"].map(lambda x: type_emoji.get(x, ""))

@@ -53,7 +66,7 @@ def filter_RTLRepo(subset: pd.DataFrame) -> pd.DataFrame:
 
 def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataFrame:
     details = subset[
-        ["Model", "Model URL", "Model Type", "Params", "Release"]
+        ["Model", "Model URL", "Model Type", "Params", "Release", "Thinking"]
     ].drop_duplicates("Model")
     if "RTLLM" in subset["Benchmark"].unique():
         pivot_df = (

@@ -82,7 +95,9 @@ def filter_bench(subset: pd.DataFrame, df_agg=None, agg_column=None) -> pd.DataF
 
     pivot_df = pd.merge(pivot_df, details, on="Model", how="left")
     pivot_df["Model"] = pivot_df.apply(
-        lambda row: model_hyperlink(
+        lambda row: model_hyperlink(
+            row["Model URL"], row["Model"], row["Release"], row["Thinking"]
+        ),
         axis=1,
     )
     pivot_df["Type"] = pivot_df["Model Type"].map(lambda x: type_emoji.get(x, ""))

@@ -144,7 +159,7 @@ def filter_bench_all(
     subset: pd.DataFrame, df_agg=None, agg_column=None
 ) -> pd.DataFrame:
     details = subset[
-        ["Model", "Model URL", "Model Type", "Params", "Release"]
+        ["Model", "Model URL", "Model Type", "Params", "Release", "Thinking"]
     ].drop_duplicates("Model")
     if "RTLLM" in subset["Benchmark"].unique():
         pivot_df = (

@@ -164,8 +179,11 @@ def filter_bench_all(
     )
 
     pivot_df = pd.merge(pivot_df, details, on="Model", how="left")
+    print(pivot_df.columns)
     pivot_df["Model"] = pivot_df.apply(
-        lambda row: model_hyperlink(
+        lambda row: model_hyperlink(
+            row["Model URL"], row["Model"], row["Release"], row["Thinking"]
+        ),
         axis=1,
     )
     pivot_df["Type"] = pivot_df["Model Type"].map(lambda x: type_emoji.get(x, ""))
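To illustrate what the new badge logic produces, a short usage sketch of model_hyperlink as defined above; the link and model names are illustrative. The three calls mirror how the filter functions invoke it: filter_RTLRepo omits the thinking argument, while filter_bench and filter_bench_all pass the "Thinking" column value.

# Assumes model_hyperlink from utils.py above; the URL here is illustrative.
link = "https://huggingface.co/openai/gpt-oss-120b"

# V1 model: plain dotted-underline anchor, no badge.
print(model_hyperlink(link, "Some V1 Model", "V1"))

# V2 model with the default thinking=False: anchor plus a small-caps "new" badge.
print(model_hyperlink(link, "gpt-oss-120b", "V2"))

# V2 model with a truthy thinking value: "new" badge plus the colored "(reasoning)" badge.
# Note: any value other than False (e.g. the strings "Dense" or "Reasoning" coming from
# the "Thinking" column) takes this branch as written.
print(model_hyperlink(link, "gpt-oss-120b", "V2", thinking="Reasoning"))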