Spaces:

filbench
/

filbench-leaderboard

Running

Lj Miranda

Add plots in the leaderboard (#5)

e2c374a unverified 7 months ago

4.06 kB

	import gradio as gr
	import pandas as pd
	import plotly.express as px


	def plot_parameter_efficiency(df) -> gr.Plot:
	    """Build a scatter plot of FilBench score against parameter count.

	    Reads the "Model", "Average", "# Parameters" and "Multilingual" columns
	    of ``df``. Rows whose "# Parameters" value equals -1 are excluded.
	    Points are colored by the "Multilingual" column and the finished figure
	    is returned wrapped in a ``gr.Plot`` created with ``container=False``.
	    """
	    wanted_columns = ["Model", "Average", "# Parameters", "Multilingual"]
	    subset = df[wanted_columns]
	    # NOTE(review): -1 looks like a sentinel for "parameter count unknown";
	    # confirm against the code that builds the leaderboard table.
	    has_known_size = subset["# Parameters"] != -1
	    plot_df = subset[has_known_size]

	    axis_labels = {
	        "Average": "FilBench Score",
	        "# Parameters": "Number of Parameters (B)",
	    }
	    hover_formats = {"Average": ":.1f", "# Parameters": ":.0f"}

	    fig = px.scatter(
	        plot_df,
	        x="# Parameters",
	        y="Average",
	        color="Multilingual",
	        hover_name="Model",
	        hover_data=hover_formats,
	        labels=axis_labels,
	        # Fixed 700x500 canvas (landscape, not square).
	        width=700,
	        height=500,
	    )

	    layout_options = {
	        # Font sizes
	        "title_font_size": 20,
	        "legend_title_font_size": 16,
	        "legend_title_text": "Model Type",
	        "legend_font_size": 14,
	        "xaxis_title_font_size": 16,
	        "yaxis_title_font_size": 16,
	        "xaxis_tickfont_size": 14,
	        "yaxis_tickfont_size": 14,
	        # Explicit width/height are given above, so autosizing is disabled.
	        "autosize": False,
	        # Y axis is pinned to 0-100; light gray grid on a white background.
	        "yaxis_range": [0, 100],
	        "plot_bgcolor": "white",
	        "xaxis_showgrid": True,
	        "yaxis_showgrid": True,
	        "xaxis_gridcolor": "lightgray",
	        "yaxis_gridcolor": "lightgray",
	        # Legend anchored just inside the top-left corner of the plot area.
	        "legend": {"yanchor": "top", "y": 0.99, "xanchor": "left", "x": 0.01},
	    }
	    fig.update_layout(**layout_options)

	    # Larger markers with a thin dark outline; only marker-mode traces match.
	    marker_style = {"size": 12, "line": {"width": 1, "color": "DarkSlateGrey"}}
	    fig.update_traces(marker=marker_style, selector={"mode": "markers"})

	    return gr.Plot(fig, container=False)


	def plot_cost_efficiency(df) -> gr.Plot:
	    """Build a scatter plot of FilBench score against output-token price.

	    Only models listed in the hard-coded price table below are plotted.
	    Their prices are joined with the "Model", "Average", "# Parameters" and
	    "Multilingual" columns of ``df`` on "Model". The x axis is logarithmic,
	    points are colored by "Multilingual", and the finished figure is
	    returned wrapped in a ``gr.Plot`` created with ``container=False``.
	    """
	    # Per the x-axis label below, prices are in $ per 1M output tokens.
	    MODEL_PRICES = {
	        "gpt-4o-2024-08-06": 10,
	        "gpt-4o-mini": 0.6,
	        "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": 0.6,
	        "meta-llama/Llama-4-Scout-17B-16E-Instruct": 0.3,
	        "meta-llama/Llama-3.1-70B-Instruct": 0.28,
	        "meta-llama/Llama-3.1-8B-Instruct": 0.03,
	        "Qwen/Qwen2.5-72B-Instruct": 0.39,
	        "Qwen/Qwen2.5-7B-Instruct": 0.1,
	        "google/gemma-3-27b-it": 0.2,
	        "google/gemma-2-27b-it": 0.3,
	        "google/gemma-2-9b-it": 0.06,
	        "mistralai/Ministral-8B-Instruct-2410": 0.1,
	        "mistralai/Mixtral-8x22B-Instruct-v0.1": 1.2,
	        "aisingapore/Llama-SEA-LION-v3-70B-IT": 0.28,
	        "aisingapore/gemma2-9b-cpt-sea-lionv3-instruct": 0.06,
	        "aisingapore/llama3.1-8b-cpt-sea-lionv3-instruct": 0.03,
	    }

	    scores = df[["Model", "Average", "# Parameters", "Multilingual"]]

	    # One row per priced model, in the order the table above lists them.
	    price_df = pd.DataFrame(
	        list(MODEL_PRICES.items()),
	        columns=["Model", "Price-per-token"],
	    )
	    # Inner join: a priced model that is missing from `df` has no score to
	    # plot, so it is dropped here instead of being passed on as a NaN row.
	    merged = price_df.merge(scores, on="Model", how="inner")

	    fig = px.scatter(
	        merged,
	        x="Price-per-token",
	        y="Average",
	        color="Multilingual",
	        hover_name="Model",
	        # Two decimals: several prices are below $0.10 (e.g. 0.03, 0.06) and
	        # would show as 0.0 / 0.1 with a one-decimal format.
	        hover_data={"Price-per-token": ":.2f", "# Parameters": ":.0f"},
	        labels={
	            "Average": "FilBench Score",
	            "Price-per-token": "Price-per-token ($/1M output tokens), log scale",
	        },
	        # Fixed 700x500 canvas (landscape, not square).
	        width=700,
	        height=500,
	        log_x=True,
	    )

	    # Customize layout
	    fig.update_layout(
	        # Font sizes
	        title_font_size=20,
	        legend_title_font_size=16,
	        legend_title_text="Model Type",
	        legend_font_size=14,
	        xaxis_title_font_size=16,
	        yaxis_title_font_size=16,
	        xaxis_tickfont_size=14,
	        yaxis_tickfont_size=14,
	        # Explicit width/height are given above, so autosizing is disabled.
	        autosize=False,
	        # Y axis is pinned to 0-100; light gray grid on a white background.
	        yaxis_range=[0, 100],
	        plot_bgcolor="white",
	        xaxis_showgrid=True,
	        yaxis_showgrid=True,
	        xaxis_gridcolor="lightgray",
	        yaxis_gridcolor="lightgray",
	        # Legend anchored just inside the top-left corner of the plot area.
	        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
	    )

	    # Larger markers with a thin dark outline; only marker-mode traces match.
	    fig.update_traces(
	        marker=dict(size=12, line=dict(width=1, color="DarkSlateGrey")),
	        selector=dict(mode="markers"),
	    )

	    return gr.Plot(fig, container=False)