from state import Model
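
# `Model` is defined in state.py. For readers of this file alone, a minimal sketch of the
# fields assumed here (field names are taken from the constructor calls below; the
# dataclass form and types are assumptions, not the actual state.py definition):
#
#   @dataclass
#   class Model:
#       vocab_size: int
#       num_layers: int
#       hidden_dim: int
#       intermediate_size: int
#       weight_tied_embeddings: bool
#       active_experts: int
#       total_experts: int
#       is_moe: bool
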
# https://huggingface.co/google/gemma-3-270m/blob/main/config.json
GEMMA3_270M = Model(
    vocab_size=262144,
    num_layers=18,
    hidden_dim=640,
    intermediate_size=2048,
    weight_tied_embeddings=True,
    active_experts=1,
    total_experts=1,
    is_moe=False,
)

GEMMA3_1B = Model(
    vocab_size=262144,
    num_layers=26,
    hidden_dim=1152,
    intermediate_size=6912,
    weight_tied_embeddings=True,
    active_experts=1,
    total_experts=1,
    is_moe=False,
)

GEMMA3_4B = Model(
    vocab_size=262144,
    num_layers=34,
    hidden_dim=2560,
    intermediate_size=10240,
    weight_tied_embeddings=True,
    active_experts=1,
    total_experts=1,
    is_moe=False,
)

GEMMA3_12B = Model(
    vocab_size=262144,
    num_layers=48,
    hidden_dim=3840,
    intermediate_size=15360,
    weight_tied_embeddings=True,
    active_experts=1,
    total_experts=1,
    is_moe=False,
)

GEMMA3_27B = Model(
    vocab_size=262144,
    num_layers=62,
    hidden_dim=5376,
    intermediate_size=21504,
    weight_tied_embeddings=True,
    active_experts=1,
    total_experts=1,
    is_moe=False,
)

# No Maverick; non-homogeneous layers aren't supported yet
# https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct/blob/main/config.json
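# Expert counts include the always-on shared expert: 16 routed + 1 shared per MoE layer,
# and with top-1 routing, 2 experts (1 routed + 1 shared) run per token.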
LLAMA4_SCOUT = Model(
    vocab_size=202048,
    num_layers=48,
    hidden_dim=5120,
    intermediate_size=8192,
    weight_tied_embeddings=True,
    active_experts=2,
    total_experts=17,
    is_moe=True,
)

# https://huggingface.co/unsloth/Llama-3.2-1B-Instruct/blob/main/config.json
LLAMA3_1B = Model(
    vocab_size=128256,
    num_layers=16,
    hidden_dim=2048,
    intermediate_size=8192,
    weight_tied_embeddings=True,
    active_experts=1,
    total_experts=1,
    is_moe=False,
)

# https://huggingface.co/unsloth/Llama-3.2-3B-Instruct/blob/main/config.json
LLAMA3_3B = Model(
    vocab_size=128256,
    num_layers=28,
    hidden_dim=3072,
    intermediate_size=8192,
    weight_tied_embeddings=True,
    active_experts=1,
    total_experts=1,
    is_moe=False,
)

# https://huggingface.co/unsloth/llama-3-8b-Instruct/blob/main/config.json
LLAMA3_8B = Model(
    vocab_size=128256,
    num_layers=32,
    hidden_dim=4096,
    intermediate_size=14336,
    weight_tied_embeddings=False,  # Llama 3 8B has a separate lm_head (not tied)
    active_experts=1,
    total_experts=1,
    is_moe=False,
)

# https://huggingface.co/unsloth/Llama-3.3-70B-Instruct/blob/main/config.json
LLAMA3_70B = Model(
    vocab_size=128256,
    num_layers=80,
    hidden_dim=8192,
    intermediate_size=28672,
    weight_tied_embeddings=False,  # Llama 3.3 70B has a separate lm_head (not tied)
    active_experts=1,
    total_experts=1,
    is_moe=False,
)

DEFAULTS = {
    "Gemma3 270M": GEMMA3_270M,
    "Gemma3 1B": GEMMA3_1B,
    "Gemma3 4B": GEMMA3_4B,
    "Gemma3 12B": GEMMA3_12B,
    "Gemma3 27B": GEMMA3_27B,
    "Llama3 1B": LLAMA3_1B,
    "Llama3 3B": LLAMA3_3B,
    "Llama3 8B": LLAMA3_8B,
    "Llama3 70B": LLAMA3_70B,
    "Llama4 Scout": LLAMA4_SCOUT,
}
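

# Usage sketch (illustrative only): presets are looked up by display name in DEFAULTS.
# The rough parameter estimate below (tied vs. untied embeddings plus SwiGLU-style MLPs,
# attention weights omitted) is an assumption for demonstration, not the visualizer's
# own accounting.
if __name__ == "__main__":
    model = DEFAULTS["Llama3 8B"]
    # Tied embeddings reuse one vocab x hidden matrix for both input embedding and LM head.
    embed_copies = 1 if model.weight_tied_embeddings else 2
    embedding_params = embed_copies * model.vocab_size * model.hidden_dim
    # Gated MLP: gate_proj + up_proj + down_proj, replicated per expert for MoE models.
    experts = model.total_experts if model.is_moe else 1
    mlp_params = model.num_layers * experts * 3 * model.hidden_dim * model.intermediate_size
    total_b = (embedding_params + mlp_params) / 1e9
    print(f"Llama3 8B ~= {total_b:.2f}B params (embeddings + MLPs only)")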