from state import Model
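
# `Model` (defined in state.py, not shown here) is assumed to be a plain
# dataclass whose fields match the keyword arguments used below -- a minimal
# sketch of that assumption, not the actual definition:
#
#     @dataclass
#     class Model:
#         vocab_size: int               # embedding / tokenizer vocabulary size
#         num_layers: int               # number of transformer blocks
#         hidden_dim: int               # residual-stream (model) width
#         intermediate_size: int        # FFN / per-expert inner width
#         weight_tied_embeddings: bool  # lm_head shares weights with the embedding
#         active_experts: int           # experts used per token (1 for dense models)
#         total_experts: int            # experts per MoE layer (1 for dense models)
#         is_moe: bool                  # mixture-of-experts architecture?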
# https://huggingface.co/google/gemma-3-270m/blob/main/config.json
GEMMA3_270M = Model(
    vocab_size=262144,
    num_layers=18,
    hidden_dim=640,
    intermediate_size=2048,
    weight_tied_embeddings=True,
    active_experts=1,
    total_experts=1,
    is_moe=False,
)
GEMMA3_1B = Model(
    vocab_size=262144,
    num_layers=26,
    hidden_dim=1152,
    intermediate_size=6912,
    weight_tied_embeddings=True,
    active_experts=1,
    total_experts=1,
    is_moe=False,
)
GEMMA3_4B = Model(
    vocab_size=262144,
    num_layers=34,
    hidden_dim=2560,
    intermediate_size=10240,
    weight_tied_embeddings=True,
    active_experts=1,
    total_experts=1,
    is_moe=False,
)
GEMMA3_12B = Model(
    vocab_size=262144,
    num_layers=48,
    hidden_dim=3840,
    intermediate_size=15360,
    weight_tied_embeddings=True,
    active_experts=1,
    total_experts=1,
    is_moe=False,
)
GEMMA3_27B = Model(
    vocab_size=262144,
    num_layers=62,
    hidden_dim=5376,
    intermediate_size=21504,
    weight_tied_embeddings=True,
    active_experts=1,
    total_experts=1,
    is_moe=False,
)
# No Maverick; non-homogeneous (mixed dense/MoE) layers aren't supported yet
# https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct/blob/main/config.json
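# active_experts/total_experts below appear to count Scout's always-on shared
# expert alongside the routed ones: 1 routed per token + 1 shared = 2 active,
# 16 routed + 1 shared = 17 total (the config's num_local_experts is 16).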
LLAMA4_SCOUT = Model(
    vocab_size=202048,
    num_layers=48,
    hidden_dim=5120,
    intermediate_size=8192,
    weight_tied_embeddings=True,
    active_experts=2,
    total_experts=17,
    is_moe=True,
)
# https://huggingface.co/unsloth/Llama-3.2-1B-Instruct/blob/main/config.json
LLAMA3_1B = Model(
    vocab_size=128256,
    num_layers=16,
    hidden_dim=2048,
    intermediate_size=8192,
    weight_tied_embeddings=True,
    active_experts=1,
    total_experts=1,
    is_moe=False,
)
# https://huggingface.co/unsloth/Llama-3.2-3B-Instruct/blob/main/config.json
LLAMA3_3B = Model(
    vocab_size=128256,
    num_layers=28,
    hidden_dim=3072,
    intermediate_size=8192,
    weight_tied_embeddings=True,
    active_experts=1,
    total_experts=1,
    is_moe=False,
)
# https://huggingface.co/unsloth/llama-3-8b-Instruct/blob/main/config.json
LLAMA3_8B = Model(
    vocab_size=128256,
    num_layers=32,
    hidden_dim=4096,
    intermediate_size=14336,
    weight_tied_embeddings=False,  # config has "tie_word_embeddings": false
    active_experts=1,
    total_experts=1,
    is_moe=False,
)
# https://huggingface.co/unsloth/Llama-3.3-70B-Instruct/blob/main/config.json
LLAMA3_70B = Model(
    vocab_size=128256,
    num_layers=80,
    hidden_dim=8192,
    intermediate_size=28672,
    weight_tied_embeddings=False,  # config has "tie_word_embeddings": false
    active_experts=1,
    total_experts=1,
    is_moe=False,
)
DEFAULTS = {
    "Gemma3 270M": GEMMA3_270M,
    "Gemma3 1B": GEMMA3_1B,
    "Gemma3 4B": GEMMA3_4B,
    "Gemma3 12B": GEMMA3_12B,
    "Gemma3 27B": GEMMA3_27B,
    "Llama3 1B": LLAMA3_1B,
    "Llama3 3B": LLAMA3_3B,
    "Llama3 8B": LLAMA3_8B,
    "Llama3 70B": LLAMA3_70B,
    "Llama4 Scout": LLAMA4_SCOUT,
}
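

# Illustrative only -- a rough parameter estimate built from the fields above.
# It assumes vanilla multi-head attention (no GQA), a gated FFN with three
# projections, and ignores norms, biases, and per-layer differences, none of
# which Model captures; a sanity-check sketch, not part of the app itself.
def approx_params(m: Model) -> int:
    embed = m.vocab_size * m.hidden_dim           # input embedding table
    if not m.weight_tied_embeddings:
        embed *= 2                                # separate lm_head matrix
    attn = 4 * m.hidden_dim * m.hidden_dim        # q/k/v/o projections
    ffn = 3 * m.hidden_dim * m.intermediate_size  # gate/up/down projections
    if m.is_moe:
        ffn *= m.total_experts                    # each expert holds its own FFN
    return embed + m.num_layers * (attn + ffn)


if __name__ == "__main__":
    for name, model in DEFAULTS.items():
        print(f"{name}: ~{approx_params(model) / 1e9:.2f}B parameters")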