Spaces:
Sleeping
Sleeping
File size: 1,100 Bytes
97e312a 84f0b80 97e312a b79954f 97e312a b79954f 97e312a b79954f 97e312a b79954f 97e312a b79954f 84f0b80 b79954f 84f0b80 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
from state import Model
# Gemma 3 270M — dense (non-MoE) configuration.
# NOTE(review): published Gemma 3 270M config uses vocab_size=262144 and a
# much narrower width (hidden 640); verify these values are intentional.
GEMMA3_270M = Model(
    vocab_size=256000,               # tokenizer vocabulary size
    num_layers=9,                    # transformer blocks
    hidden_dim=1152,                 # model width (d_model)
    intermediate_size=4608,          # FFN inner width (4x hidden)
    weight_tied_embeddings=True,     # input/output embeddings share weights
    is_moe=False,                    # dense model
    active_experts=2,                # presumably ignored when is_moe=False — confirm
    total_experts=8,
)
# Gemma 3 1B — dense (non-MoE) configuration.
GEMMA3_1B = Model(
    vocab_size=262208,               # tokenizer vocabulary size
    num_layers=26,                   # transformer blocks
    hidden_dim=2304,                 # model width (d_model)
    intermediate_size=9216,          # FFN inner width (4x hidden)
    weight_tied_embeddings=True,     # input/output embeddings share weights
    is_moe=False,                    # dense model
    active_experts=2,                # presumably ignored when is_moe=False — confirm
    total_experts=8,
)
# Gemma 3 4B — dense (non-MoE) configuration.
GEMMA3_4B = Model(
    vocab_size=262208,               # tokenizer vocabulary size
    num_layers=28,                   # transformer blocks
    hidden_dim=3072,                 # model width (d_model)
    intermediate_size=12288,         # FFN inner width (4x hidden)
    weight_tied_embeddings=True,     # input/output embeddings share weights
    is_moe=False,                    # dense model
    active_experts=2,                # presumably ignored when is_moe=False — confirm
    total_experts=8,
)
# Gemma 3 12B — dense (non-MoE) configuration.
GEMMA3_12B = Model(
    vocab_size=262208,               # tokenizer vocabulary size
    num_layers=42,                   # transformer blocks
    hidden_dim=4608,                 # model width (d_model)
    intermediate_size=18432,         # FFN inner width (4x hidden)
    weight_tied_embeddings=True,     # input/output embeddings share weights
    is_moe=False,                    # dense model
    active_experts=2,                # presumably ignored when is_moe=False — confirm
    total_experts=8,
)
# Gemma 3 27B — dense (non-MoE) configuration.
GEMMA3_27B = Model(
    vocab_size=262208,               # tokenizer vocabulary size
    num_layers=46,                   # transformer blocks
    hidden_dim=6144,                 # model width (d_model)
    intermediate_size=24576,         # FFN inner width (4x hidden)
    weight_tied_embeddings=True,     # input/output embeddings share weights
    is_moe=False,                    # dense model
    active_experts=2,                # presumably ignored when is_moe=False — confirm
    total_experts=8,
)
# Preset registry: display label -> preconfigured Model.
# Insertion order (smallest to largest) is preserved for UI listings.
DEFAULTS = {
    f"Gemma3 {size}": preset
    for size, preset in (
        ("270M", GEMMA3_270M),
        ("1B", GEMMA3_1B),
        ("4B", GEMMA3_4B),
        ("12B", GEMMA3_12B),
        ("27B", GEMMA3_27B),
    )
}
|