File size: 1,100 Bytes
97e312a
84f0b80
97e312a
 
 
b79954f
97e312a
 
 
b79954f
97e312a
 
 
b79954f
97e312a
 
 
b79954f
97e312a
 
 
b79954f
84f0b80
 
 
 
 
 
b79954f
84f0b80
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
from state import Model

# Keyword arguments shared by every dense (non-MoE) Gemma3 size below.
# NOTE(review): the expert counts are presumably ignored when is_moe=False —
# placeholder values required by the Model constructor; confirm in state.Model.
_DENSE_COMMON = dict(
    weight_tied_embeddings=True,
    active_experts=2,
    total_experts=8,
    is_moe=False,
)

# Per-size architecture constants for the Gemma3 family.
# NOTE(review): 270M uses vocab_size=256000 while the larger sizes use
# 262208 — assumed intentional; verify against the published configs.
GEMMA3_270M = Model(
    vocab_size=256000, num_layers=9, hidden_dim=1152,
    intermediate_size=4608, **_DENSE_COMMON,
)
GEMMA3_1B = Model(
    vocab_size=262208, num_layers=26, hidden_dim=2304,
    intermediate_size=9216, **_DENSE_COMMON,
)
GEMMA3_4B = Model(
    vocab_size=262208, num_layers=28, hidden_dim=3072,
    intermediate_size=12288, **_DENSE_COMMON,
)
GEMMA3_12B = Model(
    vocab_size=262208, num_layers=42, hidden_dim=4608,
    intermediate_size=18432, **_DENSE_COMMON,
)
GEMMA3_27B = Model(
    vocab_size=262208, num_layers=46, hidden_dim=6144,
    intermediate_size=24576, **_DENSE_COMMON,
)

# Registry mapping a human-readable model name to its preset configuration.
# Built from the size tags so the display names stay uniformly formatted.
DEFAULTS = {
    f"Gemma3 {tag}": preset
    for tag, preset in (
        ("270M", GEMMA3_270M),
        ("1B", GEMMA3_1B),
        ("4B", GEMMA3_4B),
        ("12B", GEMMA3_12B),
        ("27B", GEMMA3_27B),
    )
}