rubenaghayan committed on
Commit
84f0b80
·
0 Parent(s):

calculator skeleton

Browse files
__init__.py ADDED
File without changes
__pycache__/defaults.cpython-311.pyc ADDED
Binary file (805 Bytes). View file
 
__pycache__/state.cpython-311.pyc ADDED
Binary file (639 Bytes). View file
 
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from defaults import DEFAULTS
3
+
4
def greet(name, intensity) -> str:
    """Return a greeting for ``name`` followed by exclamation marks.

    Args:
        name: String appended after ``"Hello, "``.
        intensity: Number of ``"!"`` characters to append; it is truncated
            with ``int()`` before use.

    Returns:
        The assembled greeting string.
    """
    greeting = "Hello, " + name
    return greeting + "!" * int(intensity)
6
+
7
def create_parallelism_block():
    """Build the "Parallelism Parameters" column of the UI.

    Returns:
        A tuple ``(tp, pp, cp, ep)`` of ``gr.Number`` inputs for tensor,
        pipeline, context and expert parallelism, in that order.
    """
    labels = (
        "Tensor Parallelism",
        "Pipeline Parallelism",
        "Context Parallelism",
        "Expert Parallelism",
    )
    with gr.Column():
        gr.Markdown("# Parallelism Parameters")
        # Every field starts at 1 and is user-editable.
        tp, pp, cp, ep = [
            gr.Number(label=label, value=1, interactive=True) for label in labels
        ]
    return tp, pp, cp, ep
15
+
16
def create_model_block():
    """Build the "Model Parameters" column of the UI.

    Returns:
        A tuple ``(layers, vocab, hidden, intermediate, presets)``: four
        ``gr.Number`` inputs followed by a ``gr.Dropdown`` whose choices are
        the keys of ``DEFAULTS``.
    """
    # (label, initial value) for each numeric field, in display order.
    number_fields = (
        ("Number of Layers", 32),
        ("Vocab Size", 32000),
        ("Hidden Dim", 4096),
        ("Intermediate Dim", 11008),
    )
    with gr.Column():
        gr.Markdown("# Model Parameters")
        layers, vocab, hidden, intermediate = [
            gr.Number(label=label, value=initial, interactive=True)
            for label, initial in number_fields
        ]
        presets = gr.Dropdown(list(DEFAULTS), label="Presets", interactive=True)
    return layers, vocab, hidden, intermediate, presets
25
+
26
def create_training_block():
    """Build the "Training Parameters" column of the UI.

    Returns:
        A tuple ``(seq_len, batch_size)`` of ``gr.Number`` inputs.
    """
    with gr.Column():
        gr.Markdown('# Training Parameters')
        seq_len = gr.Number(
            value=8192,
            label="Sequence Length",
            interactive=True,
        )
        batch_size = gr.Number(
            value=8,
            label="Batch Size",
            interactive=True,
        )
    return seq_len, batch_size
32
+
33
def calculate(*args) -> int:
    """Return the product of all positional arguments.

    Args:
        *args: Numeric factors to multiply together.

    Returns:
        The product of the factors, or 1 when called with no arguments.
    """
    out = 1
    for arg in args:
        out *= arg
    # Return the accumulator, not the loop variable: returning ``arg`` would
    # yield only the last factor and fail outright when ``args`` is empty.
    return out
38
+
39
+
40
+
41
# Assemble the page: three parameter columns side by side, then the button
# and the result field.
# NOTE(review): the nesting below was reconstructed from text whose
# indentation was lost -- confirm the button/output placement against the app.
with gr.Blocks() as demo:
    with gr.Column():
        with gr.Row():
            tp, pp, cp, ep = create_parallelism_block()
            layers, vocab, hidden, intermediate, presets = create_model_block()
            seq_len, batch_size = create_training_block()
        calculate_button = gr.Button("Calculate")
        output = gr.Number(label="Output")

    # Only the four parallelism inputs are passed to the calculation; the
    # model and training inputs are created above but not wired in here.
    calculate_button.click(
        fn=calculate,
        inputs=[tp, pp, cp, ep],
        outputs=output,
    )


demo.launch()
defaults.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from state import ModelState
2
+
3
# Preset model shapes selectable from the UI.
# NOTE(review): these dimensions have not been checked against the published
# Gemma 3 configurations -- confirm each value before relying on the presets.
GEMMA3_270M = ModelState(
    vocab_size=256000,
    num_layers=9,
    hidden_size=1152,
    intermediate_size=4608,
)
GEMMA3_1B = ModelState(
    vocab_size=262208,
    num_layers=26,
    hidden_size=2304,
    intermediate_size=9216,
)
GEMMA3_4B = ModelState(
    vocab_size=262208,
    num_layers=28,
    hidden_size=3072,
    intermediate_size=12288,
)
GEMMA3_12B = ModelState(
    vocab_size=262208,
    num_layers=42,
    hidden_size=4608,
    intermediate_size=18432,
)
GEMMA3_27B = ModelState(
    vocab_size=262208,
    num_layers=46,
    hidden_size=6144,
    intermediate_size=24576,
)

# Display name -> preset, in the order the names are listed to the user.
DEFAULTS = {
    "Gemma3 270M": GEMMA3_270M,
    "Gemma3 1B": GEMMA3_1B,
    "Gemma3 4B": GEMMA3_4B,
    "Gemma3 12B": GEMMA3_12B,
    "Gemma3 27B": GEMMA3_27B,
}
16
+
state.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+
3
@dataclass
class ModelState:
    """Container for the four integer dimensions that describe a model.

    Fields are positional in the order declared below.
    """

    # Size of the vocabulary.
    vocab_size: int
    # Number of layers.
    num_layers: int
    # Hidden dimension.
    hidden_size: int
    # Intermediate dimension.
    intermediate_size: int