guychuk committed on
Commit 425d406 · verified · 1 Parent(s): cde4f1b

Update app.py

Files changed (1)
  1. app.py +71 -44
app.py CHANGED
@@ -3,7 +3,8 @@ import numpy as np
 import onnxruntime as ort
 from PIL import Image
 from huggingface_hub import hf_hub_download
-from transformers import AutoTokenizer, AutoConfig
+from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
+import torch


 # ---------------------------------------------------------
@@ -15,32 +16,28 @@ def softmax(x):


 def greedy_decode_onnx(session, tokenizer, prompt, max_new_tokens=64):
-    """
-    Minimal greedy decoding loop for decoder-only ONNX models that:
-    - Take input_ids
-    - Return logits for the last position
-    """
-    # Encode prompt
+    """Greedy decoding loop for ONNX decoder-only models."""
     ids = tokenizer(prompt, return_tensors="np")["input_ids"].astype(np.int64)

     for _ in range(max_new_tokens):
         ort_inputs = {"input_ids": ids}
-        logits = session.run(None, ort_inputs)[0]  # shape: [batch, seq, vocab]
-        next_token_logits = logits[:, -1, :]  # last position
+        logits = session.run(None, ort_inputs)[0]
+        next_token_logits = logits[:, -1, :]
         next_token = int(np.argmax(next_token_logits, axis=-1)[0])
         ids = np.concatenate([ids, [[next_token]]], axis=1)
-        if next_token in tokenizer.eos_token_id or next_token == tokenizer.eos_token_id:
+        if next_token == tokenizer.eos_token_id:
             break

     return tokenizer.decode(ids[0], skip_special_tokens=True)


+
 # ---------------------------------------------------------
 # Load ONNX models + tokenizers + configs
 # ---------------------------------------------------------

 # --- Model 1: Multilingual DistilBERT ---
-multilingual_onnx_path = hf_hub_download(
+m_onx = hf_hub_download(
     repo_id="lxyuan/distilbert-base-multilingual-cased-sentiments-student",
     filename="onnx/model.onnx"
 )
@@ -51,59 +48,81 @@ config_multilingual = AutoConfig.from_pretrained(
     "lxyuan/distilbert-base-multilingual-cased-sentiments-student"
 )
 labels_multilingual = config_multilingual.id2label
-session_multilingual = ort.InferenceSession(multilingual_onnx_path, providers=["CPUExecutionProvider"])
+session_multilingual = ort.InferenceSession(m_onx, providers=["CPUExecutionProvider"])


 # --- Model 2: SDG-BERT ---
-sdg_onnx_path = hf_hub_download(
-    repo_id="sadickam/sdgBERT",
-    filename="onnx/model.onnx"
-)
+sdg_onx = hf_hub_download("sadickam/sdgBERT", "onnx/model.onnx")
 tokenizer_sdg = AutoTokenizer.from_pretrained("sadickam/sdgBERT")
 config_sdg = AutoConfig.from_pretrained("sadickam/sdgBERT")
 labels_sdg = config_sdg.id2label
-session_sdg = ort.InferenceSession(sdg_onnx_path, providers=["CPUExecutionProvider"])
+session_sdg = ort.InferenceSession(sdg_onx, providers=["CPUExecutionProvider"])


 # --- Model 3: German Sentiment ---
-german_onnx_path = hf_hub_download(
-    repo_id="oliverguhr/german-sentiment-bert",
-    filename="onnx/model.onnx"
-)
+g_onx = hf_hub_download("oliverguhr/german-sentiment-bert", "onnx/model.onnx")
 tokenizer_german = AutoTokenizer.from_pretrained("oliverguhr/german-sentiment-bert")
 config_german = AutoConfig.from_pretrained("oliverguhr/german-sentiment-bert")
 labels_german = config_german.id2label
-session_german = ort.InferenceSession(german_onnx_path, providers=["CPUExecutionProvider"])
+session_german = ort.InferenceSession(g_onx, providers=["CPUExecutionProvider"])


 # --- Model 4: ViT Image Classifier ---
-vit_onnx_path = hf_hub_download(
-    repo_id="WinKawaks/vit-small-patch16-224",
-    filename="onnx/model.onnx"
-)
-session_vit = ort.InferenceSession(vit_onnx_path, providers=["CPUExecutionProvider"])
+vit_onx = hf_hub_download("WinKawaks/vit-small-patch16-224", "onnx/model.onnx")
+session_vit = ort.InferenceSession(vit_onx, providers=["CPUExecutionProvider"])

 IMAGE_SIZE = 224
 MEAN = [0.485, 0.456, 0.406]
 STD = [0.229, 0.224, 0.225]


-# --- Model 5: DeepSeek Coder (PR #8) ---
-ds_onnx_path = hf_hub_download(
-    repo_id="deepseek-ai/deepseek-coder-1.3b-base",
-    filename="model.onnx",  # you said this exists ― so we trust you :)
-    revision="refs/pr/8"
-)
+# ---------------------------------------------------------
+# NEW: Model 5 — DeepSeek Coder from HF Hub (NOT ONNX)
+# ---------------------------------------------------------
+
+DS_REPO = "guychuk/dpsk-exmpl"
+
 tokenizer_ds = AutoTokenizer.from_pretrained(
-    "deepseek-ai/deepseek-coder-1.3b-base",
-    revision="refs/pr/8"
+    DS_REPO,
+    trust_remote_code=True
 )
-session_ds = ort.InferenceSession(ds_onnx_path, providers=["CPUExecutionProvider"])
+
+model_ds = AutoModelForCausalLM.from_pretrained(
+    DS_REPO,
+    device_map="auto",
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+    low_cpu_mem_usage=True,
+    trust_remote_code=True
+)
+
+model_ds.eval()
+
+
+def run_deepseek(prompt, max_new_tokens=128, temperature=0.7, top_p=0.9):
+    """Generate using PyTorch (DeepSeek HF model)."""
+    if not prompt.strip():
+        return "Please enter a prompt."
+
+    inputs = tokenizer_ds(prompt, return_tensors="pt").to(model_ds.device)
+
+    with torch.no_grad():
+        out = model_ds.generate(
+            **inputs,
+            max_new_tokens=max_new_tokens,
+            do_sample=True,
+            temperature=temperature,
+            top_p=top_p,
+            pad_token_id=tokenizer_ds.eos_token_id
+        )
+
+    return tokenizer_ds.decode(out[0], skip_special_tokens=True)
+


 # ---------------------------------------------------------
 # Inference functions for classification models
 # ---------------------------------------------------------
+
 def run_multilingual(text):
     inputs = tokenizer_multilingual(text, return_tensors="np", truncation=True, padding=True)
     inputs = {k: v.astype(np.int64) for k, v in inputs.items()}
@@ -145,57 +164,65 @@ def run_vit(image):
     return {f"class_{i}": float(probs[i]) for i in top5}


-def run_deepseek(prompt):
-    return greedy_decode_onnx(session_ds, tokenizer_ds, prompt, max_new_tokens=64)
-

 # ---------------------------------------------------------
 # Unified model router
 # ---------------------------------------------------------
+
 def inference(model_name, text, image):
     if model_name == "Multilingual Sentiment":
         return run_multilingual(text)
+
     elif model_name == "SDG Classification":
         return run_sdg(text)
+
     elif model_name == "German Sentiment":
         return run_german(text)
+
     elif model_name == "ViT Image Classification":
         if image is None:
             return {"error": "Please upload an image."}
         return run_vit(image)
+
     elif model_name == "DeepSeek Coder":
         return {"generated_text": run_deepseek(text)}
+
     else:
         return {"error": "Invalid model selected."}


+
 # ---------------------------------------------------------
 # Gradio UI
 # ---------------------------------------------------------
+
 with gr.Blocks() as demo:
-    gr.Markdown("# 🔍 Multi-Model ONNX Inference Demo")
-    gr.Markdown("All models downloaded directly from the Hugging Face Hub via `hf_hub_download`.")
+    gr.Markdown("# 🔍 Multi-Model Inference Demo (ONNX + DeepSeek Coder)")

     model_selector = gr.Dropdown(
         [
             "Multilingual Sentiment",
             "SDG Classification",
             "German Sentiment",
-            "ViT Image Classification",
+            # "ViT Image Classification",
             "DeepSeek Coder"
         ],
         label="Choose a Model"
     )

     text_input = gr.Textbox(lines=3, label="Text Prompt / Input")
-    image_input = gr.Image(type="pil", label="Image Input (for ViT)", visible=True)
+    # image_input = gr.Image(type="pil", label="Image Input (ViT only)")
     output_box = gr.JSON(label="Output")

     run_button = gr.Button("Run")

     run_button.click(
         inference,
-        inputs=[model_selector, text_input, image_input],
+        inputs=[
+            model_selector,
+            text_input,
+            # image_input
+        ],
         outputs=output_box
     )

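Review note on the `greedy_decode_onnx` change: the old `if next_token in tokenizer.eos_token_id or next_token == tokenizer.eos_token_id` raises a `TypeError` whenever `eos_token_id` is a plain int (the usual case), because `in` needs an iterable, so the simplified `==` check is a genuine bugfix rather than cosmetic cleanup. Some tokenizer configs do carry a list-valued `eos_token_id`, though; a comparison that tolerates both shapes could look like this sketch (the `is_eos` helper is hypothetical, not part of this commit):

def is_eos(token_id, eos):
    # Hypothetical helper: eos may be None, a single int, or a list of ints.
    if eos is None:
        return False
    if isinstance(eos, (list, tuple)):
        return token_id in eos
    return token_id == eos

Inside the loop, `if is_eos(next_token, tokenizer.eos_token_id): break` would then cover every shape the config can take.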
 
 
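On the new loading block: `device_map="auto"` and `low_cpu_mem_usage=True` both require the `accelerate` package to be installed in the Space, and `trust_remote_code=True` executes code from the `guychuk/dpsk-exmpl` repo at load time, which is worth a review on its own. If the Space has neither a GPU nor `accelerate`, a plain CPU load is a possible fallback (a sketch, not part of this commit):

model_ds = AutoModelForCausalLM.from_pretrained(
    DS_REPO,
    torch_dtype=torch.float32,  # fp16 on CPU is slow and unsupported for some ops
    trust_remote_code=True
)
model_ds.eval()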
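Finally, a quick way to smoke-test the new PyTorch path before the UI is involved: a minimal sketch assuming `run_deepseek` and its globals are defined exactly as in the diff above (the prompt is only an example, and with `do_sample=True` the output varies between runs):

if __name__ == "__main__":
    # One-off generation outside Gradio to confirm the model loads and decodes.
    print(run_deepseek("def fibonacci(n):", max_new_tokens=32))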