Spaces:

OzLabs
/

pr-demo

Runtime error

App Files Files Community

guychuk committed 19 days ago

Commit

425d406

verified ·

1 Parent(s): cde4f1b

Update app.py

Browse files

Files changed (1) hide show

app.py +71 -44

app.py CHANGED Viewed

@@ -3,7 +3,8 @@ import numpy as np
 import onnxruntime as ort
 from PIL import Image
 from huggingface_hub import hf_hub_download
-from transformers import AutoTokenizer, AutoConfig
 # ---------------------------------------------------------
@@ -15,32 +16,28 @@ def softmax(x):
 def greedy_decode_onnx(session, tokenizer, prompt, max_new_tokens=64):
-    """
-    Minimal greedy decoding loop for decoder-only ONNX models that:
-    - Take input_ids
-    - Return logits for the last position
-    """
-    # Encode prompt
     ids = tokenizer(prompt, return_tensors="np")["input_ids"].astype(np.int64)
     for _ in range(max_new_tokens):
         ort_inputs = {"input_ids": ids}
-        logits = session.run(None, ort_inputs)[0]   # shape: [batch, seq, vocab]
-        next_token_logits = logits[:, -1, :]        # last position
         next_token = int(np.argmax(next_token_logits, axis=-1)[0])
         ids = np.concatenate([ids, [[next_token]]], axis=1)
-        if next_token in tokenizer.eos_token_id or next_token == tokenizer.eos_token_id:
             break
     return tokenizer.decode(ids[0], skip_special_tokens=True)
 # ---------------------------------------------------------
 # Load ONNX models + tokenizers + configs
 # ---------------------------------------------------------
 # --- Model 1: Multilingual DistilBERT ---
-multilingual_onnx_path = hf_hub_download(
     repo_id="lxyuan/distilbert-base-multilingual-cased-sentiments-student",
     filename="onnx/model.onnx"
 )
@@ -51,59 +48,81 @@ config_multilingual = AutoConfig.from_pretrained(
     "lxyuan/distilbert-base-multilingual-cased-sentiments-student"
 )
 labels_multilingual = config_multilingual.id2label
-session_multilingual = ort.InferenceSession(multilingual_onnx_path, providers=["CPUExecutionProvider"])
 # --- Model 2: SDG-BERT ---
-sdg_onnx_path = hf_hub_download(
-    repo_id="sadickam/sdgBERT",
-    filename="onnx/model.onnx"
-)
 tokenizer_sdg = AutoTokenizer.from_pretrained("sadickam/sdgBERT")
 config_sdg = AutoConfig.from_pretrained("sadickam/sdgBERT")
 labels_sdg = config_sdg.id2label
-session_sdg = ort.InferenceSession(sdg_onnx_path, providers=["CPUExecutionProvider"])
 # --- Model 3: German Sentiment ---
-german_onnx_path = hf_hub_download(
-    repo_id="oliverguhr/german-sentiment-bert",
-    filename="onnx/model.onnx"
-)
 tokenizer_german = AutoTokenizer.from_pretrained("oliverguhr/german-sentiment-bert")
 config_german = AutoConfig.from_pretrained("oliverguhr/german-sentiment-bert")
 labels_german = config_german.id2label
-session_german = ort.InferenceSession(german_onnx_path, providers=["CPUExecutionProvider"])
 # --- Model 4: ViT Image Classifier ---
-vit_onnx_path = hf_hub_download(
-    repo_id="WinKawaks/vit-small-patch16-224",
-    filename="onnx/model.onnx"
-)
-session_vit = ort.InferenceSession(vit_onnx_path, providers=["CPUExecutionProvider"])
 IMAGE_SIZE = 224
 MEAN = [0.485, 0.456, 0.406]
 STD = [0.229, 0.224, 0.225]
-# --- Model 5: DeepSeek Coder (PR #8) ---
-ds_onnx_path = hf_hub_download(
-    repo_id="deepseek-ai/deepseek-coder-1.3b-base",
-    filename="model.onnx",            # you said this exists ― so we trust you :)
-    revision="refs/pr/8"
-)
 tokenizer_ds = AutoTokenizer.from_pretrained(
-    "deepseek-ai/deepseek-coder-1.3b-base",
-    revision="refs/pr/8"
 )
-session_ds = ort.InferenceSession(ds_onnx_path, providers=["CPUExecutionProvider"])
 # ---------------------------------------------------------
 # Inference functions for classification models
 # ---------------------------------------------------------
 def run_multilingual(text):
     inputs = tokenizer_multilingual(text, return_tensors="np", truncation=True, padding=True)
     inputs = {k: v.astype(np.int64) for k, v in inputs.items()}
@@ -145,57 +164,65 @@ def run_vit(image):
     return {f"class_{i}": float(probs[i]) for i in top5}
-def run_deepseek(prompt):
-    return greedy_decode_onnx(session_ds, tokenizer_ds, prompt, max_new_tokens=64)
 # ---------------------------------------------------------
 # Unified model router
 # ---------------------------------------------------------
 def inference(model_name, text, image):
     if model_name == "Multilingual Sentiment":
         return run_multilingual(text)
     elif model_name == "SDG Classification":
         return run_sdg(text)
     elif model_name == "German Sentiment":
         return run_german(text)
     elif model_name == "ViT Image Classification":
         if image is None:
             return {"error": "Please upload an image."}
         return run_vit(image)
     elif model_name == "DeepSeek Coder":
         return {"generated_text": run_deepseek(text)}
     else:
         return {"error": "Invalid model selected."}
 # ---------------------------------------------------------
 # Gradio UI
 # ---------------------------------------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("# 🔍 Multi-Model ONNX Inference Demo")
-    gr.Markdown("All models downloaded directly from the Hugging Face Hub via `hf_hub_download`.")
     model_selector = gr.Dropdown(
         [
             "Multilingual Sentiment",
             "SDG Classification",
             "German Sentiment",
-            "ViT Image Classification",
             "DeepSeek Coder"
         ],
         label="Choose a Model"
     )
     text_input = gr.Textbox(lines=3, label="Text Prompt / Input")
-    image_input = gr.Image(type="pil", label="Image Input (for ViT)", visible=True)
     output_box = gr.JSON(label="Output")
     run_button = gr.Button("Run")
     run_button.click(
         inference,
-        inputs=[model_selector, text_input, image_input],
         outputs=output_box
     )

 import onnxruntime as ort
 from PIL import Image
 from huggingface_hub import hf_hub_download
+from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
+import torch
 # ---------------------------------------------------------
 def greedy_decode_onnx(session, tokenizer, prompt, max_new_tokens=64):
+    """Greedy decoding loop for ONNX decoder-only models.
+
+    Tokenizes ``prompt``, then repeatedly runs ``session`` on the whole
+    sequence so far, appends the arg-max token of the last position, and
+    stops after ``max_new_tokens`` steps or once the tokenizer's EOS id is
+    produced. Returns the decoded sequence (prompt tokens included) with
+    special tokens skipped.
+    """
     ids = tokenizer(prompt, return_tensors="np")["input_ids"].astype(np.int64)
     for _ in range(max_new_tokens):
+        # Only ``input_ids`` is fed; the full sequence is re-run every step.
         ort_inputs = {"input_ids": ids}
+        # NOTE(review): assumes output 0 is logits of rank 3 — confirm.
+        logits = session.run(None, ort_inputs)[0]
+        next_token_logits = logits[:, -1, :]
+        # Only the first batch row is decoded.
         next_token = int(np.argmax(next_token_logits, axis=-1)[0])
         ids = np.concatenate([ids, [[next_token]]], axis=1)
+        if next_token == tokenizer.eos_token_id:
             break
     return tokenizer.decode(ids[0], skip_special_tokens=True)
 # ---------------------------------------------------------
 # Load ONNX models + tokenizers + configs
 # ---------------------------------------------------------
 # --- Model 1: Multilingual DistilBERT ---
+m_onx = hf_hub_download(
     repo_id="lxyuan/distilbert-base-multilingual-cased-sentiments-student",
     filename="onnx/model.onnx"
 )
     "lxyuan/distilbert-base-multilingual-cased-sentiments-student"
 )
 labels_multilingual = config_multilingual.id2label
+session_multilingual = ort.InferenceSession(m_onx, providers=["CPUExecutionProvider"])
 # --- Model 2: SDG-BERT ---
+sdg_onx = hf_hub_download("sadickam/sdgBERT", "onnx/model.onnx")
 tokenizer_sdg = AutoTokenizer.from_pretrained("sadickam/sdgBERT")
 config_sdg = AutoConfig.from_pretrained("sadickam/sdgBERT")
 labels_sdg = config_sdg.id2label
+session_sdg = ort.InferenceSession(sdg_onx, providers=["CPUExecutionProvider"])
 # --- Model 3: German Sentiment ---
+g_onx = hf_hub_download("oliverguhr/german-sentiment-bert", "onnx/model.onnx")
 tokenizer_german = AutoTokenizer.from_pretrained("oliverguhr/german-sentiment-bert")
 config_german = AutoConfig.from_pretrained("oliverguhr/german-sentiment-bert")
 labels_german = config_german.id2label
+session_german = ort.InferenceSession(g_onx, providers=["CPUExecutionProvider"])
 # --- Model 4: ViT Image Classifier ---
+vit_onx = hf_hub_download("WinKawaks/vit-small-patch16-224", "onnx/model.onnx")
+session_vit = ort.InferenceSession(vit_onx, providers=["CPUExecutionProvider"])
 IMAGE_SIZE = 224
 MEAN = [0.485, 0.456, 0.406]
 STD = [0.229, 0.224, 0.225]
+# ---------------------------------------------------------
+# NEW: Model 5 — DeepSeek Coder from HF Hub (NOT ONNX)
+# ---------------------------------------------------------
+DS_REPO = "guychuk/dpsk-exmpl"
+# NOTE(review): trust_remote_code=True lets the Hub repo supply custom Python
+# code that is executed locally — confirm DS_REPO is trusted.
 tokenizer_ds = AutoTokenizer.from_pretrained(
+    DS_REPO,
+    trust_remote_code=True
 )
+# Loaded at import time; half precision is chosen only when CUDA is available.
+model_ds = AutoModelForCausalLM.from_pretrained(
+    DS_REPO,
+    # NOTE(review): device_map="auto" presumably needs `accelerate` installed — verify.
+    device_map="auto",
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+    low_cpu_mem_usage=True,
+    trust_remote_code=True
+)
+# Switch the module to evaluation mode before serving requests.
+model_ds.eval()
+def run_deepseek(prompt, max_new_tokens=128, temperature=0.7, top_p=0.9):
+    """Generate using PyTorch (DeepSeek HF model)."""
+    if not prompt.strip():
+        return "Please enter a prompt."
+    inputs = tokenizer_ds(prompt, return_tensors="pt").to(model_ds.device)
+    with torch.no_grad():
+        out = model_ds.generate(
+            **inputs,
+            max_new_tokens=max_new_tokens,
+            do_sample=True,
+            temperature=temperature,
+            top_p=top_p,
+            pad_token_id=tokenizer_ds.eos_token_id
+        )
+    return tokenizer_ds.decode(out[0], skip_special_tokens=True)
 # ---------------------------------------------------------
 # Inference functions for classification models
 # ---------------------------------------------------------
 def run_multilingual(text):
     inputs = tokenizer_multilingual(text, return_tensors="np", truncation=True, padding=True)
     inputs = {k: v.astype(np.int64) for k, v in inputs.items()}
     return {f"class_{i}": float(probs[i]) for i in top5}
 # ---------------------------------------------------------
 # Unified model router
 # ---------------------------------------------------------
 def inference(model_name, text, image=None):
     """Route a request to the runner for the selected model.

     Args:
         model_name: Label chosen in the model dropdown.
         text: Text input / prompt for the text models.
         image: Optional image for "ViT Image Classification". Defaults to
             ``None`` so the function can also be called with only the
             model name and the text, as a two-input click handler does.

     Returns:
         The selected runner's result, ``{"generated_text": ...}`` for
         DeepSeek Coder, or ``{"error": ...}`` when the image is missing
         or the model name is unknown.
     """
     if model_name == "Multilingual Sentiment":
         return run_multilingual(text)
     elif model_name == "SDG Classification":
         return run_sdg(text)
     elif model_name == "German Sentiment":
         return run_german(text)
     elif model_name == "ViT Image Classification":
         # The ViT runner needs an image; report the problem instead of crashing.
         if image is None:
             return {"error": "Please upload an image."}
         return run_vit(image)
     elif model_name == "DeepSeek Coder":
         return {"generated_text": run_deepseek(text)}
     else:
         return {"error": "Invalid model selected."}
 # ---------------------------------------------------------
 # Gradio UI
 # ---------------------------------------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("# 🔍 Multi-Model Inference Demo (ONNX + DeepSeek Coder)")
     model_selector = gr.Dropdown(
         [
             "Multilingual Sentiment",
             "SDG Classification",
             "German Sentiment",
+            # "ViT Image Classification",
             "DeepSeek Coder"
         ],
         label="Choose a Model"
     )
     text_input = gr.Textbox(lines=3, label="Text Prompt / Input")
+    # image_input = gr.Image(type="pil", label="Image Input (ViT only)")
     output_box = gr.JSON(label="Output")
     run_button = gr.Button("Run")
     run_button.click(
         inference,
+        inputs=[
+            model_selector,
+            text_input,
+            # image_input
+        ],
         outputs=output_box
     )