import gradio as gr
import numpy as np
import onnxruntime as ort
from PIL import Image
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
import torch
# ---------------------------------------------------------
# Helper functions
# ---------------------------------------------------------
def softmax(x):
    """Numerically stable softmax over a 1-D logits vector."""
    e = np.exp(x - np.max(x))
    return e / e.sum()

def greedy_decode_onnx(session, tokenizer, prompt, max_new_tokens=64):
    """Greedy decoding loop for ONNX decoder-only models.

    Re-runs the full sequence at every step (no KV cache), which is simple
    but quadratic in sequence length.
    """
    ids = tokenizer(prompt, return_tensors="np")["input_ids"].astype(np.int64)
    for _ in range(max_new_tokens):
        ort_inputs = {"input_ids": ids}
        logits = session.run(None, ort_inputs)[0]
        next_token_logits = logits[:, -1, :]
        next_token = int(np.argmax(next_token_logits, axis=-1)[0])
        ids = np.concatenate([ids, [[next_token]]], axis=1)
        if next_token == tokenizer.eos_token_id:
            break
    return tokenizer.decode(ids[0], skip_special_tokens=True)
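# Example (sketch): greedy_decode_onnx expects a *decoder-only* ONNX export
# whose sole input is "input_ids". None of the sessions loaded below fit that
# shape, so the helper is a standalone utility here. A hypothetical use, with
# "decoder_model.onnx" standing in for such an export:
#
#   dec_sess = ort.InferenceSession("decoder_model.onnx",
#                                   providers=["CPUExecutionProvider"])
#   dec_tok = AutoTokenizer.from_pretrained("gpt2")
#   print(greedy_decode_onnx(dec_sess, dec_tok, "def add(a, b):"))
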
# ---------------------------------------------------------
# Load ONNX models + tokenizers + configs
# ---------------------------------------------------------
# --- Model 1: Multilingual DistilBERT ---
m_onx = hf_hub_download(
    repo_id="lxyuan/distilbert-base-multilingual-cased-sentiments-student",
    filename="onnx/model.onnx"
)
tokenizer_multilingual = AutoTokenizer.from_pretrained(
    "lxyuan/distilbert-base-multilingual-cased-sentiments-student"
)
config_multilingual = AutoConfig.from_pretrained(
    "lxyuan/distilbert-base-multilingual-cased-sentiments-student"
)
labels_multilingual = config_multilingual.id2label
session_multilingual = ort.InferenceSession(m_onx, providers=["CPUExecutionProvider"])
# --- Model 2: SDG-BERT ---
sdg_onx = hf_hub_download("sadickam/sdgBERT", "onnx/model.onnx")
tokenizer_sdg = AutoTokenizer.from_pretrained("sadickam/sdgBERT")
config_sdg = AutoConfig.from_pretrained("sadickam/sdgBERT")
labels_sdg = config_sdg.id2label
session_sdg = ort.InferenceSession(sdg_onx, providers=["CPUExecutionProvider"])
# --- Model 3: German Sentiment ---
g_onx = hf_hub_download("oliverguhr/german-sentiment-bert", "onnx/model.onnx")
tokenizer_german = AutoTokenizer.from_pretrained("oliverguhr/german-sentiment-bert")
config_german = AutoConfig.from_pretrained("oliverguhr/german-sentiment-bert")
labels_german = config_german.id2label
session_german = ort.InferenceSession(g_onx, providers=["CPUExecutionProvider"])
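# Optional sanity check (sketch): print the input/output names each ONNX
# graph expects. Handy when a tokenizer emits keys (e.g. token_type_ids)
# that a given session does not accept. Uncomment to run at startup:
#
#   for name, s in [("multilingual", session_multilingual),
#                   ("sdg", session_sdg), ("german", session_german)]:
#       print(name, [i.name for i in s.get_inputs()],
#             "->", [o.name for o in s.get_outputs()])
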
# --- Model 4: ViT Image Classifier ---
vit_onx = hf_hub_download("WinKawaks/vit-small-patch16-224", "onnx/model.onnx")
session_vit = ort.InferenceSession(vit_onx, providers=["CPUExecutionProvider"])
IMAGE_SIZE = 224
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]
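# MEAN/STD are the standard ImageNet normalization statistics that ViT
# checkpoints are typically trained with; preprocess_vit below applies them
# after scaling pixel values to [0, 1].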
# ---------------------------------------------------------
# NEW: Model 5 - DeepSeek Coder from the HF Hub (NOT ONNX)
# ---------------------------------------------------------
DS_REPO = "guychuk/dpsk-exmpl"
tokenizer_ds = AutoTokenizer.from_pretrained(
    DS_REPO,
    trust_remote_code=True
)
model_ds = AutoModelForCausalLM.from_pretrained(
    DS_REPO,
    device_map="auto",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    low_cpu_mem_usage=True,
    trust_remote_code=True
)
model_ds.eval()
def run_deepseek(prompt, max_new_tokens=128, temperature=0.7, top_p=0.9):
    """Generate using PyTorch (DeepSeek HF model)."""
    if not prompt.strip():
        return "Please enter a prompt."
    inputs = tokenizer_ds(prompt, return_tensors="pt").to(model_ds.device)
    with torch.no_grad():
        out = model_ds.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            pad_token_id=tokenizer_ds.eos_token_id
        )
    return tokenizer_ds.decode(out[0], skip_special_tokens=True)
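# Note: decode(out[0]) includes the prompt tokens, so the returned string is
# the prompt followed by the generated continuation. To return only the new
# tokens, one could instead decode out[0][inputs["input_ids"].shape[1]:].
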
# ---------------------------------------------------------
# Inference functions for classification models
# ---------------------------------------------------------
def run_multilingual(text):
    inputs = tokenizer_multilingual(text, return_tensors="np", truncation=True, padding=True)
    inputs = {k: v.astype(np.int64) for k, v in inputs.items()}
    logits = session_multilingual.run(None, inputs)[0][0]
    probs = softmax(logits)
    return {labels_multilingual[i]: float(probs[i]) for i in range(len(probs))}

def run_sdg(text):
    inputs = tokenizer_sdg(text, return_tensors="np", truncation=True, padding=True)
    inputs = {k: v.astype(np.int64) for k, v in inputs.items()}
    logits = session_sdg.run(None, inputs)[0][0]
    probs = softmax(logits)
    return {labels_sdg[i]: float(probs[i]) for i in range(len(probs))}

def run_german(text):
    inputs = tokenizer_german(text, return_tensors="np", truncation=True, padding=True)
    inputs = {k: v.astype(np.int64) for k, v in inputs.items()}
    logits = session_german.run(None, inputs)[0][0]
    probs = softmax(logits)
    return {labels_german[i]: float(probs[i]) for i in range(len(probs))}
def preprocess_vit(image):
    image = image.convert("RGB").resize((IMAGE_SIZE, IMAGE_SIZE))
    arr = np.array(image).astype(np.float32) / 255.0
    # Normalizing against the Python-list MEAN/STD promotes the array to
    # float64; cast back to float32, which is what the ONNX graph expects.
    arr = ((arr - MEAN) / STD).astype(np.float32)
    arr = arr.transpose(2, 0, 1)
    return arr[np.newaxis, :]

def run_vit(image):
    arr = preprocess_vit(image)
    input_name = session_vit.get_inputs()[0].name
    logits = session_vit.run(None, {input_name: arr})[0][0]
    probs = softmax(logits)
    top5 = probs.argsort()[::-1][:5]
    return {f"class_{i}": float(probs[i]) for i in top5}
# ---------------------------------------------------------
# Unified model router
# ---------------------------------------------------------
def inference(model_name, text, image=None):
    # `image` defaults to None because the image input is currently commented
    # out in the UI below, so the click handler only passes two arguments.
    if model_name == "Multilingual Sentiment":
        return run_multilingual(text)
    elif model_name == "SDG Classification":
        return run_sdg(text)
    elif model_name == "German Sentiment":
        return run_german(text)
    elif model_name == "ViT Image Classification":
        if image is None:
            return {"error": "Please upload an image."}
        return run_vit(image)
    elif model_name == "DeepSeek Coder":
        return {"generated_text": run_deepseek(text)}
    else:
        return {"error": "Invalid model selected."}
# ---------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🔍 Multi-Model Inference Demo (ONNX + DeepSeek Coder)")
    model_selector = gr.Dropdown(
        [
            "Multilingual Sentiment",
            "SDG Classification",
            "German Sentiment",
            # "ViT Image Classification",
            "DeepSeek Coder"
        ],
        label="Choose a Model"
    )
    text_input = gr.Textbox(lines=3, label="Text Prompt / Input")
    # image_input = gr.Image(type="pil", label="Image Input (ViT only)")
    output_box = gr.JSON(label="Output")
    run_button = gr.Button("Run")
    run_button.click(
        inference,
        inputs=[
            model_selector,
            text_input,
            # image_input
        ],
        outputs=output_box
    )

demo.launch()