import gradio as gr
import numpy as np
import onnxruntime as ort
from PIL import Image
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
import torch


def softmax(x):
    # Numerically stable softmax: shift by the max before exponentiating.
    e = np.exp(x - np.max(x))
    return e / e.sum()
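
# Optional helper: greedy decoding against an ONNX decoder-only graph. It is
# not wired into the Gradio UI below (the DeepSeek path uses PyTorch
# generate()), but is left in place for CPU-only experiments.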


def greedy_decode_onnx(session, tokenizer, prompt, max_new_tokens=64):
    """Greedy decoding loop for ONNX decoder-only models.

    Assumes the exported graph takes a single `input_ids` input and returns
    logits as its first output. The full sequence is re-run every step (no KV
    cache), so this is quadratic in length -- fine for short generations.
    """
    ids = tokenizer(prompt, return_tensors="np")["input_ids"].astype(np.int64)

    for _ in range(max_new_tokens):
        ort_inputs = {"input_ids": ids}
        logits = session.run(None, ort_inputs)[0]
        next_token_logits = logits[:, -1, :]
        next_token = int(np.argmax(next_token_logits, axis=-1)[0])
        ids = np.concatenate([ids, [[next_token]]], axis=1)
        if next_token == tokenizer.eos_token_id:
            break

    return tokenizer.decode(ids[0], skip_special_tokens=True)
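
# --- ONNX sequence-classification models --------------------------------------
# Each block downloads the exported ONNX weights, loads the matching tokenizer,
# reads the id2label map from the model config, and opens a CPU session.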

m_onx = hf_hub_download(
    repo_id="lxyuan/distilbert-base-multilingual-cased-sentiments-student",
    filename="onnx/model.onnx",
)
tokenizer_multilingual = AutoTokenizer.from_pretrained(
    "lxyuan/distilbert-base-multilingual-cased-sentiments-student"
)
config_multilingual = AutoConfig.from_pretrained(
    "lxyuan/distilbert-base-multilingual-cased-sentiments-student"
)
labels_multilingual = config_multilingual.id2label
session_multilingual = ort.InferenceSession(m_onx, providers=["CPUExecutionProvider"])

# sdgBERT: maps text to UN Sustainable Development Goal categories.
sdg_onx = hf_hub_download("sadickam/sdgBERT", "onnx/model.onnx")
tokenizer_sdg = AutoTokenizer.from_pretrained("sadickam/sdgBERT")
config_sdg = AutoConfig.from_pretrained("sadickam/sdgBERT")
labels_sdg = config_sdg.id2label
session_sdg = ort.InferenceSession(sdg_onx, providers=["CPUExecutionProvider"])

# German-language sentiment model.
g_onx = hf_hub_download("oliverguhr/german-sentiment-bert", "onnx/model.onnx")
tokenizer_german = AutoTokenizer.from_pretrained("oliverguhr/german-sentiment-bert")
config_german = AutoConfig.from_pretrained("oliverguhr/german-sentiment-bert")
labels_german = config_german.id2label
session_german = ort.InferenceSession(g_onx, providers=["CPUExecutionProvider"])
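
# --- ViT image classifier (ONNX) -----------------------------------------------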

vit_onx = hf_hub_download("WinKawaks/vit-small-patch16-224", "onnx/model.onnx")
session_vit = ort.InferenceSession(vit_onx, providers=["CPUExecutionProvider"])
# The config's id2label should map class indices to ImageNet-1k names;
# run_vit falls back to raw indices if an entry is missing.
config_vit = AutoConfig.from_pretrained("WinKawaks/vit-small-patch16-224")
labels_vit = config_vit.id2label

IMAGE_SIZE = 224
# Standard ImageNet normalization statistics, kept as float32 arrays so the
# preprocessed tensor stays float32 (the dtype the ONNX graph expects).
MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)


# DeepSeek Coder runs through PyTorch/transformers rather than ONNX.
DS_REPO = "guychuk/dpsk-exmpl"

tokenizer_ds = AutoTokenizer.from_pretrained(DS_REPO, trust_remote_code=True)

model_ds = AutoModelForCausalLM.from_pretrained(
    DS_REPO,
    device_map="auto",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)
model_ds.eval()
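
# --- Per-model inference wrappers ------------------------------------------------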


def run_deepseek(prompt, max_new_tokens=128, temperature=0.7, top_p=0.9):
    """Generate using PyTorch (DeepSeek HF model)."""
    if not prompt.strip():
        return "Please enter a prompt."

    inputs = tokenizer_ds(prompt, return_tensors="pt").to(model_ds.device)

    with torch.no_grad():
        out = model_ds.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            pad_token_id=tokenizer_ds.eos_token_id,
        )

    return tokenizer_ds.decode(out[0], skip_special_tokens=True)
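# Example call (hypothetical prompt):
#   run_deepseek("Write a Python function that reverses a string.", max_new_tokens=64)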


def run_multilingual(text):
    inputs = tokenizer_multilingual(text, return_tensors="np", truncation=True, padding=True)
    inputs = {k: v.astype(np.int64) for k, v in inputs.items()}
    logits = session_multilingual.run(None, inputs)[0][0]
    probs = softmax(logits)
    return {labels_multilingual[i]: float(probs[i]) for i in range(len(probs))}
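# run_sdg and run_german follow the same tokenize -> int64 -> ONNX -> softmax
# pattern as run_multilingual; only the session and label map differ.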

def run_sdg(text):
    inputs = tokenizer_sdg(text, return_tensors="np", truncation=True, padding=True)
    inputs = {k: v.astype(np.int64) for k, v in inputs.items()}
    logits = session_sdg.run(None, inputs)[0][0]
    probs = softmax(logits)
    return {labels_sdg[i]: float(probs[i]) for i in range(len(probs))}

def run_german(text):
    inputs = tokenizer_german(text, return_tensors="np", truncation=True, padding=True)
    inputs = {k: v.astype(np.int64) for k, v in inputs.items()}
    logits = session_german.run(None, inputs)[0][0]
    probs = softmax(logits)
    return {labels_german[i]: float(probs[i]) for i in range(len(probs))}

def preprocess_vit(image):
    # Resize, scale to [0, 1], normalize with ImageNet stats, HWC -> NCHW.
    image = image.convert("RGB").resize((IMAGE_SIZE, IMAGE_SIZE))
    arr = np.array(image).astype(np.float32) / 255.0
    arr = (arr - MEAN) / STD
    arr = arr.transpose(2, 0, 1)
    return arr[np.newaxis, :]
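# Shape check (hypothetical input): preprocess_vit(Image.new("RGB", (640, 480)))
# yields a float32 array of shape (1, 3, 224, 224), matching the ViT input.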

def run_vit(image):
    arr = preprocess_vit(image)
    input_name = session_vit.get_inputs()[0].name
    logits = session_vit.run(None, {input_name: arr})[0][0]
    probs = softmax(logits)
    top5 = probs.argsort()[::-1][:5]
    # Report the top-5 classes by name, falling back to the raw index.
    return {labels_vit.get(int(i), f"class_{int(i)}"): float(probs[i]) for i in top5}


def inference(model_name, text, image):
    if model_name == "Multilingual Sentiment":
        return run_multilingual(text)
    elif model_name == "SDG Classification":
        return run_sdg(text)
    elif model_name == "German Sentiment":
        return run_german(text)
    elif model_name == "ViT Image Classification":
        if image is None:
            return {"error": "Please upload an image."}
        return run_vit(image)
    elif model_name == "DeepSeek Coder":
        return {"generated_text": run_deepseek(text)}
    else:
        return {"error": "Invalid model selected."}
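
# --- Gradio UI --------------------------------------------------------------------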


with gr.Blocks() as demo:
    gr.Markdown("# Multi-Model Inference Demo (ONNX + DeepSeek Coder)")

    model_selector = gr.Dropdown(
        [
            "Multilingual Sentiment",
            "SDG Classification",
            "German Sentiment",
            "ViT Image Classification",
            "DeepSeek Coder",
        ],
        label="Choose a Model",
    )

    text_input = gr.Textbox(lines=3, label="Text Prompt / Input")
    # Only the ViT model consumes the image; the other models ignore it.
    image_input = gr.Image(type="pil", label="Image (ViT only)")
    output_box = gr.JSON(label="Output")
    run_button = gr.Button("Run")

    run_button.click(
        inference,
        inputs=[model_selector, text_input, image_input],
        outputs=output_box,
    )

demo.launch()