import gradio as gr
import numpy as np
import onnxruntime as ort
from PIL import Image
from huggingface_hub import hf_hub_download
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
import torch

# ---------------------------------------------------------
# Helper functions
# ---------------------------------------------------------
def softmax(x):
    e = np.exp(x - np.max(x))
    return e / e.sum()


def greedy_decode_onnx(session, tokenizer, prompt, max_new_tokens=64):
    """Greedy decoding loop for ONNX decoder-only models."""
    ids = tokenizer(prompt, return_tensors="np")["input_ids"].astype(np.int64)
    for _ in range(max_new_tokens):
        ort_inputs = {"input_ids": ids}
        logits = session.run(None, ort_inputs)[0]
        next_token_logits = logits[:, -1, :]
        next_token = int(np.argmax(next_token_logits, axis=-1)[0])
        ids = np.concatenate([ids, [[next_token]]], axis=1)
        if next_token == tokenizer.eos_token_id:
            break
    return tokenizer.decode(ids[0], skip_special_tokens=True)


# ---------------------------------------------------------
# Load ONNX models + tokenizers + configs
# ---------------------------------------------------------

# --- Model 1: Multilingual DistilBERT ---
m_onx = hf_hub_download(
    repo_id="lxyuan/distilbert-base-multilingual-cased-sentiments-student",
    filename="onnx/model.onnx",
)
tokenizer_multilingual = AutoTokenizer.from_pretrained(
    "lxyuan/distilbert-base-multilingual-cased-sentiments-student"
)
config_multilingual = AutoConfig.from_pretrained(
    "lxyuan/distilbert-base-multilingual-cased-sentiments-student"
)
labels_multilingual = config_multilingual.id2label
session_multilingual = ort.InferenceSession(m_onx, providers=["CPUExecutionProvider"])

# --- Model 2: SDG-BERT ---
sdg_onx = hf_hub_download("sadickam/sdgBERT", "onnx/model.onnx")
tokenizer_sdg = AutoTokenizer.from_pretrained("sadickam/sdgBERT")
config_sdg = AutoConfig.from_pretrained("sadickam/sdgBERT")
labels_sdg = config_sdg.id2label
session_sdg = ort.InferenceSession(sdg_onx, providers=["CPUExecutionProvider"])

# --- Model 3: German Sentiment ---
g_onx = hf_hub_download("oliverguhr/german-sentiment-bert", "onnx/model.onnx")
tokenizer_german = AutoTokenizer.from_pretrained("oliverguhr/german-sentiment-bert")
config_german = AutoConfig.from_pretrained("oliverguhr/german-sentiment-bert")
labels_german = config_german.id2label
session_german = ort.InferenceSession(g_onx, providers=["CPUExecutionProvider"])

# --- Model 4: ViT Image Classifier ---
vit_onx = hf_hub_download("WinKawaks/vit-small-patch16-224", "onnx/model.onnx")
session_vit = ort.InferenceSession(vit_onx, providers=["CPUExecutionProvider"])
IMAGE_SIZE = 224
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]

# ---------------------------------------------------------
# NEW: Model 5 - DeepSeek Coder from HF Hub (NOT ONNX)
# ---------------------------------------------------------
DS_REPO = "guychuk/dpsk-exmpl"

tokenizer_ds = AutoTokenizer.from_pretrained(
    DS_REPO,
    trust_remote_code=True
)
model_ds = AutoModelForCausalLM.from_pretrained(
    DS_REPO,
    device_map="auto",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
)
model_ds.eval()


def run_deepseek(prompt, max_new_tokens=128, temperature=0.7, top_p=0.9):
    """Generate using PyTorch (DeepSeek HF model)."""
    if not prompt.strip():
        return "Please enter a prompt."
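    # Tokenize on the model's device, then sample with temperature and
    # nucleus (top-p) filtering; pad_token_id falls back to EOS since
    # DeepSeek-style tokenizers often ship without a dedicated pad token.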
    inputs = tokenizer_ds(prompt, return_tensors="pt").to(model_ds.device)
    with torch.no_grad():
        out = model_ds.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            pad_token_id=tokenizer_ds.eos_token_id,
        )
    return tokenizer_ds.decode(out[0], skip_special_tokens=True)


# ---------------------------------------------------------
# Inference functions for classification models
# ---------------------------------------------------------
def run_multilingual(text):
    inputs = tokenizer_multilingual(text, return_tensors="np", truncation=True, padding=True)
    inputs = {k: v.astype(np.int64) for k, v in inputs.items()}
    logits = session_multilingual.run(None, inputs)[0][0]
    probs = softmax(logits)
    return {labels_multilingual[i]: float(probs[i]) for i in range(len(probs))}


def run_sdg(text):
    inputs = tokenizer_sdg(text, return_tensors="np", truncation=True, padding=True)
    inputs = {k: v.astype(np.int64) for k, v in inputs.items()}
    logits = session_sdg.run(None, inputs)[0][0]
    probs = softmax(logits)
    return {labels_sdg[i]: float(probs[i]) for i in range(len(probs))}


def run_german(text):
    inputs = tokenizer_german(text, return_tensors="np", truncation=True, padding=True)
    inputs = {k: v.astype(np.int64) for k, v in inputs.items()}
    logits = session_german.run(None, inputs)[0][0]
    probs = softmax(logits)
    return {labels_german[i]: float(probs[i]) for i in range(len(probs))}


def preprocess_vit(image):
    image = image.convert("RGB").resize((IMAGE_SIZE, IMAGE_SIZE))
    arr = np.array(image).astype(np.float32) / 255.0
    # Cast back to float32: the list-based MEAN/STD promote the result to
    # float64, which the ONNX session would reject.
    arr = ((arr - MEAN) / STD).astype(np.float32)
    arr = arr.transpose(2, 0, 1)
    return arr[np.newaxis, :]


def run_vit(image):
    arr = preprocess_vit(image)
    input_name = session_vit.get_inputs()[0].name
    logits = session_vit.run(None, {input_name: arr})[0][0]
    probs = softmax(logits)
    top5 = probs.argsort()[::-1][:5]
    # Returns raw class indices; map them through the model config's
    # id2label for human-readable ImageNet names if needed.
    return {f"class_{i}": float(probs[i]) for i in top5}


# ---------------------------------------------------------
# Unified model router
# ---------------------------------------------------------
def inference(model_name, text, image=None):
    if model_name == "Multilingual Sentiment":
        return run_multilingual(text)
    elif model_name == "SDG Classification":
        return run_sdg(text)
    elif model_name == "German Sentiment":
        return run_german(text)
    elif model_name == "ViT Image Classification":
        if image is None:
            return {"error": "Please upload an image."}
        return run_vit(image)
    elif model_name == "DeepSeek Coder":
        return {"generated_text": run_deepseek(text)}
    else:
        return {"error": "Invalid model selected."}


# ---------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# 🔍 Multi-Model Inference Demo (ONNX + DeepSeek Coder)")

    model_selector = gr.Dropdown(
        [
            "Multilingual Sentiment",
            "SDG Classification",
            "German Sentiment",
            # "ViT Image Classification",
            "DeepSeek Coder",
        ],
        label="Choose a Model",
    )
    text_input = gr.Textbox(lines=3, label="Text Prompt / Input")
    # image_input = gr.Image(type="pil", label="Image Input (ViT only)")
    output_box = gr.JSON(label="Output")
    run_button = gr.Button("Run")

    # The image input is disabled above, so the click handler passes only two
    # values; `image` defaults to None to keep the signature compatible.
    run_button.click(
        inference,
        inputs=[
            model_selector,
            text_input,
            # image_input,
        ],
        outputs=output_box,
    )

demo.launch()
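# ---------------------------------------------------------
# Optional smoke test (a sketch; the sample inputs below are made up).
# demo.launch() above blocks, so run these lines in a REPL or move them
# before the launch call.
# ---------------------------------------------------------
# print(run_multilingual("I love this product!"))
# print(run_sdg("Access to clean water reduces disease in rural areas."))
# print(run_deepseek("Write a Python function that reverses a string.", max_new_tokens=64))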