import gradio as gr
import torch
from PIL import Image
import numpy as np
from transformers import AutoImageProcessor, AutoModelForImageClassification

MODEL_NAME = "prithivMLmods/Hand-Gesture-19"

print("Loading model …")
processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
model = AutoModelForImageClassification.from_pretrained(MODEL_NAME)
model.eval()
print("Model loaded ✅")

# Class index -> gesture name for the 19 supported gestures.
labels = {
    0: "call", 1: "dislike", 2: "fist", 3: "four", 4: "like",
    5: "mute", 6: "no_gesture", 7: "ok", 8: "one", 9: "palm",
    10: "peace", 11: "peace_inverted", 12: "rock", 13: "stop",
    14: "stop_inverted", 15: "three", 16: "three2", 17: "two_up",
    18: "two_up_inverted",
}


def predict_hand_gesture(image: np.ndarray):
    """Classify a hand gesture image and return a formatted result string."""
    try:
        img = Image.fromarray(image).convert("RGB")
        inputs = processor(images=img, return_tensors="pt")
        with torch.no_grad():
            outputs = model(**inputs)
        logits = outputs.logits
        # Softmax over the class dimension -> per-class probabilities.
        probs = torch.nn.functional.softmax(logits, dim=1).squeeze().tolist()
        pred_idx = int(np.argmax(probs))
        pred_label = labels[pred_idx]
        pred_score = probs[pred_idx]
        return f"🎯 Predicted gesture: **{pred_label}**\nConfidence: {pred_score*100:.1f}%"
    except Exception as e:
        return f"❌ Error during prediction: {e}"
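
# Optional sanity check (a sketch, assuming the checkpoint's config ships an
# id2label mapping): warn if the hardcoded `labels` dict above ever drifts
# from what the model itself reports.
config_labels = {int(i): str(n) for i, n in model.config.id2label.items()}
if config_labels != labels:
    print("⚠️ Hardcoded labels differ from model.config.id2label:", config_labels)
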
# ---------------------------
# Intro header (rendered as Markdown below)
# ---------------------------
html_intro = """
# 🤟 Hand Gesture Classification

*By Jashandeep Kaur*

**Note:** Currently, the model can recognize **19 common hand gestures** such as "fist", "peace", "like", and "stop". This is a subset of all possible gestures: the model was trained only on these 19 classes.

### 🔹 Benefits of this project

- Helps beginners explore **computer vision and deep learning** concepts.
- Can be extended for **sign language recognition** in apps and educational tools.
- Useful for learning **human-computer interaction** with gesture control.

*Future work:* We plan to expand the model to recognize more gestures (50–78+), including greetings like "hello", "thank you", and other commonly used signs.

""" iface = gr.Interface( fn=predict_hand_gesture, inputs=gr.Image(type="numpy", label="Upload hand gesture image"), outputs=gr.Textbox(label="Prediction"), title="", # leave empty because we are using HTML description="", allow_flagging="never", ) # Add HTML at the top iface = gr.Blocks() with iface: gr.HTML(html_intro) gr.Interface( fn=predict_hand_gesture, inputs=gr.Image(type="numpy", label="Upload hand gesture image"), outputs=gr.Textbox(label="Prediction") ) if __name__ == "__main__": iface.launch()