import gradio as gr
import torch
from PIL import Image
import numpy as np
from transformers import AutoImageProcessor, AutoModelForImageClassification

MODEL_NAME = "prithivMLmods/Hand-Gesture-19"

print("Loading model …")
processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
model = AutoModelForImageClassification.from_pretrained(MODEL_NAME)
model.eval()
print("Model loaded ✅")

labels = {
    0: "call", 1: "dislike", 2: "fist", 3: "four", 4: "like",
    5: "mute", 6: "no_gesture", 7: "ok", 8: "one", 9: "palm",
    10: "peace", 11: "peace_inverted", 12: "rock", 13: "stop",
    14: "stop_inverted", 15: "three", 16: "three2", 17: "two_up", 18: "two_up_inverted"
}
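
# Note: the same mapping could also be read from the checkpoint itself, assuming
# its config ships an id2label table (standard for transformers classification models):
# labels = {int(i): name for i, name in model.config.id2label.items()}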
def predict_hand_gesture(image: np.ndarray):
    try:
        img = Image.fromarray(image).convert("RGB")
        inputs = processor(images=img, return_tensors="pt")
        with torch.no_grad():
            outputs = model(**inputs)
        logits = outputs.logits
        probs = torch.nn.functional.softmax(logits, dim=1).squeeze().tolist()
        pred_idx = int(np.argmax(probs))
        pred_label = labels[pred_idx]
        pred_score = probs[pred_idx]
        return f"🎯 Predicted gesture: **{pred_label}**\nConfidence: {pred_score*100:.1f}%"
    except Exception as e:
        return f"❌ Error during prediction: {e}"
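
# Optional quick sanity check before wiring up the UI (assumes a local test
# image; "sample.jpg" is a placeholder path, not shipped with the Space):
# test_img = np.array(Image.open("sample.jpg").convert("RGB"))
# print(predict_hand_gesture(test_img))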
# ---------------------------
# HTML Header
# ---------------------------
html_intro = """
<h1 style='text-align:center; color:#2E86C1;'>🤚 Hand Gesture Classification</h1>
<h3 style='text-align:center; color:#34495E;'>By Jashandeep Kaur</h3>
<p>
<b>Note:</b> Currently, the model can recognize <b>19 common hand gestures</b> such as "fist", "peace", "like", "stop", and more.
This is a subset of all possible gestures; the limitation exists because the model was trained only on these 19 gestures.
</p>
<h4>🔹 Benefits of this project</h4>
<ul>
  <li>Helps beginners explore <b>computer vision and deep learning</b> concepts.</li>
  <li>Can be extended for <b>sign language recognition</b> in apps and educational tools.</li>
  <li>Useful for learning <b>human-computer interaction</b> with gesture control.</li>
</ul>
<p>
<i>Future work:</i> We plan to expand the model to recognize more gestures (50–78+), including greetings like "hello", "thank you", and other commonly used signs.
</p>
"""
iface = gr.Interface(
    fn=predict_hand_gesture,
    inputs=gr.Image(type="numpy", label="Upload hand gesture image"),
    outputs=gr.Textbox(label="Prediction"),
    title="",  # leave empty because we are using HTML
    description="",
    allow_flagging="never",
)
# Add the HTML header at the top by wrapping the Interface in Blocks
demo = gr.Blocks()
with demo:
    gr.HTML(html_intro)
    iface.render()

if __name__ == "__main__":
    demo.launch()
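
# When running locally instead of on Spaces, demo.launch(share=True) requests a
# temporary public URL; server_name/server_port can pin the host and port.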