Spaces:

Trainera
/

foodrecognitionapi

Sleeping

App Files Files Community

har1zarD committed on Nov 1

Commit

c826c48

1 Parent(s): 8c64cb8

ap

Browse files

Files changed (1) hide show

app.py +66 -113

app.py CHANGED Viewed

@@ -59,116 +59,63 @@ openai_client = None  # Will be initialized in lifespan startup
 # ==================== MULTI-MODEL FOOD RECOGNITION ====================
 FOOD_MODELS = {
-    # NOISYVIT 2025 STATE-OF-THE-ART FOOD RECOGNITION SYSTEM
-    # NOISYVIT 2025 FLAGSHIP MODELS (Highest Priority)
-    "noisyvit_2025_huge": {
-        "model_name": "google/vit-huge-patch14-224-in21k",
-        "type": "noisyvit_transformer_huge",
-        "classes": 21000,
         "priority": 1,
-        "description": "NoisyViT 2025 Huge (~2.5GB) - Ultimate robust food recognition with noise resilience"
     },
-    "noisyvit_2025_large": {
-        "model_name": "google/vit-large-patch16-224-in21k",
-        "type": "noisyvit_transformer_large",
-        "classes": 21000,
         "priority": 2,
-        "description": "NoisyViT 2025 Large (~1.3GB) - Advanced robustness for complex multi-object scenes"
-    },
-    "noisyvit_2025_base_384": {
-        "model_name": "google/vit-base-patch16-384",
-        "type": "noisyvit_transformer_base",
-        "classes": 1000,
-        "priority": 3,
-        "description": "NoisyViT 2025 Base 384px (~1.8GB) - High-resolution food detail detection"
     },
-    # FOOD-101 SPECIALIZED ViT ENSEMBLE
-    "food101_vit_specialist": {
-        "model_name": "nateraw/food",
         "type": "food_specialist_vit",
         "classes": 101,
         "priority": 4,
-        "description": "Food-101 ViT Specialist (~500MB) - Trained on 101 specific food categories"
     },
-    "food_enhanced_classifier": {
-        "model_name": "Kaludi/food-category-classification-v2.0",
-        "type": "food_specialist_enhanced",
-        "classes": 12,
         "priority": 5,
-        "description": "Enhanced Food Classifier (~300MB) - Multi-category detection with ViT backbone"
     },
-    # MULTI-OBJECT FOOD SCENE DETECTION
-    "multi_object_vit": {
-        "model_name": "microsoft/swin-large-patch4-window7-224",
-        "type": "swin_transformer_multi_object",
-        "classes": 1000,
         "priority": 6,
-        "description": "Swin Large (~800MB) - Excellent for complex scenes with multiple food items"
-    },
-    "scene_understanding_vit": {
-        "model_name": "microsoft/beit-large-patch16-224",
-        "type": "beit_transformer_scene",
-        "classes": 1000,
-        "priority": 7,
-        "description": "BEiT Large (~1.1GB) - Advanced scene understanding for mixed dishes"
-    },
-    # VISION-LANGUAGE MODELS FOR COMPLEX DESCRIPTIONS
-    "food_clip_huge": {
-        "model_name": "laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
-        "type": "clip_huge_food",
-        "classes": 1000,
-        "priority": 8,
-        "description": "LAION CLIP Huge (~3.5GB) - Vision-language understanding for complex food descriptions"
-    },
-    "openai_clip_large": {
-        "model_name": "openai/clip-vit-large-patch14",
-        "type": "clip_large_food",
-        "classes": 1000,
-        "priority": 9,
-        "description": "OpenAI CLIP Large (~1.7GB) - Robust vision-language for food understanding"
-    },
-    # CUTTING-EDGE ARCHITECTURE MODELS
-    "convnext_xxlarge": {
-        "model_name": "laion/CLIP-convnext_xxlarge-laion2B-s34B-b82K-augreg-soup",
-        "type": "convnext_xxlarge_food",
-        "classes": 1000,
-        "priority": 10,
-        "description": "ConvNeXt XXLarge (~2.8GB) - Massive CNN for detailed food feature extraction"
-    },
-    "efficientnet_ultra": {
-        "model_name": "timm/tf_efficientnetv2_l_in21k",
-        "type": "efficientnet_ultra_food",
-        "classes": 21000,
-        "priority": 11,
-        "description": "EfficientNetV2 Large (~480MB) - Optimal efficiency for real-time food detection"
-    },
-    # MEMORY-OPTIMIZED BACKUP MODELS
-    "resnet_deep_food": {
-        "model_name": "microsoft/resnet-152",
-        "type": "resnet_deep_food",
-        "classes": 1000,
-        "priority": 12,
-        "description": "ResNet-152 (~240MB) - Memory-efficient deep baseline for food recognition"
     }
-    # NOISYVIT 2025 ENSEMBLE TOTAL:
-    # NoisyViT models: ~5.6GB (3 flagship models)
-    # Food specialists: ~800MB
-    # Multi-object models: ~1.9GB
-    # CLIP vision-language: ~5.2GB
-    # Cutting-edge architectures: ~3.3GB
-    # TOTAL: ~16.8GB - Will use smart loading to stay under 16GB limit
-    # 12 NOISYVIT-POWERED MODELS for ultimate food recognition accuracy
 }
-# Default primary model - NoisyViT 2025 Flagship
-PRIMARY_MODEL = "noisyvit_2025_huge"
 # CONFIDENCE THRESHOLDS - Realistic for ensemble models
 MIN_CONFIDENCE_THRESHOLD = 0.20  # 20% minimum confidence (ensemble should be confident)
@@ -233,14 +180,18 @@ SMART_FOOD_OVERRIDES = {
 # ADVANCED BALKAN FOOD DETECTION - Map to closest Food-101 categories
 BALKAN_TO_FOOD101_MAPPING = {
-    # Balkan dish → Closest Food-101 equivalent
     "cevapi": "hot_dog",          # Closest grilled meat in Food-101
     "cevapcici": "hot_dog",       # Same as ćevapi
     "pljeskavica": "hamburger",   # Burger-like grilled meat patty
     "burek": "pizza",             # Closest baked dough dish
     "sarma": "dumplings",         # Stuffed/wrapped food
     "kajmak": "cheese_plate",     # Dairy product
     "ajvar": "hummus",            # Vegetable spread
     "prebranac": "baked_beans",   # Bean dish (if exists)
     "pasulj": "soup",             # Bean soup
     "begova_corba": "soup"        # Turkish soup
@@ -294,6 +245,10 @@ COMPREHENSIVE_FOOD_CATEGORIES = {
     "chocolate_chip_pancakes", "banana_pancakes", "protein_pancakes", "sourdough_pancakes",
     "waffles", "belgian_waffles", "waffle", "french_toast", "toast", "bagel", "croissant",
     "muffin", "english_muffin", "danish_pastry", "cinnamon_roll", "oatmeal", "cereal",
     "scrambled_eggs", "fried_eggs", "eggs_benedict", "omelet", "breakfast_burrito",
     # FOOD-101 CATEGORIES (Proven dataset)
@@ -1024,22 +979,19 @@ class MultiModelFoodRecognizer:
         self._warm_up()
     def _initialize_models(self):
-        """Initialize NoisyViT 2025 ensemble with 16GB memory optimization."""
-        logger.info("🎯 Initializing NOISYVIT 2025 food recognition system with memory optimization...")
         # MEMORY-AWARE LOADING: Priority-based loading with RAM monitoring
         sorted_models = sorted(FOOD_MODELS.items(), key=lambda x: x[1]["priority"])
         memory_used = 0
         memory_limit = 14.5 * 1024  # 14.5GB limit (1.5GB buffer for inference)
-        # Model memory estimates (MB)
         model_sizes = {
-            "noisyvit_2025_huge": 2500,        "noisyvit_2025_large": 1300,
-            "noisyvit_2025_base_384": 1800,    "food101_vit_specialist": 500,
-            "food_enhanced_classifier": 300,   "multi_object_vit": 800,
-            "scene_understanding_vit": 1100,   "food_clip_huge": 3500,
-            "openai_clip_large": 1700,         "convnext_xxlarge": 2800,
-            "efficientnet_ultra": 480,         "resnet_deep_food": 240
         }
         for model_key, model_config in sorted_models:
@@ -1075,11 +1027,11 @@ class MultiModelFoodRecognizer:
                     model = model.to(self.device)
                 model.eval()
-                # NOISYVIT-SPECIFIC COMPILATION
-                if hasattr(torch, 'compile') and self.device == "cuda" and "noisyvit" in model_key:
                     try:
                         model = torch.compile(model, mode="reduce-overhead", dynamic=True)
-                        logger.info(f"⚡ NOISYVIT {model_key} compiled with memory optimization")
                     except Exception as e:
                         logger.info(f"⚠️ Compilation failed for {model_key}: {e}")
@@ -1299,14 +1251,14 @@ class MultiModelFoodRecognizer:
         if not all_predictions:
             raise RuntimeError("No models produced valid predictions")
-        # CONSERVATIVE FILTERING - Only remove obvious non-food items
         non_food_items = {
-            # Only obvious garbage and non-food items
             'person', 'people', 'human', 'man', 'woman', 'child',
-            'car', 'truck', 'vehicle', 'building', 'house', 'tree', 'plant',
             'computer', 'phone', 'laptop', 'tablet', 'television', 'tv',
             'book', 'paper', 'pen', 'pencil', 'chair', 'table', 'sofa',
-            'cat', 'dog', 'bird', 'fish' # live animals only
         }
         # Generic FOOD terms that should be deprioritized (but not removed)
@@ -1337,9 +1289,10 @@ class MultiModelFoodRecognizer:
             'french fries', 'fries', 'sweet potato fries', 'onion rings',
             'hot dog', 'corn dog', 'bratwurst', 'sausage', 'kielbasa',
-            # Balkanska jela (sa alternativnim imenima)
-            'cevapi', 'cevapcici', 'ćevapi', 'ćevapčići', 'burek', 'börek',
-            'pljeskavica', 'sarma', 'klepe', 'dolma', 'kajmak', 'ajvar',
             'prebranac', 'pasulj', 'grah', 'punjena paprika', 'punjene paprike',
             'stuffed peppers', 'musaka', 'moussaka', 'japrak', 'bamija', 'okra',
             'bosanski lonac', 'begova corba', 'tarhana', 'zeljanica', 'spinach pie',

 # ==================== MULTI-MODEL FOOD RECOGNITION ====================
 FOOD_MODELS = {
+    # ONLY REAL FOOD-101 SPECIALIST MODELS - NO GENERIC VISION MODELS!
+    # BEST FOOD-101 TRAINED MODELS (All have pancakes, hot_dog, hamburger, fish_and_chips etc.)
+    "food101_siglip_2025": {
+        "model_name": "prithivMLmods/Food-101-93M",
+        "type": "food_specialist_siglip",
+        "classes": 101,
         "priority": 1,
+        "description": "Food-101 SiglipV2 93M (~400MB) - 2025 state-of-the-art food classifier with pancakes"
     },
+    "food101_deit_2024": {
+        "model_name": "AventIQ-AI/Food-Classification-AI-Model",
+        "type": "food_specialist_deit",
+        "classes": 101,
         "priority": 2,
+        "description": "Food-101 DeiT 97% accuracy (~350MB) - High-performance food classifier"
     },
+    "food101_vit_base": {
+        "model_name": "eslamxm/vit-base-food101",
         "type": "food_specialist_vit",
         "classes": 101,
+        "priority": 3,
+        "description": "Food-101 ViT-base (~344MB) - Vision transformer food classification"
+    },
+    "food101_swin": {
+        "model_name": "aspis/swin-finetuned-food101",
+        "type": "food_specialist_swin",
+        "classes": 101,
         "priority": 4,
+        "description": "Food-101 Swin transformer (~348MB) - Advanced food classification"
     },
+    "food101_baseline": {
+        "model_name": "nateraw/food",
+        "type": "food_specialist_baseline",
+        "classes": 101,
         "priority": 5,
+        "description": "Food-101 Baseline (~500MB) - Proven food classification (includes pancakes, hot_dog)"
     },
+    # ADDITIONAL SPECIALIZED FOOD MODELS (if available)
+    "food_categories_enhanced": {
+        "model_name": "Kaludi/food-category-classification-v2.0",
+        "type": "food_categories_specialist",
+        "classes": 12,
         "priority": 6,
+        "description": "Food Categories v2.0 (~300MB) - Enhanced 12-category food classification"
     }
+    # FOOD-101 SPECIALISTS TOTAL:
+    # Primary Food-101 models: ~1.94GB (5 models with 101 specific dishes each)
+    # Enhanced categories: ~300MB
+    # TOTAL: ~2.24GB - Extremely efficient, focused only on food!
+    # 6 FOOD-SPECIALIST MODELS trained specifically on food datasets
 }
+# Default primary model - Best Food-101 Specialist
+PRIMARY_MODEL = "food101_siglip_2025"
 # CONFIDENCE THRESHOLDS - Realistic for ensemble models
 MIN_CONFIDENCE_THRESHOLD = 0.20  # 20% minimum confidence (ensemble should be confident)
 # ADVANCED BALKAN FOOD DETECTION - Map to closest Food-101 categories
 BALKAN_TO_FOOD101_MAPPING = {
+    # Balkan dish → Closest Food-101 equivalent (ENHANCED for better recognition)
     "cevapi": "hot_dog",          # Closest grilled meat in Food-101
     "cevapcici": "hot_dog",       # Same as ćevapi
+    "chevapi": "hot_dog",         # Alternative spelling
+    "chevapchichi": "hot_dog",    # Alternative spelling
     "pljeskavica": "hamburger",   # Burger-like grilled meat patty
     "burek": "pizza",             # Closest baked dough dish
     "sarma": "dumplings",         # Stuffed/wrapped food
     "kajmak": "cheese_plate",     # Dairy product
     "ajvar": "hummus",            # Vegetable spread
+    "raznjici": "hot_dog",        # Similar grilled meat
+    "kofte": "hot_dog",           # Similar grilled meat
     "prebranac": "baked_beans",   # Bean dish (if exists)
     "pasulj": "soup",             # Bean soup
     "begova_corba": "soup"        # Turkish soup
     "chocolate_chip_pancakes", "banana_pancakes", "protein_pancakes", "sourdough_pancakes",
     "waffles", "belgian_waffles", "waffle", "french_toast", "toast", "bagel", "croissant",
     "muffin", "english_muffin", "danish_pastry", "cinnamon_roll", "oatmeal", "cereal",
+    # BALKAN FOODS (Critical for ćevapi!)
+    "cevapi", "cevapcici", "chevapi", "chevapchichi", "kebab", "kofte", "pljeskavica",
+    "burek", "kajmak", "ajvar", "lepinja", "somun", "raznjici", "hot_dog",
     "scrambled_eggs", "fried_eggs", "eggs_benedict", "omelet", "breakfast_burrito",
     # FOOD-101 CATEGORIES (Proven dataset)
         self._warm_up()
     def _initialize_models(self):
+        """Initialize Food-101 specialist ensemble with memory optimization."""
+        logger.info("🎯 Initializing FOOD-101 SPECIALIST food recognition system with memory optimization...")
         # MEMORY-AWARE LOADING: Priority-based loading with RAM monitoring
         sorted_models = sorted(FOOD_MODELS.items(), key=lambda x: x[1]["priority"])
         memory_used = 0
         memory_limit = 14.5 * 1024  # 14.5GB limit (1.5GB buffer for inference)
+        # Model memory estimates (MB) - UPDATED FOR FOOD-101 SPECIALISTS
         model_sizes = {
+            "food101_siglip_2025": 400,        "food101_deit_2024": 350,
+            "food101_vit_base": 344,          "food101_swin": 348,
+            "food101_baseline": 500,          "food_categories_enhanced": 300
         }
         for model_key, model_config in sorted_models:
                     model = model.to(self.device)
                 model.eval()
+                # FOOD-101 SPECIFIC COMPILATION
+                if hasattr(torch, 'compile') and self.device == "cuda" and "food101" in model_key:
                     try:
                         model = torch.compile(model, mode="reduce-overhead", dynamic=True)
+                        logger.info(f"⚡ FOOD-101 {model_key} compiled with memory optimization")
                     except Exception as e:
                         logger.info(f"⚠️ Compilation failed for {model_key}: {e}")
         if not all_predictions:
             raise RuntimeError("No models produced valid predictions")
+        # ULTRA-CONSERVATIVE FILTERING - Only remove obvious non-food for Food-101 specialists
         non_food_items = {
+            # Minimal filtering since Food-101 models are trained on food only
             'person', 'people', 'human', 'man', 'woman', 'child',
+            'car', 'truck', 'vehicle', 'building', 'house',
             'computer', 'phone', 'laptop', 'tablet', 'television', 'tv',
             'book', 'paper', 'pen', 'pencil', 'chair', 'table', 'sofa',
+            'cat', 'dog', 'bird' # live animals only (removed 'fish' since it can be food)
         }
         # Generic FOOD terms that should be deprioritized (but not removed)
             'french fries', 'fries', 'sweet potato fries', 'onion rings',
             'hot dog', 'corn dog', 'bratwurst', 'sausage', 'kielbasa',
+            # Balkan dishes (with alternative names) - ENHANCED for ćevapi detection
+            'cevapi', 'cevapcici', 'ćevapi', 'ćevapčići', 'chevapi', 'chevapchichi',
+            'burek', 'börek', 'pljeskavica', 'sarma', 'klepe', 'dolma', 'kajmak', 'ajvar',
+            'kofte', 'raznjici', 'grilled meat', 'balkan sausage',
             'prebranac', 'pasulj', 'grah', 'punjena paprika', 'punjene paprike',
             'stuffed peppers', 'musaka', 'moussaka', 'japrak', 'bamija', 'okra',
             'bosanski lonac', 'begova corba', 'tarhana', 'zeljanica', 'spinach pie',