Spaces:
Sleeping
Sleeping
har1zarD
committed on
Commit
·
c826c48
1
Parent(s):
8c64cb8
ap
Browse files
app.py
CHANGED
|
@@ -59,116 +59,63 @@ openai_client = None # Will be initialized in lifespan startup
|
|
| 59 |
|
| 60 |
# ==================== MULTI-MODEL FOOD RECOGNITION ====================
|
| 61 |
FOOD_MODELS = {
|
| 62 |
-
#
|
| 63 |
|
| 64 |
-
#
|
| 65 |
-
"
|
| 66 |
-
"model_name": "
|
| 67 |
-
"type": "
|
| 68 |
-
"classes":
|
| 69 |
"priority": 1,
|
| 70 |
-
"description": "
|
| 71 |
},
|
| 72 |
-
"
|
| 73 |
-
"model_name": "
|
| 74 |
-
"type": "
|
| 75 |
-
"classes":
|
| 76 |
"priority": 2,
|
| 77 |
-
"description": "
|
| 78 |
-
},
|
| 79 |
-
"noisyvit_2025_base_384": {
|
| 80 |
-
"model_name": "google/vit-base-patch16-384",
|
| 81 |
-
"type": "noisyvit_transformer_base",
|
| 82 |
-
"classes": 1000,
|
| 83 |
-
"priority": 3,
|
| 84 |
-
"description": "NoisyViT 2025 Base 384px (~1.8GB) - High-resolution food detail detection"
|
| 85 |
},
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
"food101_vit_specialist": {
|
| 89 |
-
"model_name": "nateraw/food",
|
| 90 |
"type": "food_specialist_vit",
|
| 91 |
"classes": 101,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
"priority": 4,
|
| 93 |
-
"description": "Food-101
|
| 94 |
},
|
| 95 |
-
"
|
| 96 |
-
"model_name": "
|
| 97 |
-
"type": "
|
| 98 |
-
"classes":
|
| 99 |
"priority": 5,
|
| 100 |
-
"description": "
|
| 101 |
},
|
| 102 |
|
| 103 |
-
#
|
| 104 |
-
"
|
| 105 |
-
"model_name": "
|
| 106 |
-
"type": "
|
| 107 |
-
"classes":
|
| 108 |
"priority": 6,
|
| 109 |
-
"description": "
|
| 110 |
-
},
|
| 111 |
-
"scene_understanding_vit": {
|
| 112 |
-
"model_name": "microsoft/beit-large-patch16-224",
|
| 113 |
-
"type": "beit_transformer_scene",
|
| 114 |
-
"classes": 1000,
|
| 115 |
-
"priority": 7,
|
| 116 |
-
"description": "BEiT Large (~1.1GB) - Advanced scene understanding for mixed dishes"
|
| 117 |
-
},
|
| 118 |
-
|
| 119 |
-
# VISION-LANGUAGE MODELS FOR COMPLEX DESCRIPTIONS
|
| 120 |
-
"food_clip_huge": {
|
| 121 |
-
"model_name": "laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
|
| 122 |
-
"type": "clip_huge_food",
|
| 123 |
-
"classes": 1000,
|
| 124 |
-
"priority": 8,
|
| 125 |
-
"description": "LAION CLIP Huge (~3.5GB) - Vision-language understanding for complex food descriptions"
|
| 126 |
-
},
|
| 127 |
-
"openai_clip_large": {
|
| 128 |
-
"model_name": "openai/clip-vit-large-patch14",
|
| 129 |
-
"type": "clip_large_food",
|
| 130 |
-
"classes": 1000,
|
| 131 |
-
"priority": 9,
|
| 132 |
-
"description": "OpenAI CLIP Large (~1.7GB) - Robust vision-language for food understanding"
|
| 133 |
-
},
|
| 134 |
-
|
| 135 |
-
# CUTTING-EDGE ARCHITECTURE MODELS
|
| 136 |
-
"convnext_xxlarge": {
|
| 137 |
-
"model_name": "laion/CLIP-convnext_xxlarge-laion2B-s34B-b82K-augreg-soup",
|
| 138 |
-
"type": "convnext_xxlarge_food",
|
| 139 |
-
"classes": 1000,
|
| 140 |
-
"priority": 10,
|
| 141 |
-
"description": "ConvNeXt XXLarge (~2.8GB) - Massive CNN for detailed food feature extraction"
|
| 142 |
-
},
|
| 143 |
-
"efficientnet_ultra": {
|
| 144 |
-
"model_name": "timm/tf_efficientnetv2_l_in21k",
|
| 145 |
-
"type": "efficientnet_ultra_food",
|
| 146 |
-
"classes": 21000,
|
| 147 |
-
"priority": 11,
|
| 148 |
-
"description": "EfficientNetV2 Large (~480MB) - Optimal efficiency for real-time food detection"
|
| 149 |
-
},
|
| 150 |
-
|
| 151 |
-
# MEMORY-OPTIMIZED BACKUP MODELS
|
| 152 |
-
"resnet_deep_food": {
|
| 153 |
-
"model_name": "microsoft/resnet-152",
|
| 154 |
-
"type": "resnet_deep_food",
|
| 155 |
-
"classes": 1000,
|
| 156 |
-
"priority": 12,
|
| 157 |
-
"description": "ResNet-152 (~240MB) - Memory-efficient deep baseline for food recognition"
|
| 158 |
}
|
| 159 |
|
| 160 |
-
#
|
| 161 |
-
#
|
| 162 |
-
#
|
| 163 |
-
#
|
| 164 |
-
#
|
| 165 |
-
# Cutting-edge architectures: ~3.3GB
|
| 166 |
-
# TOTAL: ~16.8GB - Will use smart loading to stay under 16GB limit
|
| 167 |
-
# 12 NOISYVIT-POWERED MODELS for ultimate food recognition accuracy
|
| 168 |
}
|
| 169 |
|
| 170 |
-
# Default primary model -
|
| 171 |
-
PRIMARY_MODEL = "
|
| 172 |
|
| 173 |
# CONFIDENCE THRESHOLDS - Realistic for ensemble models
|
| 174 |
MIN_CONFIDENCE_THRESHOLD = 0.20 # 20% minimum confidence (ensemble should be confident)
|
|
@@ -233,14 +180,18 @@ SMART_FOOD_OVERRIDES = {
|
|
| 233 |
|
| 234 |
# ADVANCED BALKAN FOOD DETECTION - Map to closest Food-101 categories
|
| 235 |
BALKAN_TO_FOOD101_MAPPING = {
|
| 236 |
-
# Balkan dish → Closest Food-101 equivalent
|
| 237 |
"cevapi": "hot_dog", # Closest grilled meat in Food-101
|
| 238 |
"cevapcici": "hot_dog", # Same as ćevapi
|
|
|
|
|
|
|
| 239 |
"pljeskavica": "hamburger", # Burger-like grilled meat patty
|
| 240 |
"burek": "pizza", # Closest baked dough dish
|
| 241 |
"sarma": "dumplings", # Stuffed/wrapped food
|
| 242 |
"kajmak": "cheese_plate", # Dairy product
|
| 243 |
"ajvar": "hummus", # Vegetable spread
|
|
|
|
|
|
|
| 244 |
"prebranac": "baked_beans", # Bean dish (if exists)
|
| 245 |
"pasulj": "soup", # Bean soup
|
| 246 |
"begova_corba": "soup" # Turkish soup
|
|
@@ -294,6 +245,10 @@ COMPREHENSIVE_FOOD_CATEGORIES = {
|
|
| 294 |
"chocolate_chip_pancakes", "banana_pancakes", "protein_pancakes", "sourdough_pancakes",
|
| 295 |
"waffles", "belgian_waffles", "waffle", "french_toast", "toast", "bagel", "croissant",
|
| 296 |
"muffin", "english_muffin", "danish_pastry", "cinnamon_roll", "oatmeal", "cereal",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 297 |
"scrambled_eggs", "fried_eggs", "eggs_benedict", "omelet", "breakfast_burrito",
|
| 298 |
|
| 299 |
# FOOD-101 CATEGORIES (Proven dataset)
|
|
@@ -1024,22 +979,19 @@ class MultiModelFoodRecognizer:
|
|
| 1024 |
self._warm_up()
|
| 1025 |
|
| 1026 |
def _initialize_models(self):
|
| 1027 |
-
"""Initialize
|
| 1028 |
-
logger.info("🎯 Initializing
|
| 1029 |
|
| 1030 |
# MEMORY-AWARE LOADING: Priority-based loading with RAM monitoring
|
| 1031 |
sorted_models = sorted(FOOD_MODELS.items(), key=lambda x: x[1]["priority"])
|
| 1032 |
memory_used = 0
|
| 1033 |
memory_limit = 14.5 * 1024 # 14.5GB limit (1.5GB buffer for inference)
|
| 1034 |
|
| 1035 |
-
# Model memory estimates (MB)
|
| 1036 |
model_sizes = {
|
| 1037 |
-
"
|
| 1038 |
-
"
|
| 1039 |
-
"
|
| 1040 |
-
"scene_understanding_vit": 1100, "food_clip_huge": 3500,
|
| 1041 |
-
"openai_clip_large": 1700, "convnext_xxlarge": 2800,
|
| 1042 |
-
"efficientnet_ultra": 480, "resnet_deep_food": 240
|
| 1043 |
}
|
| 1044 |
|
| 1045 |
for model_key, model_config in sorted_models:
|
|
@@ -1075,11 +1027,11 @@ class MultiModelFoodRecognizer:
|
|
| 1075 |
model = model.to(self.device)
|
| 1076 |
model.eval()
|
| 1077 |
|
| 1078 |
-
#
|
| 1079 |
-
if hasattr(torch, 'compile') and self.device == "cuda" and "
|
| 1080 |
try:
|
| 1081 |
model = torch.compile(model, mode="reduce-overhead", dynamic=True)
|
| 1082 |
-
logger.info(f"⚡
|
| 1083 |
except Exception as e:
|
| 1084 |
logger.info(f"⚠️ Compilation failed for {model_key}: {e}")
|
| 1085 |
|
|
@@ -1299,14 +1251,14 @@ class MultiModelFoodRecognizer:
|
|
| 1299 |
if not all_predictions:
|
| 1300 |
raise RuntimeError("No models produced valid predictions")
|
| 1301 |
|
| 1302 |
-
# CONSERVATIVE FILTERING - Only remove obvious non-food
|
| 1303 |
non_food_items = {
|
| 1304 |
-
#
|
| 1305 |
'person', 'people', 'human', 'man', 'woman', 'child',
|
| 1306 |
-
'car', 'truck', 'vehicle', 'building', 'house',
|
| 1307 |
'computer', 'phone', 'laptop', 'tablet', 'television', 'tv',
|
| 1308 |
'book', 'paper', 'pen', 'pencil', 'chair', 'table', 'sofa',
|
| 1309 |
-
'cat', 'dog', 'bird'
|
| 1310 |
}
|
| 1311 |
|
| 1312 |
# Generic FOOD terms that should be deprioritized (but not removed)
|
|
@@ -1337,9 +1289,10 @@ class MultiModelFoodRecognizer:
|
|
| 1337 |
'french fries', 'fries', 'sweet potato fries', 'onion rings',
|
| 1338 |
'hot dog', 'corn dog', 'bratwurst', 'sausage', 'kielbasa',
|
| 1339 |
|
| 1340 |
-
# Balkan dishes (with alternative names)
|
| 1341 |
-
'cevapi', 'cevapcici', 'ćevapi', 'ćevapčići', '
|
| 1342 |
-
'pljeskavica', 'sarma', 'klepe', 'dolma', 'kajmak', 'ajvar',
|
|
|
|
| 1343 |
'prebranac', 'pasulj', 'grah', 'punjena paprika', 'punjene paprike',
|
| 1344 |
'stuffed peppers', 'musaka', 'moussaka', 'japrak', 'bamija', 'okra',
|
| 1345 |
'bosanski lonac', 'begova corba', 'tarhana', 'zeljanica', 'spinach pie',
|
|
|
|
| 59 |
|
| 60 |
# ==================== MULTI-MODEL FOOD RECOGNITION ====================
|
| 61 |
FOOD_MODELS = {
|
| 62 |
+
# ONLY REAL FOOD-101 SPECIALIST MODELS - NO GENERIC VISION MODELS!
|
| 63 |
|
| 64 |
+
# BEST FOOD-101 TRAINED MODELS (All have pancakes, hot_dog, hamburger, fish_and_chips etc.)
|
| 65 |
+
"food101_siglip_2025": {
|
| 66 |
+
"model_name": "prithivMLmods/Food-101-93M",
|
| 67 |
+
"type": "food_specialist_siglip",
|
| 68 |
+
"classes": 101,
|
| 69 |
"priority": 1,
|
| 70 |
+
"description": "Food-101 SiglipV2 93M (~400MB) - 2025 state-of-the-art food classifier with pancakes"
|
| 71 |
},
|
| 72 |
+
"food101_deit_2024": {
|
| 73 |
+
"model_name": "AventIQ-AI/Food-Classification-AI-Model",
|
| 74 |
+
"type": "food_specialist_deit",
|
| 75 |
+
"classes": 101,
|
| 76 |
"priority": 2,
|
| 77 |
+
"description": "Food-101 DeiT 97% accuracy (~350MB) - High-performance food classifier"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
},
|
| 79 |
+
"food101_vit_base": {
|
| 80 |
+
"model_name": "eslamxm/vit-base-food101",
|
|
|
|
|
|
|
| 81 |
"type": "food_specialist_vit",
|
| 82 |
"classes": 101,
|
| 83 |
+
"priority": 3,
|
| 84 |
+
"description": "Food-101 ViT-base (~344MB) - Vision transformer food classification"
|
| 85 |
+
},
|
| 86 |
+
"food101_swin": {
|
| 87 |
+
"model_name": "aspis/swin-finetuned-food101",
|
| 88 |
+
"type": "food_specialist_swin",
|
| 89 |
+
"classes": 101,
|
| 90 |
"priority": 4,
|
| 91 |
+
"description": "Food-101 Swin transformer (~348MB) - Advanced food classification"
|
| 92 |
},
|
| 93 |
+
"food101_baseline": {
|
| 94 |
+
"model_name": "nateraw/food",
|
| 95 |
+
"type": "food_specialist_baseline",
|
| 96 |
+
"classes": 101,
|
| 97 |
"priority": 5,
|
| 98 |
+
"description": "Food-101 Baseline (~500MB) - Proven food classification (includes pancakes, hot_dog)"
|
| 99 |
},
|
| 100 |
|
| 101 |
+
# ADDITIONAL SPECIALIZED FOOD MODELS (if available)
|
| 102 |
+
"food_categories_enhanced": {
|
| 103 |
+
"model_name": "Kaludi/food-category-classification-v2.0",
|
| 104 |
+
"type": "food_categories_specialist",
|
| 105 |
+
"classes": 12,
|
| 106 |
"priority": 6,
|
| 107 |
+
"description": "Food Categories v2.0 (~300MB) - Enhanced 12-category food classification"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 108 |
}
|
| 109 |
|
| 110 |
+
# FOOD-101 SPECIALISTS TOTAL:
|
| 111 |
+
# Primary Food-101 models: ~1.94GB (5 models with 101 specific dishes each)
|
| 112 |
+
# Enhanced categories: ~300MB
|
| 113 |
+
# TOTAL: ~2.24GB - Extremely efficient, focused only on food!
|
| 114 |
+
# 6 FOOD-SPECIALIST MODELS trained specifically on food datasets
|
|
|
|
|
|
|
|
|
|
| 115 |
}
|
| 116 |
|
| 117 |
+
# Default primary model - Best Food-101 Specialist
|
| 118 |
+
PRIMARY_MODEL = "food101_siglip_2025"
|
| 119 |
|
| 120 |
# CONFIDENCE THRESHOLDS - Realistic for ensemble models
|
| 121 |
MIN_CONFIDENCE_THRESHOLD = 0.20 # 20% minimum confidence (ensemble should be confident)
|
|
|
|
| 180 |
|
| 181 |
# ADVANCED BALKAN FOOD DETECTION - Map to closest Food-101 categories
|
| 182 |
BALKAN_TO_FOOD101_MAPPING = {
|
| 183 |
+
# Balkan dish → Closest Food-101 equivalent (ENHANCED for better recognition)
|
| 184 |
"cevapi": "hot_dog", # Closest grilled meat in Food-101
|
| 185 |
"cevapcici": "hot_dog", # Same as ćevapi
|
| 186 |
+
"chevapi": "hot_dog", # Alternative spelling
|
| 187 |
+
"chevapchichi": "hot_dog", # Alternative spelling
|
| 188 |
"pljeskavica": "hamburger", # Burger-like grilled meat patty
|
| 189 |
"burek": "pizza", # Closest baked dough dish
|
| 190 |
"sarma": "dumplings", # Stuffed/wrapped food
|
| 191 |
"kajmak": "cheese_plate", # Dairy product
|
| 192 |
"ajvar": "hummus", # Vegetable spread
|
| 193 |
+
"raznjici": "hot_dog", # Similar grilled meat
|
| 194 |
+
"kofte": "hot_dog", # Similar grilled meat
|
| 195 |
"prebranac": "baked_beans", # Bean dish (if exists)
|
| 196 |
"pasulj": "soup", # Bean soup
|
| 197 |
"begova_corba": "soup" # Turkish soup
|
|
|
|
| 245 |
"chocolate_chip_pancakes", "banana_pancakes", "protein_pancakes", "sourdough_pancakes",
|
| 246 |
"waffles", "belgian_waffles", "waffle", "french_toast", "toast", "bagel", "croissant",
|
| 247 |
"muffin", "english_muffin", "danish_pastry", "cinnamon_roll", "oatmeal", "cereal",
|
| 248 |
+
|
| 249 |
+
# BALKAN FOODS (Critical for ćevapi!)
|
| 250 |
+
"cevapi", "cevapcici", "chevapi", "chevapchichi", "kebab", "kofte", "pljeskavica",
|
| 251 |
+
"burek", "kajmak", "ajvar", "lepinja", "somun", "raznjici", "hot_dog",
|
| 252 |
"scrambled_eggs", "fried_eggs", "eggs_benedict", "omelet", "breakfast_burrito",
|
| 253 |
|
| 254 |
# FOOD-101 CATEGORIES (Proven dataset)
|
|
|
|
| 979 |
self._warm_up()
|
| 980 |
|
| 981 |
def _initialize_models(self):
|
| 982 |
+
"""Initialize Food-101 specialist ensemble with memory optimization."""
|
| 983 |
+
logger.info("🎯 Initializing FOOD-101 SPECIALIST food recognition system with memory optimization...")
|
| 984 |
|
| 985 |
# MEMORY-AWARE LOADING: Priority-based loading with RAM monitoring
|
| 986 |
sorted_models = sorted(FOOD_MODELS.items(), key=lambda x: x[1]["priority"])
|
| 987 |
memory_used = 0
|
| 988 |
memory_limit = 14.5 * 1024 # 14.5GB limit (1.5GB buffer for inference)
|
| 989 |
|
| 990 |
+
# Model memory estimates (MB) - UPDATED FOR FOOD-101 SPECIALISTS
|
| 991 |
model_sizes = {
|
| 992 |
+
"food101_siglip_2025": 400, "food101_deit_2024": 350,
|
| 993 |
+
"food101_vit_base": 344, "food101_swin": 348,
|
| 994 |
+
"food101_baseline": 500, "food_categories_enhanced": 300
|
|
|
|
|
|
|
|
|
|
| 995 |
}
|
| 996 |
|
| 997 |
for model_key, model_config in sorted_models:
|
|
|
|
| 1027 |
model = model.to(self.device)
|
| 1028 |
model.eval()
|
| 1029 |
|
| 1030 |
+
# FOOD-101 SPECIFIC COMPILATION
|
| 1031 |
+
if hasattr(torch, 'compile') and self.device == "cuda" and "food101" in model_key:
|
| 1032 |
try:
|
| 1033 |
model = torch.compile(model, mode="reduce-overhead", dynamic=True)
|
| 1034 |
+
logger.info(f"⚡ FOOD-101 {model_key} compiled with memory optimization")
|
| 1035 |
except Exception as e:
|
| 1036 |
logger.info(f"⚠️ Compilation failed for {model_key}: {e}")
|
| 1037 |
|
|
|
|
| 1251 |
if not all_predictions:
|
| 1252 |
raise RuntimeError("No models produced valid predictions")
|
| 1253 |
|
| 1254 |
+
# ULTRA-CONSERVATIVE FILTERING - Only remove obvious non-food for Food-101 specialists
|
| 1255 |
non_food_items = {
|
| 1256 |
+
# Minimal filtering since Food-101 models are trained on food only
|
| 1257 |
'person', 'people', 'human', 'man', 'woman', 'child',
|
| 1258 |
+
'car', 'truck', 'vehicle', 'building', 'house',
|
| 1259 |
'computer', 'phone', 'laptop', 'tablet', 'television', 'tv',
|
| 1260 |
'book', 'paper', 'pen', 'pencil', 'chair', 'table', 'sofa',
|
| 1261 |
+
'cat', 'dog', 'bird' # live animals only (removed 'fish' since it can be food)
|
| 1262 |
}
|
| 1263 |
|
| 1264 |
# Generic FOOD terms that should be deprioritized (but not removed)
|
|
|
|
| 1289 |
'french fries', 'fries', 'sweet potato fries', 'onion rings',
|
| 1290 |
'hot dog', 'corn dog', 'bratwurst', 'sausage', 'kielbasa',
|
| 1291 |
|
| 1292 |
+
# Balkan dishes (with alternative names) - ENHANCED for ćevapi detection
|
| 1293 |
+
'cevapi', 'cevapcici', 'ćevapi', 'ćevapčići', 'chevapi', 'chevapchichi',
|
| 1294 |
+
'burek', 'börek', 'pljeskavica', 'sarma', 'klepe', 'dolma', 'kajmak', 'ajvar',
|
| 1295 |
+
'kofte', 'raznjici', 'grilled meat', 'balkan sausage',
|
| 1296 |
'prebranac', 'pasulj', 'grah', 'punjena paprika', 'punjene paprike',
|
| 1297 |
'stuffed peppers', 'musaka', 'moussaka', 'japrak', 'bamija', 'okra',
|
| 1298 |
'bosanski lonac', 'begova corba', 'tarhana', 'zeljanica', 'spinach pie',
|