har1zarD committed on
Commit
c826c48
·
1 Parent(s): 8c64cb8
Files changed (1) hide show
  1. app.py +66 -113
app.py CHANGED
@@ -59,116 +59,63 @@ openai_client = None # Will be initialized in lifespan startup
59
 
60
  # ==================== MULTI-MODEL FOOD RECOGNITION ====================
61
  FOOD_MODELS = {
62
- # NOISYVIT 2025 STATE-OF-THE-ART FOOD RECOGNITION SYSTEM
63
 
64
- # NOISYVIT 2025 FLAGSHIP MODELS (Highest Priority)
65
- "noisyvit_2025_huge": {
66
- "model_name": "google/vit-huge-patch14-224-in21k",
67
- "type": "noisyvit_transformer_huge",
68
- "classes": 21000,
69
  "priority": 1,
70
- "description": "NoisyViT 2025 Huge (~2.5GB) - Ultimate robust food recognition with noise resilience"
71
  },
72
- "noisyvit_2025_large": {
73
- "model_name": "google/vit-large-patch16-224-in21k",
74
- "type": "noisyvit_transformer_large",
75
- "classes": 21000,
76
  "priority": 2,
77
- "description": "NoisyViT 2025 Large (~1.3GB) - Advanced robustness for complex multi-object scenes"
78
- },
79
- "noisyvit_2025_base_384": {
80
- "model_name": "google/vit-base-patch16-384",
81
- "type": "noisyvit_transformer_base",
82
- "classes": 1000,
83
- "priority": 3,
84
- "description": "NoisyViT 2025 Base 384px (~1.8GB) - High-resolution food detail detection"
85
  },
86
-
87
- # FOOD-101 SPECIALIZED ViT ENSEMBLE
88
- "food101_vit_specialist": {
89
- "model_name": "nateraw/food",
90
  "type": "food_specialist_vit",
91
  "classes": 101,
 
 
 
 
 
 
 
92
  "priority": 4,
93
- "description": "Food-101 ViT Specialist (~500MB) - Trained on 101 specific food categories"
94
  },
95
- "food_enhanced_classifier": {
96
- "model_name": "Kaludi/food-category-classification-v2.0",
97
- "type": "food_specialist_enhanced",
98
- "classes": 12,
99
  "priority": 5,
100
- "description": "Enhanced Food Classifier (~300MB) - Multi-category detection with ViT backbone"
101
  },
102
 
103
- # MULTI-OBJECT FOOD SCENE DETECTION
104
- "multi_object_vit": {
105
- "model_name": "microsoft/swin-large-patch4-window7-224",
106
- "type": "swin_transformer_multi_object",
107
- "classes": 1000,
108
  "priority": 6,
109
- "description": "Swin Large (~800MB) - Excellent for complex scenes with multiple food items"
110
- },
111
- "scene_understanding_vit": {
112
- "model_name": "microsoft/beit-large-patch16-224",
113
- "type": "beit_transformer_scene",
114
- "classes": 1000,
115
- "priority": 7,
116
- "description": "BEiT Large (~1.1GB) - Advanced scene understanding for mixed dishes"
117
- },
118
-
119
- # VISION-LANGUAGE MODELS FOR COMPLEX DESCRIPTIONS
120
- "food_clip_huge": {
121
- "model_name": "laion/CLIP-ViT-H-14-laion2B-s32B-b79K",
122
- "type": "clip_huge_food",
123
- "classes": 1000,
124
- "priority": 8,
125
- "description": "LAION CLIP Huge (~3.5GB) - Vision-language understanding for complex food descriptions"
126
- },
127
- "openai_clip_large": {
128
- "model_name": "openai/clip-vit-large-patch14",
129
- "type": "clip_large_food",
130
- "classes": 1000,
131
- "priority": 9,
132
- "description": "OpenAI CLIP Large (~1.7GB) - Robust vision-language for food understanding"
133
- },
134
-
135
- # CUTTING-EDGE ARCHITECTURE MODELS
136
- "convnext_xxlarge": {
137
- "model_name": "laion/CLIP-convnext_xxlarge-laion2B-s34B-b82K-augreg-soup",
138
- "type": "convnext_xxlarge_food",
139
- "classes": 1000,
140
- "priority": 10,
141
- "description": "ConvNeXt XXLarge (~2.8GB) - Massive CNN for detailed food feature extraction"
142
- },
143
- "efficientnet_ultra": {
144
- "model_name": "timm/tf_efficientnetv2_l_in21k",
145
- "type": "efficientnet_ultra_food",
146
- "classes": 21000,
147
- "priority": 11,
148
- "description": "EfficientNetV2 Large (~480MB) - Optimal efficiency for real-time food detection"
149
- },
150
-
151
- # MEMORY-OPTIMIZED BACKUP MODELS
152
- "resnet_deep_food": {
153
- "model_name": "microsoft/resnet-152",
154
- "type": "resnet_deep_food",
155
- "classes": 1000,
156
- "priority": 12,
157
- "description": "ResNet-152 (~240MB) - Memory-efficient deep baseline for food recognition"
158
  }
159
 
160
- # NOISYVIT 2025 ENSEMBLE TOTAL:
161
- # NoisyViT models: ~5.6GB (3 flagship models)
162
- # Food specialists: ~800MB
163
- # Multi-object models: ~1.9GB
164
- # CLIP vision-language: ~5.2GB
165
- # Cutting-edge architectures: ~3.3GB
166
- # TOTAL: ~16.8GB - Will use smart loading to stay under 16GB limit
167
- # 12 NOISYVIT-POWERED MODELS for ultimate food recognition accuracy
168
  }
169
 
170
- # Default primary model - NoisyViT 2025 Flagship
171
- PRIMARY_MODEL = "noisyvit_2025_huge"
172
 
173
  # CONFIDENCE THRESHOLDS - Realistic for ensemble models
174
  MIN_CONFIDENCE_THRESHOLD = 0.20 # 20% minimum confidence (ensemble should be confident)
@@ -233,14 +180,18 @@ SMART_FOOD_OVERRIDES = {
233
 
234
  # ADVANCED BALKAN FOOD DETECTION - Map to closest Food-101 categories
235
  BALKAN_TO_FOOD101_MAPPING = {
236
- # Balkan dish → Closest Food-101 equivalent
237
  "cevapi": "hot_dog", # Closest grilled meat in Food-101
238
  "cevapcici": "hot_dog", # Same as ćevapi
 
 
239
  "pljeskavica": "hamburger", # Burger-like grilled meat patty
240
  "burek": "pizza", # Closest baked dough dish
241
  "sarma": "dumplings", # Stuffed/wrapped food
242
  "kajmak": "cheese_plate", # Dairy product
243
  "ajvar": "hummus", # Vegetable spread
 
 
244
  "prebranac": "baked_beans", # Bean dish (if exists)
245
  "pasulj": "soup", # Bean soup
246
  "begova_corba": "soup" # Turkish soup
@@ -294,6 +245,10 @@ COMPREHENSIVE_FOOD_CATEGORIES = {
294
  "chocolate_chip_pancakes", "banana_pancakes", "protein_pancakes", "sourdough_pancakes",
295
  "waffles", "belgian_waffles", "waffle", "french_toast", "toast", "bagel", "croissant",
296
  "muffin", "english_muffin", "danish_pastry", "cinnamon_roll", "oatmeal", "cereal",
 
 
 
 
297
  "scrambled_eggs", "fried_eggs", "eggs_benedict", "omelet", "breakfast_burrito",
298
 
299
  # FOOD-101 CATEGORIES (Proven dataset)
@@ -1024,22 +979,19 @@ class MultiModelFoodRecognizer:
1024
  self._warm_up()
1025
 
1026
  def _initialize_models(self):
1027
- """Initialize NoisyViT 2025 ensemble with 16GB memory optimization."""
1028
- logger.info("🎯 Initializing NOISYVIT 2025 food recognition system with memory optimization...")
1029
 
1030
  # MEMORY-AWARE LOADING: Priority-based loading with RAM monitoring
1031
  sorted_models = sorted(FOOD_MODELS.items(), key=lambda x: x[1]["priority"])
1032
  memory_used = 0
1033
  memory_limit = 14.5 * 1024 # 14.5GB limit (1.5GB buffer for inference)
1034
 
1035
- # Model memory estimates (MB)
1036
  model_sizes = {
1037
- "noisyvit_2025_huge": 2500, "noisyvit_2025_large": 1300,
1038
- "noisyvit_2025_base_384": 1800, "food101_vit_specialist": 500,
1039
- "food_enhanced_classifier": 300, "multi_object_vit": 800,
1040
- "scene_understanding_vit": 1100, "food_clip_huge": 3500,
1041
- "openai_clip_large": 1700, "convnext_xxlarge": 2800,
1042
- "efficientnet_ultra": 480, "resnet_deep_food": 240
1043
  }
1044
 
1045
  for model_key, model_config in sorted_models:
@@ -1075,11 +1027,11 @@ class MultiModelFoodRecognizer:
1075
  model = model.to(self.device)
1076
  model.eval()
1077
 
1078
- # NOISYVIT-SPECIFIC COMPILATION
1079
- if hasattr(torch, 'compile') and self.device == "cuda" and "noisyvit" in model_key:
1080
  try:
1081
  model = torch.compile(model, mode="reduce-overhead", dynamic=True)
1082
- logger.info(f"⚡ NOISYVIT {model_key} compiled with memory optimization")
1083
  except Exception as e:
1084
  logger.info(f"⚠️ Compilation failed for {model_key}: {e}")
1085
 
@@ -1299,14 +1251,14 @@ class MultiModelFoodRecognizer:
1299
  if not all_predictions:
1300
  raise RuntimeError("No models produced valid predictions")
1301
 
1302
- # CONSERVATIVE FILTERING - Only remove obvious non-food items
1303
  non_food_items = {
1304
- # Only obvious garbage and non-food items
1305
  'person', 'people', 'human', 'man', 'woman', 'child',
1306
- 'car', 'truck', 'vehicle', 'building', 'house', 'tree', 'plant',
1307
  'computer', 'phone', 'laptop', 'tablet', 'television', 'tv',
1308
  'book', 'paper', 'pen', 'pencil', 'chair', 'table', 'sofa',
1309
- 'cat', 'dog', 'bird', 'fish' # live animals only
1310
  }
1311
 
1312
  # Generic FOOD terms that should be deprioritized (but not removed)
@@ -1337,9 +1289,10 @@ class MultiModelFoodRecognizer:
1337
  'french fries', 'fries', 'sweet potato fries', 'onion rings',
1338
  'hot dog', 'corn dog', 'bratwurst', 'sausage', 'kielbasa',
1339
 
1340
- # Balkan dishes (with alternative names)
1341
- 'cevapi', 'cevapcici', 'ćevapi', 'ćevapčići', 'burek', 'börek',
1342
- 'pljeskavica', 'sarma', 'klepe', 'dolma', 'kajmak', 'ajvar',
 
1343
  'prebranac', 'pasulj', 'grah', 'punjena paprika', 'punjene paprike',
1344
  'stuffed peppers', 'musaka', 'moussaka', 'japrak', 'bamija', 'okra',
1345
  'bosanski lonac', 'begova corba', 'tarhana', 'zeljanica', 'spinach pie',
 
59
 
60
  # ==================== MULTI-MODEL FOOD RECOGNITION ====================
61
  FOOD_MODELS = {
62
+ # ONLY REAL FOOD-101 SPECIALIST MODELS - NO GENERIC VISION MODELS!
63
 
64
+ # BEST FOOD-101 TRAINED MODELS (All have pancakes, hot_dog, hamburger, fish_and_chips etc.)
65
+ "food101_siglip_2025": {
66
+ "model_name": "prithivMLmods/Food-101-93M",
67
+ "type": "food_specialist_siglip",
68
+ "classes": 101,
69
  "priority": 1,
70
+ "description": "Food-101 SiglipV2 93M (~400MB) - 2025 state-of-the-art food classifier with pancakes"
71
  },
72
+ "food101_deit_2024": {
73
+ "model_name": "AventIQ-AI/Food-Classification-AI-Model",
74
+ "type": "food_specialist_deit",
75
+ "classes": 101,
76
  "priority": 2,
77
+ "description": "Food-101 DeiT 97% accuracy (~350MB) - High-performance food classifier"
 
 
 
 
 
 
 
78
  },
79
+ "food101_vit_base": {
80
+ "model_name": "eslamxm/vit-base-food101",
 
 
81
  "type": "food_specialist_vit",
82
  "classes": 101,
83
+ "priority": 3,
84
+ "description": "Food-101 ViT-base (~344MB) - Vision transformer food classification"
85
+ },
86
+ "food101_swin": {
87
+ "model_name": "aspis/swin-finetuned-food101",
88
+ "type": "food_specialist_swin",
89
+ "classes": 101,
90
  "priority": 4,
91
+ "description": "Food-101 Swin transformer (~348MB) - Advanced food classification"
92
  },
93
+ "food101_baseline": {
94
+ "model_name": "nateraw/food",
95
+ "type": "food_specialist_baseline",
96
+ "classes": 101,
97
  "priority": 5,
98
+ "description": "Food-101 Baseline (~500MB) - Proven food classification (includes pancakes, hot_dog)"
99
  },
100
 
101
+ # ADDITIONAL SPECIALIZED FOOD MODELS (if available)
102
+ "food_categories_enhanced": {
103
+ "model_name": "Kaludi/food-category-classification-v2.0",
104
+ "type": "food_categories_specialist",
105
+ "classes": 12,
106
  "priority": 6,
107
+ "description": "Food Categories v2.0 (~300MB) - Enhanced 12-category food classification"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  }
109
 
110
+ # FOOD-101 SPECIALISTS TOTAL:
111
+ # Primary Food-101 models: ~1.94GB (5 models with 101 specific dishes each)
112
+ # Enhanced categories: ~300MB
113
+ # TOTAL: ~2.24GB - Extremely efficient, focused only on food!
114
+ # 6 FOOD-SPECIALIST MODELS trained specifically on food datasets
 
 
 
115
  }
116
 
117
+ # Default primary model - Best Food-101 Specialist
118
+ PRIMARY_MODEL = "food101_siglip_2025"
119
 
120
  # CONFIDENCE THRESHOLDS - Realistic for ensemble models
121
  MIN_CONFIDENCE_THRESHOLD = 0.20 # 20% minimum confidence (ensemble should be confident)
 
180
 
181
  # ADVANCED BALKAN FOOD DETECTION - Map to closest Food-101 categories
182
  BALKAN_TO_FOOD101_MAPPING = {
183
+ # Balkan dish → Closest Food-101 equivalent (ENHANCED for better recognition)
184
  "cevapi": "hot_dog", # Closest grilled meat in Food-101
185
  "cevapcici": "hot_dog", # Same as ćevapi
186
+ "chevapi": "hot_dog", # Alternative spelling
187
+ "chevapchichi": "hot_dog", # Alternative spelling
188
  "pljeskavica": "hamburger", # Burger-like grilled meat patty
189
  "burek": "pizza", # Closest baked dough dish
190
  "sarma": "dumplings", # Stuffed/wrapped food
191
  "kajmak": "cheese_plate", # Dairy product
192
  "ajvar": "hummus", # Vegetable spread
193
+ "raznjici": "hot_dog", # Similar grilled meat
194
+ "kofte": "hot_dog", # Similar grilled meat
195
  "prebranac": "baked_beans", # Bean dish (if exists)
196
  "pasulj": "soup", # Bean soup
197
  "begova_corba": "soup" # Turkish soup
 
245
  "chocolate_chip_pancakes", "banana_pancakes", "protein_pancakes", "sourdough_pancakes",
246
  "waffles", "belgian_waffles", "waffle", "french_toast", "toast", "bagel", "croissant",
247
  "muffin", "english_muffin", "danish_pastry", "cinnamon_roll", "oatmeal", "cereal",
248
+
249
+ # BALKAN FOODS (Critical for ćevapi!)
250
+ "cevapi", "cevapcici", "chevapi", "chevapchichi", "kebab", "kofte", "pljeskavica",
251
+ "burek", "kajmak", "ajvar", "lepinja", "somun", "raznjici", "hot_dog",
252
  "scrambled_eggs", "fried_eggs", "eggs_benedict", "omelet", "breakfast_burrito",
253
 
254
  # FOOD-101 CATEGORIES (Proven dataset)
 
979
  self._warm_up()
980
 
981
  def _initialize_models(self):
982
+ """Initialize Food-101 specialist ensemble with memory optimization."""
983
+ logger.info("🎯 Initializing FOOD-101 SPECIALIST food recognition system with memory optimization...")
984
 
985
  # MEMORY-AWARE LOADING: Priority-based loading with RAM monitoring
986
  sorted_models = sorted(FOOD_MODELS.items(), key=lambda x: x[1]["priority"])
987
  memory_used = 0
988
  memory_limit = 14.5 * 1024 # 14.5GB limit (1.5GB buffer for inference)
989
 
990
+ # Model memory estimates (MB) - UPDATED FOR FOOD-101 SPECIALISTS
991
  model_sizes = {
992
+ "food101_siglip_2025": 400, "food101_deit_2024": 350,
993
+ "food101_vit_base": 344, "food101_swin": 348,
994
+ "food101_baseline": 500, "food_categories_enhanced": 300
 
 
 
995
  }
996
 
997
  for model_key, model_config in sorted_models:
 
1027
  model = model.to(self.device)
1028
  model.eval()
1029
 
1030
+ # FOOD-101 SPECIFIC COMPILATION
1031
+ if hasattr(torch, 'compile') and self.device == "cuda" and "food101" in model_key:
1032
  try:
1033
  model = torch.compile(model, mode="reduce-overhead", dynamic=True)
1034
+ logger.info(f"⚡ FOOD-101 {model_key} compiled with memory optimization")
1035
  except Exception as e:
1036
  logger.info(f"⚠️ Compilation failed for {model_key}: {e}")
1037
 
 
1251
  if not all_predictions:
1252
  raise RuntimeError("No models produced valid predictions")
1253
 
1254
+ # ULTRA-CONSERVATIVE FILTERING - Only remove obvious non-food for Food-101 specialists
1255
  non_food_items = {
1256
+ # Minimal filtering since Food-101 models are trained on food only
1257
  'person', 'people', 'human', 'man', 'woman', 'child',
1258
+ 'car', 'truck', 'vehicle', 'building', 'house',
1259
  'computer', 'phone', 'laptop', 'tablet', 'television', 'tv',
1260
  'book', 'paper', 'pen', 'pencil', 'chair', 'table', 'sofa',
1261
+ 'cat', 'dog', 'bird' # live animals only (removed 'fish' since it can be food)
1262
  }
1263
 
1264
  # Generic FOOD terms that should be deprioritized (but not removed)
 
1289
  'french fries', 'fries', 'sweet potato fries', 'onion rings',
1290
  'hot dog', 'corn dog', 'bratwurst', 'sausage', 'kielbasa',
1291
 
1292
+ # Balkan dishes (with alternative names) - ENHANCED for ćevapi detection
1293
+ 'cevapi', 'cevapcici', 'ćevapi', 'ćevapčići', 'chevapi', 'chevapchichi',
1294
+ 'burek', 'börek', 'pljeskavica', 'sarma', 'klepe', 'dolma', 'kajmak', 'ajvar',
1295
+ 'kofte', 'raznjici', 'grilled meat', 'balkan sausage',
1296
  'prebranac', 'pasulj', 'grah', 'punjena paprika', 'punjene paprike',
1297
  'stuffed peppers', 'musaka', 'moussaka', 'japrak', 'bamija', 'okra',
1298
  'bosanski lonac', 'begova corba', 'tarhana', 'zeljanica', 'spinach pie',