### 1. Imports and class names setup ###
import os
from timeit import default_timer as timer
from typing import Dict, Tuple

import gradio as gr
import torch

from model import create_effnetb4_model

# Setup class names: one label per line of class_names.txt.
# Blank lines are skipped so a stray empty line cannot produce an empty label
# or inflate the class count beyond the number of model outputs.
with open("class_names.txt", "r", encoding="utf-8") as f:
    class_names = [line.strip() for line in f if line.strip()]

### 2. Model and transforms preparation ###

# Create EffNetB4 model.
# The number of output classes is taken from the class-names file so the two
# cannot drift apart silently: if the file and the saved weights disagree,
# load_state_dict() below fails at startup instead of at request time.
effnetb4, effnetb4_transforms = create_effnetb4_model(
    num_classes=len(class_names),
)

# Load saved weights onto the CPU.
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state dict needs.
effnetb4.load_state_dict(
    torch.load(
        f="model_weights.pth",
        map_location=torch.device("cpu"),
        weights_only=True,
    )
)


### 3. Predict function ###
def predict(img) -> Tuple[Dict[str, float], float]:
    """Classify an image and report how long the prediction took.

    Args:
        img: The image supplied by the Gradio ``Image`` input (configured with
            ``type="pil"``), or ``None`` when the user submits without
            uploading anything.

    Returns:
        A tuple ``(pred_labels_and_probs, pred_time)`` where
        ``pred_labels_and_probs`` maps every class name to its softmax
        probability and ``pred_time`` is the elapsed time in seconds, rounded
        to 5 decimal places.

    Raises:
        gr.Error: If no image was provided.
    """
    # Gradio passes None when "Submit" is clicked with an empty image input;
    # surface a readable message instead of an opaque transform failure.
    if img is None:
        raise gr.Error("Please upload an image before submitting.")

    # Start the timer
    start_time = timer()

    # Transform the target image and add a batch dimension
    img = effnetb4_transforms(img).unsqueeze(0)

    # Put model into evaluation mode and turn on inference mode
    effnetb4.eval()
    with torch.inference_mode():
        # Pass the transformed image through the model and turn the prediction
        # logits into prediction probabilities
        pred_probs = torch.softmax(effnetb4(img), dim=1)

    # Pair each class name with its probability. A single .tolist() converts
    # the whole row to Python floats at once instead of one element at a time.
    pred_labels_and_probs = dict(zip(class_names, pred_probs[0].tolist()))

    # Calculate the prediction time
    pred_time = round(timer() - start_time, 5)

    # Return the prediction dictionary and prediction time
    return pred_labels_and_probs, pred_time


### 4. Gradio app ###

# Create title, description and article strings
title = "FoodVision - EfficientNet-B4"

description = """
### AI-Powered Food Classification System

Upload an image of food and the model will classify it into one of 101 food categories from the Food-101 dataset.

**Model Performance:**
- **Test Accuracy**: 82.2%
- **Top-5 Accuracy**: 93.8%
- **Inference Time**: ~0.3-0.5 seconds on CPU

**Technical Specifications:**
- **Architecture**: EfficientNet-B4 (Transfer Learning)
- **Model Size**: ~74.5 MB (19M parameters)
- **Training Dataset**: Food-101 (101,000 images across 101 classes)
- **Framework**: PyTorch 2.2.2
- **Input Size**: 380×380 pixels
- **Feature Extraction**: Pretrained on ImageNet
"""

article = """
---

### About This Project

This deep learning model was developed using **Transfer Learning** with EfficientNet-B4 as the base architecture. The model was fine-tuned on the Food-101 dataset to classify food images into 101 different categories.

**Key Highlights:**
- Achieved competitive accuracy through careful hyperparameter tuning
- Implemented data augmentation for robust performance
- Optimized for inference speed while maintaining accuracy
- Deployed on Hugging Face Spaces for easy accessibility

**Model Training Details:**
- Optimizer: Adam
- Loss Function: CrossEntropyLoss
- Training Approach: Fine-tuning with frozen feature extractor initially, then unfrozen for final epochs
- Validation: 20% split from training data

### Connect With Me

**Rohan Jain** | [LinkedIn](https://www.linkedin.com/in/jaroh23/) | [GitHub](https://github.com/rohanjain2312)

*Built with PyTorch, Gradio, and deployed on Hugging Face Spaces*
"""

# Create examples list from "examples/" directory.
# Sorted for a stable display order; hidden files (e.g. .DS_Store, .gitkeep)
# are skipped because they are not images.
example_list = [
    [os.path.join("examples", example)]
    for example in sorted(os.listdir("examples"))
    if not example.startswith(".")
]

# Create the Gradio demo
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil", label="Upload Food Image"),
    outputs=[
        gr.Label(num_top_classes=5, label="Top 5 Predictions"),
        gr.Number(label="Prediction time (s)"),
    ],
    examples=example_list,
    title=title,
    description=description,
    article=article,
    theme="default",
    allow_flagging="never",
)

# Launch the demo
if __name__ == "__main__":
    demo.launch()