Spaces:

JahnaviBhansali
/

MobileNetDemo

Sleeping

App Files Files Community

Jahnavibh committed on Jul 25

Commit

08dc0e6

1 Parent(s): 7b15f43

Add real-time Vela profiling integration with dynamic performance estimation from SR Vela Compiler

Browse files

Files changed (2) hide show

app.py +157 -31
requirements.txt +2 -1

app.py CHANGED Viewed

@@ -5,12 +5,18 @@ from io import BytesIO
 from PIL import Image
 import tensorflow as tf
 from huggingface_hub import hf_hub_download
 # Download the TFLite model and labels from your Hugging Face repository
 MODEL_REPO = "JahnaviBhansali/mobilenet-v2-ethos-u55"
 MODEL_FILE = "mobilenet_v2_1.0_224_INT8.tflite"  # Using original INT8 model for Gradio compatibility
 VELA_MODEL_FILE = "mobilenet_v2_1.0_224_INT8_vela.tflite"  # Vela-optimized model for Ethos-U55
 LABELS_FILE = "labelmappings.txt"
 print("Downloading model and labels from Hugging Face...")
 model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
@@ -35,6 +41,155 @@ print(f"Vela-optimized model also available: {VELA_MODEL_FILE}")
 # Force rebuild with modern design
 print(f"Repository: {MODEL_REPO}")
 def preprocess_image(image):
     """
     Preprocess image for MobileNetV2 INT8 quantized model.
@@ -346,37 +501,8 @@ with gr.Blocks(
                         example_food = gr.Button("Food", size="sm", elem_classes=["btn-example"])
         with gr.Column(scale=1):
-            gr.HTML("""
-            <div style='background:#1e1e2f;border-radius:18px;padding:18px 18px 12px 18px;
-            max-width:430px;min-width:320px;width:100%;margin:auto;color:#eee;font-family:sans-serif;'>
-                <h3 style='margin-top:0;margin-bottom:12px;font-size:1.35em;color:#00b0ff;text-align:left;'>Model Performance on SR110</h3>
-                <div style='display:flex;flex-wrap:wrap;gap:10px;justify-content:center;'>
-                    <!-- Card 1: Accelerator -->
-                    <div style='flex:1 1 170px;min-width:170px;max-width:180px;background:#23233a;border-radius:12px;padding:10px 10px 8px 10px;'>
-                        <div style='font-size:1em;font-weight:520;margin-bottom:6px;color:#00b0ff;'>🚀 Accelerator</div>
-                        <div style='margin-bottom:2px;'><span style='color:#ccc;'>Configuration:</span> <span style='color:#fff;font-weight:500'>Ethos_U55_128</span></div>
-                        <div><span style='color:#ccc;'>Accelerator clock:</span> <span style='color:#fff;font-weight:500'>400 MHz</span></div>
-                    </div>
-                    <!-- Card 2: Memory Usage -->
-                    <div style='flex:1 1 170px;min-width:170px;max-width:180px;background:#23233a;border-radius:12px;padding:10px 10px 8px 10px;'>
-                        <div style='font-size:1em;font-weight:520;margin-bottom:6px;color:#00b0ff;'>💾 Memory Usage</div>
-                        <div style='margin-bottom:2px;'><span style='color:#ccc;'>Total SRAM:</span> <span style='color:#fff;font-weight:500'>353.50 KiB</span></div>
-                        <div><span style='color:#ccc;'>Total On-chip Flash:</span> <span style='color:#fff;font-weight:500'>3614.39 KiB</span></div>
-                    </div>
-                    <!-- Card 3: Operator Distribution -->
-                    <div style='flex:1 1 170px;min-width:170px;max-width:180px;background:#23233a;border-radius:12px;padding:10px 10px 8px 10px;'>
-                        <div style='font-size:1em;font-weight:520;margin-bottom:6px;color:#00b0ff;'>📈 Operator Distribution</div>
-                        <div style='margin-bottom:2px;'><span style='color:#ccc;'>CPU Operators:</span> <span style='color:#fff;font-weight:500'>0 (0.0%)</span></div>
-                        <div><span style='color:#ccc;'>NPU Operators:</span> <span style='color:#fff;font-weight:500'>95 (100.0%)</span></div>
-                    </div>
-                    <!-- Card 4: Performance -->
-                    <div style='flex:1 1 170px;min-width:170px;max-width:180px;background:#23233a;border-radius:12px;padding:10px 10px 8px 10px;'>
-                        <div style='font-size:1em;font-weight:520;margin-bottom:6px;color:#00b0ff;'>⚡ Performance</div>
-                        <div><span style='color:#ccc;'>Inference time:</span> <span style='color:#fff;font-weight:500'>15.14 ms</span></div>
-                    </div>
-                </div>
-            </div>
-            """)
             with gr.Group(elem_classes=["card"]):
                 gr.HTML('<div class="card-header"><span style="color: white; font-weight: 600;">Classification Results</span></div>')

 from PIL import Image
 import tensorflow as tf
 from huggingface_hub import hf_hub_download
+import tempfile
+import shutil
+import os
+import subprocess
+import re
 # Download the TFLite model and labels from your Hugging Face repository
 MODEL_REPO = "JahnaviBhansali/mobilenet-v2-ethos-u55"
 MODEL_FILE = "mobilenet_v2_1.0_224_INT8.tflite"  # Using original INT8 model for Gradio compatibility
 VELA_MODEL_FILE = "mobilenet_v2_1.0_224_INT8_vela.tflite"  # Vela-optimized model for Ethos-U55
 LABELS_FILE = "labelmappings.txt"
+DEFAULT_CONFIG = "u55_eval_with_TA_config_400_and_200_MHz.ini"
 print("Downloading model and labels from Hugging Face...")
 model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
 # Force rebuild with modern design
 print(f"Repository: {MODEL_REPO}")
+# Create Vela config file.
+# The INI text below is written to DEFAULT_CONFIG in the current working
+# directory at module load. run_vela_analysis() copies that file into its
+# scratch directory and passes the section name (the part after
+# "System_Config.") to Vela via --system-config, so the two must stay in sync.
+config_content = """[System_Config.Ethos_U55_400MHz_SRAM_3.2_GBs_Flash_0.05_GBs]
+core_clock=400e6
+axi0_port=Sram
+axi0_max_outstanding=32
+sram_clock_scale=1.0
+axi1_port=Off
+Sram=3.2GB
+OnChipFlash=0.05GB
+OffChipFlash=1GB
+Cmd_Buf_Alignment=4
+Cmd_Buf_Size=65536
+"""
+with open(DEFAULT_CONFIG, 'w') as f:
+    f.write(config_content)
+def extract_summary_from_log(log_text):
+    """Pull the headline metrics out of Vela's console output.
+
+    Args:
+        log_text: Text captured from the Vela compiler's stdout.
+
+    Returns:
+        A list of ``(label, value)`` tuples, in the fixed order of
+        ``summary_keys``, containing only the metrics that were found.
+        ``"Batch Inference time"`` is reported under the label
+        ``"Inference time"`` and keeps only the text before the first comma.
+    """
+    summary_keys = [
+        "Accelerator configuration",
+        "Accelerator clock",
+        "Total SRAM used",
+        "Total On-chip Flash used",
+        "CPU operators",
+        "NPU operators",
+        "Batch Inference time",
+    ]
+    summary = []
+    for key in summary_keys:
+        # Only horizontal whitespace may separate a label from its value, and
+        # the value must start with a non-blank character. A plain ``\s+``
+        # would also cross a line break and report the *next* line as the
+        # value whenever a label is the last thing on its line.
+        match = re.search(rf"{re.escape(key)}[ \t]+(\S.*)", log_text)
+        if not match:
+            continue
+        value = match.group(1).strip()
+        label = key
+        if key == "Batch Inference time":
+            # The line also carries throughput after a comma; keep the time only.
+            value = value.split(",")[0].strip()
+            label = "Inference time"
+        summary.append((label, value))
+    return summary
+def run_vela_analysis(model_path):
+    """Compile the model with Vela in a scratch directory and collect metrics.
+
+    Args:
+        model_path: Path to the ``.tflite`` file to hand to the ``vela`` CLI.
+
+    Returns:
+        A ``(metrics, error)`` pair. On success ``metrics`` maps metric labels
+        to the values parsed from Vela's stdout and ``error`` is ``None``. On
+        any failure ``metrics`` is ``{}`` and ``error`` is a message string.
+    """
+    work_dir = tempfile.mkdtemp()
+    try:
+        # Stage the model and the config side by side so Vela only ever
+        # touches files inside the scratch directory.
+        staged_model = os.path.join(work_dir, "model.tflite")
+        shutil.copy(model_path, staged_model)
+        staged_config = os.path.join(work_dir, DEFAULT_CONFIG)
+        shutil.copy(DEFAULT_CONFIG, staged_config)
+        out_dir = os.path.join(work_dir, "vela_out")
+        os.makedirs(out_dir, exist_ok=True)
+        vela_cmd = [
+            "vela",
+            "--accelerator-config=ethos-u55-128",
+            "--optimise=Size",
+            f"--config={staged_config}",
+            "--memory-mode=Sram_Only",
+            "--system-config=Ethos_U55_400MHz_SRAM_3.2_GBs_Flash_0.05_GBs",
+            staged_model,
+            "--verbose-cycle-estimate",
+            "--verbose-performance",
+            f"--output-dir={out_dir}",
+        ]
+        completed = subprocess.run(
+            vela_cmd, capture_output=True, text=True, check=True
+        )
+        # The metrics of interest are parsed from the captured stdout.
+        parsed = extract_summary_from_log(completed.stdout)
+        return dict(parsed or []), None
+    except subprocess.CalledProcessError as exc:
+        # Vela ran but exited non-zero; surface what it wrote to stderr.
+        return {}, f"Vela compilation failed: {exc.stderr}"
+    except Exception as exc:
+        # Anything else (missing file, missing executable, ...) is reported
+        # as a message rather than raised.
+        return {}, f"Error running Vela: {str(exc)}"
+    finally:
+        # The scratch directory is removed on every path.
+        shutil.rmtree(work_dir, ignore_errors=True)
+def generate_vela_html(summary_dict, error_msg=None):
+    """Render the Vela performance panel as an HTML fragment.
+
+    Args:
+        summary_dict: Mapping of metric label to value as produced from the
+            Vela log. Missing keys fall back to built-in default values;
+            ``None`` is treated as an empty mapping.
+        error_msg: Optional failure description. When given, an error banner
+            is rendered above the panel and the panel shows whatever values
+            are available (defaults for anything missing).
+
+    Returns:
+        An HTML string. Every interpolated value is HTML-escaped.
+    """
+    import html  # function-scope import keeps this block self-contained
+    summary_dict = summary_dict or {}
+    # Helper function to clean operator values
+    def clean_ops(val):
+        # These values arrive with a leading "= " (e.g. "= 0 (0.0%)").
+        return val.lstrip("= ").strip() if isinstance(val, str) else val
+    def esc(val):
+        # Values come from compiler output; never trust them as markup.
+        return html.escape(str(val))
+    banner = ""
+    if error_msg:
+        # The banner promises default values, so it is followed by the panel
+        # below instead of being returned on its own.
+        banner = f"""
+        <div style='background:#fff3f3;border-radius:14px;padding:24px 18px 18px 18px;
+        max-width:430px;min-width:320px;width:100%;margin:auto;color:#d32f2f;font-family:sans-serif;
+        font-size:1.1em;text-align:center;font-weight:600;'>
+        Vela analysis failed: {esc(error_msg)}<br>
+        Showing default values.
+        </div>
+        """
+    # Get values from Vela analysis or use defaults
+    accel_config = esc(summary_dict.get('Accelerator configuration', 'Ethos_U55_128'))
+    accel_clock = esc(summary_dict.get('Accelerator clock', '400 MHz'))
+    sram_used = esc(summary_dict.get('Total SRAM used', '353.50 KiB'))
+    flash_used = esc(summary_dict.get('Total On-chip Flash used', '3614.39 KiB'))
+    cpu_ops = esc(clean_ops(summary_dict.get('CPU operators', '0 (0.0%)')))
+    npu_ops = esc(clean_ops(summary_dict.get('NPU operators', '95 (100.0%)')))
+    inference_time = esc(summary_dict.get('Inference time', '15.14 ms'))
+    panel = f"""
+    <div style='background:#1e1e2f;border-radius:18px;padding:18px 18px 12px 18px;
+    max-width:430px;min-width:320px;width:100%;margin:auto;color:#eee;font-family:sans-serif;'>
+        <h3 style='margin-top:0;margin-bottom:12px;font-size:1.35em;color:#00b0ff;text-align:left;'>Estimated Results on SR110</h3>
+        <div style='display:flex;flex-wrap:wrap;gap:10px;justify-content:center;'>
+            <!-- Card 1: Accelerator -->
+            <div style='flex:1 1 170px;min-width:170px;max-width:180px;background:#23233a;border-radius:12px;padding:10px 10px 8px 10px;'>
+                <div style='font-size:1em;font-weight:520;margin-bottom:6px;color:#00b0ff;'>🚀 Accelerator</div>
+                <div style='margin-bottom:2px;'><span style='color:#ccc;'>Configuration:</span> <span style='color:#fff;font-weight:500'>{accel_config}</span></div>
+                <div><span style='color:#ccc;'>Accelerator clock:</span> <span style='color:#fff;font-weight:500'>{accel_clock}</span></div>
+            </div>
+            <!-- Card 2: Memory Usage -->
+            <div style='flex:1 1 170px;min-width:170px;max-width:180px;background:#23233a;border-radius:12px;padding:10px 10px 8px 10px;'>
+                <div style='font-size:1em;font-weight:520;margin-bottom:6px;color:#00b0ff;'>💾 Memory Usage</div>
+                <div style='margin-bottom:2px;'><span style='color:#ccc;'>Total SRAM:</span> <span style='color:#fff;font-weight:500'>{sram_used}</span></div>
+                <div><span style='color:#ccc;'>Total On-chip Flash:</span> <span style='color:#fff;font-weight:500'>{flash_used}</span></div>
+            </div>
+            <!-- Card 3: Operator Distribution -->
+            <div style='flex:1 1 170px;min-width:170px;max-width:180px;background:#23233a;border-radius:12px;padding:10px 10px 8px 10px;'>
+                <div style='font-size:1em;font-weight:520;margin-bottom:6px;color:#00b0ff;'>📈 Operator Distribution</div>
+                <div style='margin-bottom:2px;'><span style='color:#ccc;'>CPU Operators:</span> <span style='color:#fff;font-weight:500'>{cpu_ops}</span></div>
+                <div><span style='color:#ccc;'>NPU Operators:</span> <span style='color:#fff;font-weight:500'>{npu_ops}</span></div>
+            </div>
+            <!-- Card 4: Performance -->
+            <div style='flex:1 1 170px;min-width:170px;max-width:180px;background:#23233a;border-radius:12px;padding:10px 10px 8px 10px;'>
+                <div style='font-size:1em;font-weight:520;margin-bottom:6px;color:#00b0ff;'>⚡ Performance</div>
+                <div><span style='color:#ccc;'>Inference time:</span> <span style='color:#fff;font-weight:500'>{inference_time}</span></div>
+            </div>
+        </div>
+    </div>
+    """
+    return banner + panel
+# Run Vela analysis on startup and cache results.
+# This executes once when the module is loaded; the rendered HTML is kept in
+# the module-level ``vela_html`` so the UI layout can embed it directly
+# without invoking the compiler again.
+print("Running Vela analysis on MobileNetV2 model...")
+vela_results, vela_error = run_vela_analysis(model_path)
+vela_html = generate_vela_html(vela_results, vela_error)
 def preprocess_image(image):
     """
     Preprocess image for MobileNetV2 INT8 quantized model.
                         example_food = gr.Button("Food", size="sm", elem_classes=["btn-example"])
         with gr.Column(scale=1):
+            # Display Vela analysis results
+            gr.HTML(vela_html)
             with gr.Group(elem_classes=["card"]):
                 gr.HTML('<div class="card-header"><span style="color: white; font-weight: 600;">Classification Results</span></div>')

requirements.txt CHANGED Viewed

@@ -4,4 +4,5 @@ pillow>=9.0.0
 numpy>=1.21.0
 requests>=2.25.0
 huggingface-hub>=0.16.0
-pydantic==2.10.6

 numpy>=1.21.0
 requests>=2.25.0
 huggingface-hub>=0.16.0
+pydantic==2.10.6
+ethos-u-vela