Jahnavibh committed on
Commit
08dc0e6
·
1 Parent(s): 7b15f43

Add real-time Vela profiling integration with dynamic performance estimation from SR Vela Compiler

Browse files
Files changed (2) hide show
  1. app.py +157 -31
  2. requirements.txt +2 -1
app.py CHANGED
@@ -5,12 +5,18 @@ from io import BytesIO
5
  from PIL import Image
6
  import tensorflow as tf
7
  from huggingface_hub import hf_hub_download
 
 
 
 
 
8
 
9
  # Download the TFLite model and labels from your Hugging Face repository
10
  MODEL_REPO = "JahnaviBhansali/mobilenet-v2-ethos-u55"
11
  MODEL_FILE = "mobilenet_v2_1.0_224_INT8.tflite" # Using original INT8 model for Gradio compatibility
12
  VELA_MODEL_FILE = "mobilenet_v2_1.0_224_INT8_vela.tflite" # Vela-optimized model for Ethos-U55
13
  LABELS_FILE = "labelmappings.txt"
 
14
 
15
  print("Downloading model and labels from Hugging Face...")
16
  model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
@@ -35,6 +41,155 @@ print(f"Vela-optimized model also available: {VELA_MODEL_FILE}")
35
  # Force rebuild with modern design
36
  print(f"Repository: {MODEL_REPO}")
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  def preprocess_image(image):
39
  """
40
  Preprocess image for MobileNetV2 INT8 quantized model.
@@ -346,37 +501,8 @@ with gr.Blocks(
346
  example_food = gr.Button("Food", size="sm", elem_classes=["btn-example"])
347
 
348
  with gr.Column(scale=1):
349
- gr.HTML("""
350
- <div style='background:#1e1e2f;border-radius:18px;padding:18px 18px 12px 18px;
351
- max-width:430px;min-width:320px;width:100%;margin:auto;color:#eee;font-family:sans-serif;'>
352
- <h3 style='margin-top:0;margin-bottom:12px;font-size:1.35em;color:#00b0ff;text-align:left;'>Model Performance on SR110</h3>
353
- <div style='display:flex;flex-wrap:wrap;gap:10px;justify-content:center;'>
354
- <!-- Card 1: Accelerator -->
355
- <div style='flex:1 1 170px;min-width:170px;max-width:180px;background:#23233a;border-radius:12px;padding:10px 10px 8px 10px;'>
356
- <div style='font-size:1em;font-weight:520;margin-bottom:6px;color:#00b0ff;'>🚀 Accelerator</div>
357
- <div style='margin-bottom:2px;'><span style='color:#ccc;'>Configuration:</span> <span style='color:#fff;font-weight:500'>Ethos_U55_128</span></div>
358
- <div><span style='color:#ccc;'>Accelerator clock:</span> <span style='color:#fff;font-weight:500'>400 MHz</span></div>
359
- </div>
360
- <!-- Card 2: Memory Usage -->
361
- <div style='flex:1 1 170px;min-width:170px;max-width:180px;background:#23233a;border-radius:12px;padding:10px 10px 8px 10px;'>
362
- <div style='font-size:1em;font-weight:520;margin-bottom:6px;color:#00b0ff;'>💾 Memory Usage</div>
363
- <div style='margin-bottom:2px;'><span style='color:#ccc;'>Total SRAM:</span> <span style='color:#fff;font-weight:500'>353.50 KiB</span></div>
364
- <div><span style='color:#ccc;'>Total On-chip Flash:</span> <span style='color:#fff;font-weight:500'>3614.39 KiB</span></div>
365
- </div>
366
- <!-- Card 3: Operator Distribution -->
367
- <div style='flex:1 1 170px;min-width:170px;max-width:180px;background:#23233a;border-radius:12px;padding:10px 10px 8px 10px;'>
368
- <div style='font-size:1em;font-weight:520;margin-bottom:6px;color:#00b0ff;'>📈 Operator Distribution</div>
369
- <div style='margin-bottom:2px;'><span style='color:#ccc;'>CPU Operators:</span> <span style='color:#fff;font-weight:500'>0 (0.0%)</span></div>
370
- <div><span style='color:#ccc;'>NPU Operators:</span> <span style='color:#fff;font-weight:500'>95 (100.0%)</span></div>
371
- </div>
372
- <!-- Card 4: Performance -->
373
- <div style='flex:1 1 170px;min-width:170px;max-width:180px;background:#23233a;border-radius:12px;padding:10px 10px 8px 10px;'>
374
- <div style='font-size:1em;font-weight:520;margin-bottom:6px;color:#00b0ff;'>âš¡ Performance</div>
375
- <div><span style='color:#ccc;'>Inference time:</span> <span style='color:#fff;font-weight:500'>15.14 ms</span></div>
376
- </div>
377
- </div>
378
- </div>
379
- """)
380
 
381
  with gr.Group(elem_classes=["card"]):
382
  gr.HTML('<div class="card-header"><span style="color: white; font-weight: 600;">Classification Results</span></div>')
 
5
  from PIL import Image
6
  import tensorflow as tf
7
  from huggingface_hub import hf_hub_download
8
+ import tempfile
9
+ import shutil
10
+ import os
11
+ import subprocess
12
+ import re
13
 
14
  # Download the TFLite model and labels from your Hugging Face repository
15
  MODEL_REPO = "JahnaviBhansali/mobilenet-v2-ethos-u55"
16
  MODEL_FILE = "mobilenet_v2_1.0_224_INT8.tflite" # Using original INT8 model for Gradio compatibility
17
  VELA_MODEL_FILE = "mobilenet_v2_1.0_224_INT8_vela.tflite" # Vela-optimized model for Ethos-U55
18
  LABELS_FILE = "labelmappings.txt"
19
+ DEFAULT_CONFIG = "u55_eval_with_TA_config_400_and_200_MHz.ini"
20
 
21
  print("Downloading model and labels from Hugging Face...")
22
  model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
 
41
  # Force rebuild with modern design
42
  print(f"Repository: {MODEL_REPO}")
43
 
44
# Vela system configuration, materialised as an .ini file in the working
# directory under the name held in DEFAULT_CONFIG.
config_content = "\n".join((
    "[System_Config.Ethos_U55_400MHz_SRAM_3.2_GBs_Flash_0.05_GBs]",
    "core_clock=400e6",
    "axi0_port=Sram",
    "axi0_max_outstanding=32",
    "sram_clock_scale=1.0",
    "axi1_port=Off",
    "Sram=3.2GB",
    "OnChipFlash=0.05GB",
    "OffChipFlash=1GB",
    "Cmd_Buf_Alignment=4",
    "Cmd_Buf_Size=65536",
    "",  # keeps the trailing newline at the end of the file
))

with open(DEFAULT_CONFIG, 'w') as config_file:
    config_file.write(config_content)
60
+
61
def extract_summary_from_log(log_text):
    """Pull the headline metrics out of a Vela compiler log.

    Each known label is searched for in ``log_text``; the text that follows
    it on the same line becomes the metric's value.

    Args:
        log_text: Captured Vela output. ``None`` or an empty string yields an
            empty summary.

    Returns:
        A list of ``(label, value)`` tuples in the fixed label order below,
        containing only the labels that were found. The
        ``"Batch Inference time"`` entry is reported under the label
        ``"Inference time"`` and truncated at the first comma. Values are
        otherwise returned verbatim (stripped), so a leading ``"="`` is kept.
    """
    if not log_text:
        return []

    summary_keys = [
        "Accelerator configuration",
        "Accelerator clock",
        "Total SRAM used",
        "Total On-chip Flash used",
        "CPU operators",
        "NPU operators",
        "Batch Inference time",
    ]

    summary = []
    for key in summary_keys:
        # The separator is limited to spaces/tabs and the value must start
        # with a visible character: with a bare `\s+` a label that ends its
        # line would swallow the *next* line as its value, and a blank value
        # would shadow the caller's default with an empty string.
        match = re.search(rf"{re.escape(key)}[ \t]+(\S.*)", log_text)
        if match is None:
            continue
        value = match.group(1).strip()
        if key == "Batch Inference time":
            # Keep only the time; drop what follows the first comma.
            value = value.split(",")[0].strip()
            key = "Inference time"
        summary.append((key, value))
    return summary
81
+
82
def run_vela_analysis(model_path):
    """Compile ``model_path`` with the ``vela`` CLI and collect its metrics.

    The model and the config file named by ``DEFAULT_CONFIG`` are copied into
    a scratch directory, ``vela`` is run there, and its stdout is handed to
    ``extract_summary_from_log``. The scratch directory is removed afterwards
    whether or not the run succeeded.

    Returns:
        ``(summary, error)``: on success ``summary`` is a dict of the
        extracted metrics and ``error`` is ``None``; on failure ``summary``
        is ``{}`` and ``error`` is a message string.
    """
    workdir = tempfile.mkdtemp()

    try:
        # Work on private copies of the model and the config file.
        model_copy = os.path.join(workdir, "model.tflite")
        shutil.copy(model_path, model_copy)

        config_copy = os.path.join(workdir, DEFAULT_CONFIG)
        shutil.copy(DEFAULT_CONFIG, config_copy)

        out_dir = os.path.join(workdir, "vela_out")
        os.makedirs(out_dir, exist_ok=True)

        # `--name=value` options, emitted in insertion order.
        options = {
            "accelerator-config": "ethos-u55-128",
            "optimise": "Size",
            "config": config_copy,
            "memory-mode": "Sram_Only",
            "system-config": "Ethos_U55_400MHz_SRAM_3.2_GBs_Flash_0.05_GBs",
        }
        command = ["vela"]
        command.extend(f"--{name}={value}" for name, value in options.items())
        command.extend([
            model_copy,
            "--verbose-cycle-estimate",
            "--verbose-performance",
            f"--output-dir={out_dir}",
        ])

        # check=True turns a non-zero exit status into CalledProcessError.
        completed = subprocess.run(command, capture_output=True, text=True, check=True)

        metrics = extract_summary_from_log(completed.stdout)
        return (dict(metrics) if metrics else {}), None

    except subprocess.CalledProcessError as e:
        return {}, f"Vela compilation failed: {e.stderr}"
    except Exception as e:
        return {}, f"Error running Vela: {str(e)}"
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
130
+
131
def generate_vela_html(summary_dict, error_msg=None):
    """Render Vela metrics as the HTML "Estimated Results on SR110" panel.

    Args:
        summary_dict: Mapping of metric label to value. The labels read are
            'Accelerator configuration', 'Accelerator clock',
            'Total SRAM used', 'Total On-chip Flash used', 'CPU operators',
            'NPU operators' and 'Inference time'; any label that is missing
            falls back to a built-in default. ``None`` is treated as empty.
        error_msg: Optional failure description. When truthy, an error banner
            is placed above the panel.

    Returns:
        An HTML string. Every interpolated value is HTML-escaped, since both
        the error text and the metrics originate from external tool output.
    """
    import html  # local import: only this function needs it

    metrics = summary_dict or {}

    def field(label, default, strip_equals=False):
        """Return the escaped value for ``label``, or the escaped default."""
        value = metrics.get(label, default)
        if strip_equals and isinstance(value, str):
            # Operator counts can arrive as "= 95 (100.0%)".
            value = value.lstrip("= ").strip()
        return html.escape(str(value))

    accel_config = field('Accelerator configuration', 'Ethos_U55_128')
    accel_clock = field('Accelerator clock', '400 MHz')
    sram_used = field('Total SRAM used', '353.50 KiB')
    flash_used = field('Total On-chip Flash used', '3614.39 KiB')
    cpu_ops = field('CPU operators', '0 (0.0%)', strip_equals=True)
    npu_ops = field('NPU operators', '95 (100.0%)', strip_equals=True)
    inference_time = field('Inference time', '15.14 ms')

    # The banner promises default values, so it is rendered in addition to
    # the panel rather than instead of it.
    banner = ""
    if error_msg:
        safe_error = html.escape(str(error_msg))
        banner = f"""
    <div style='background:#fff3f3;border-radius:14px;padding:24px 18px 18px 18px;
                max-width:430px;min-width:320px;width:100%;margin:auto;color:#d32f2f;font-family:sans-serif;
                font-size:1.1em;text-align:center;font-weight:600;'>
        Vela analysis failed: {safe_error}<br>
        Showing default values.
    </div>
    """

    panel = f"""
    <div style='background:#1e1e2f;border-radius:18px;padding:18px 18px 12px 18px;
                max-width:430px;min-width:320px;width:100%;margin:auto;color:#eee;font-family:sans-serif;'>
        <h3 style='margin-top:0;margin-bottom:12px;font-size:1.35em;color:#00b0ff;text-align:left;'>Estimated Results on SR110</h3>
        <div style='display:flex;flex-wrap:wrap;gap:10px;justify-content:center;'>
            <!-- Card 1: Accelerator -->
            <div style='flex:1 1 170px;min-width:170px;max-width:180px;background:#23233a;border-radius:12px;padding:10px 10px 8px 10px;'>
                <div style='font-size:1em;font-weight:520;margin-bottom:6px;color:#00b0ff;'>🚀 Accelerator</div>
                <div style='margin-bottom:2px;'><span style='color:#ccc;'>Configuration:</span> <span style='color:#fff;font-weight:500'>{accel_config}</span></div>
                <div><span style='color:#ccc;'>Accelerator clock:</span> <span style='color:#fff;font-weight:500'>{accel_clock}</span></div>
            </div>
            <!-- Card 2: Memory Usage -->
            <div style='flex:1 1 170px;min-width:170px;max-width:180px;background:#23233a;border-radius:12px;padding:10px 10px 8px 10px;'>
                <div style='font-size:1em;font-weight:520;margin-bottom:6px;color:#00b0ff;'>💾 Memory Usage</div>
                <div style='margin-bottom:2px;'><span style='color:#ccc;'>Total SRAM:</span> <span style='color:#fff;font-weight:500'>{sram_used}</span></div>
                <div><span style='color:#ccc;'>Total On-chip Flash:</span> <span style='color:#fff;font-weight:500'>{flash_used}</span></div>
            </div>
            <!-- Card 3: Operator Distribution -->
            <div style='flex:1 1 170px;min-width:170px;max-width:180px;background:#23233a;border-radius:12px;padding:10px 10px 8px 10px;'>
                <div style='font-size:1em;font-weight:520;margin-bottom:6px;color:#00b0ff;'>📈 Operator Distribution</div>
                <div style='margin-bottom:2px;'><span style='color:#ccc;'>CPU Operators:</span> <span style='color:#fff;font-weight:500'>{cpu_ops}</span></div>
                <div><span style='color:#ccc;'>NPU Operators:</span> <span style='color:#fff;font-weight:500'>{npu_ops}</span></div>
            </div>
            <!-- Card 4: Performance -->
            <div style='flex:1 1 170px;min-width:170px;max-width:180px;background:#23233a;border-radius:12px;padding:10px 10px 8px 10px;'>
                <div style='font-size:1em;font-weight:520;margin-bottom:6px;color:#00b0ff;'>⚡ Performance</div>
                <div><span style='color:#ccc;'>Inference time:</span> <span style='color:#fff;font-weight:500'>{inference_time}</span></div>
            </div>
        </div>
    </div>
    """

    return banner + panel
187
+
188
# Profile the model a single time while the module loads; the rendered HTML
# is kept in `vela_html` so the UI can embed it without re-running Vela.
print("Running Vela analysis on MobileNetV2 model...")
vela_results, vela_error = run_vela_analysis(model_path)
vela_html = generate_vela_html(summary_dict=vela_results, error_msg=vela_error)
192
+
193
  def preprocess_image(image):
194
  """
195
  Preprocess image for MobileNetV2 INT8 quantized model.
 
501
  example_food = gr.Button("Food", size="sm", elem_classes=["btn-example"])
502
 
503
  with gr.Column(scale=1):
504
+ # Display Vela analysis results
505
+ gr.HTML(vela_html)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
506
 
507
  with gr.Group(elem_classes=["card"]):
508
  gr.HTML('<div class="card-header"><span style="color: white; font-weight: 600;">Classification Results</span></div>')
requirements.txt CHANGED
@@ -4,4 +4,5 @@ pillow>=9.0.0
4
  numpy>=1.21.0
5
  requests>=2.25.0
6
  huggingface-hub>=0.16.0
7
- pydantic==2.10.6
 
 
4
  numpy>=1.21.0
5
  requests>=2.25.0
6
  huggingface-hub>=0.16.0
7
+ pydantic==2.10.6
8
+ ethos-u-vela