Sid26Roy committed on
Commit
2991b9b
·
verified ·
1 Parent(s): 996ab19

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -13
app.py CHANGED
@@ -20,16 +20,11 @@ id2label = {
20
  "7": "Surprised"
21
  }
22
 
23
- def classify_audio(audio):
24
- if audio is None:
25
- return {"error": "No audio received. Please upload a file."}
26
- # audio = (numpy_array, sample_rate)
27
- speech, sample_rate = audio
28
 
29
- # Make sure sample rate is a Python int
30
- sample_rate = int(sample_rate)
31
-
32
- # Process audio for HF model
33
  inputs = processor(
34
  speech,
35
  sampling_rate=sample_rate,
@@ -39,19 +34,23 @@ def classify_audio(audio):
39
 
40
  with torch.no_grad():
41
  outputs = model(**inputs)
42
- probs = torch.softmax(outputs.logits, dim=1).squeeze().tolist()
 
43
 
44
- return {id2label[str(i)]: round(probs[i], 3) for i in range(len(probs))}
 
 
45
 
 
46
 
47
  # Gradio Interface
48
  iface = gr.Interface(
49
  fn=classify_audio,
50
- inputs = gr.Audio(type="numpy", label="Upload Audio"),
51
  outputs=gr.Label(num_top_classes=8, label="Emotion Classification"),
52
  title="Speech Emotion Classification",
53
  description="Upload an audio clip to classify the speaker's emotion from voice signals."
54
  )
55
 
56
  if __name__ == "__main__":
57
- iface.launch(share=False, debug=False, ssr_mode=False)
 
20
  "7": "Surprised"
21
  }
22
 
23
+ def classify_audio(audio_path):
24
+ # Load and resample audio to 16kHz
25
+ speech, sample_rate = librosa.load(audio_path, sr=16000)
 
 
26
 
27
+ # Process audio
 
 
 
28
  inputs = processor(
29
  speech,
30
  sampling_rate=sample_rate,
 
34
 
35
  with torch.no_grad():
36
  outputs = model(**inputs)
37
+ logits = outputs.logits
38
+ probs = torch.nn.functional.softmax(logits, dim=1).squeeze().tolist()
39
 
40
+ prediction = {
41
+ id2label[str(i)]: round(probs[i], 3) for i in range(len(probs))
42
+ }
43
 
44
+ return prediction
45
 
46
  # Gradio Interface
47
  iface = gr.Interface(
48
  fn=classify_audio,
49
+ inputs=gr.Audio(type="filepath", label="Upload Audio (WAV, MP3, etc.)"),
50
  outputs=gr.Label(num_top_classes=8, label="Emotion Classification"),
51
  title="Speech Emotion Classification",
52
  description="Upload an audio clip to classify the speaker's emotion from voice signals."
53
  )
54
 
55
  if __name__ == "__main__":
56
+ iface.launch()