Update app.py
Browse files
app.py
CHANGED
|
@@ -112,6 +112,13 @@ def load_pipeline():
|
|
| 112 |
st.code(traceback.format_exc())
|
| 113 |
raise e
|
| 114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
# Sidebar
|
| 116 |
with st.sidebar:
|
| 117 |
st.header("Settings")
|
|
@@ -124,6 +131,7 @@ with st.sidebar:
|
|
| 124 |
# Main content
|
| 125 |
tab1, tab2, tab3 = st.tabs(["Upload & Process", "Results", "Visualization"])
|
| 126 |
|
|
|
|
| 127 |
with tab1:
|
| 128 |
uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
|
| 129 |
|
|
@@ -138,16 +146,17 @@ with tab1:
|
|
| 138 |
check_hf_api()
|
| 139 |
verify_model_files()
|
| 140 |
pipeline = load_pipeline()
|
|
|
|
| 141 |
else:
|
| 142 |
st.stop()
|
| 143 |
|
| 144 |
with st.status("Processing audio...", expanded=True) as status:
|
| 145 |
progress_bar = st.progress(0)
|
| 146 |
|
| 147 |
-
def progress_hook(
|
| 148 |
-
if total
|
| 149 |
-
progress_percentage = min(
|
| 150 |
-
status.update(label=f"Processing: {
|
| 151 |
progress_bar.progress(progress_percentage)
|
| 152 |
|
| 153 |
# Run the pipeline on the audio file
|
|
@@ -171,6 +180,11 @@ with tab1:
|
|
| 171 |
rttm_line = f"SPEAKER {os.path.basename(tmp_path)} 1 {turn.start:.3f} {turn.duration:.3f} <NA> <NA> {speaker} <NA> <NA>\n"
|
| 172 |
rttm_content += rttm_line
|
| 173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
except Exception as e:
|
| 175 |
st.error(f"An error occurred: {str(e)}")
|
| 176 |
st.error("Error details:")
|
|
|
|
| 112 |
st.code(traceback.format_exc())
|
| 113 |
raise e
|
| 114 |
|
| 115 |
+
@st.cache_resource
def load_speechbrain_model():
    """Load the SpeechBrain ECAPA speaker-embedding classifier.

    The function is wrapped in ``st.cache_resource`` and reports its
    progress in the Streamlit UI with ``st.info`` / ``st.success``.

    Returns:
        The object produced by ``EncoderClassifier.from_hparams`` for the
        ``speechbrain/spkrec-ecapa-voxceleb`` source.
    """
    st.info("Loading SpeechBrain model...")
    model_source = "speechbrain/spkrec-ecapa-voxceleb"
    encoder = EncoderClassifier.from_hparams(source=model_source)
    st.success("SpeechBrain model loaded successfully")
    return encoder
|
| 121 |
+
|
| 122 |
# Sidebar
|
| 123 |
with st.sidebar:
|
| 124 |
st.header("Settings")
|
|
|
|
| 131 |
# Main content
|
| 132 |
tab1, tab2, tab3 = st.tabs(["Upload & Process", "Results", "Visualization"])
|
| 133 |
|
| 134 |
+
|
| 135 |
with tab1:
|
| 136 |
uploaded_file = st.file_uploader("Choose an audio file", type=['wav', 'mp3', 'flac'])
|
| 137 |
|
|
|
|
| 146 |
check_hf_api()
|
| 147 |
verify_model_files()
|
| 148 |
pipeline = load_pipeline()
|
| 149 |
+
speechbrain_model = load_speechbrain_model()
|
| 150 |
else:
|
| 151 |
st.stop()
|
| 152 |
|
| 153 |
with st.status("Processing audio...", expanded=True) as status:
|
| 154 |
progress_bar = st.progress(0)
|
| 155 |
|
| 156 |
+
def progress_hook(*args, **kwargs):
    """Update the Streamlit status label and progress bar from a pipeline hook call.

    Positional arguments are accepted and ignored so that the hook can be
    invoked with leading positional values (e.g. a step name and artifact)
    in addition to the ``completed`` / ``total`` keywords it actually uses.

    Args:
        *args: Ignored.
        **kwargs: Progress is reported only when both ``completed`` and a
            non-zero ``total`` are present; any other keywords are ignored.

    Returns:
        None.
    """
    completed = kwargs.get('completed')
    total = kwargs.get('total')
    # Calls that carry no usable progress information (missing keys, None,
    # or a zero total) must not touch the UI or divide by zero.
    if completed is None or not total:
        return
    # Clamp to [0.0, 1.0], the range a float progress value may take.
    progress_percentage = max(0.0, min(completed / total, 1.0))
    status.update(label=f"Processing: {progress_percentage:.1%} complete", state="running")
    progress_bar.progress(progress_percentage)
|
| 161 |
|
| 162 |
# Run the pipeline on the audio file
|
|
|
|
| 180 |
rttm_line = f"SPEAKER {os.path.basename(tmp_path)} 1 {turn.start:.3f} {turn.duration:.3f} <NA> <NA> {speaker} <NA> <NA>\n"
|
| 181 |
rttm_content += rttm_line
|
| 182 |
|
| 183 |
+
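# Use SpeechBrain for speaker embedding (optional).
# NOTE(review): the waveform is passed to encode_batch exactly as loaded; sample_rate is never used, so there is no resampling or mono downmix here — confirm the input matches the sampling rate / channel layout the ECAPA model expects.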
|
| 184 |
+
waveform, sample_rate = torchaudio.load(tmp_path)
|
| 185 |
+
embeddings = speechbrain_model.encode_batch(waveform)
|
| 186 |
+
st.success("Speaker embeddings generated successfully")
|
| 187 |
+
|
| 188 |
except Exception as e:
|
| 189 |
st.error(f"An error occurred: {str(e)}")
|
| 190 |
st.error("Error details:")
|