Spaces:
Running
Running
Commit
·
8d5b897
1
Parent(s):
2b1f9fe
fix: update transcription handling to return last transcription and improve state management
Browse files
app.py
CHANGED
|
@@ -135,9 +135,8 @@ def transcribe(audio, model_name="nvidia/parakeet-tdt-0.6b-v2", state="", audio_
|
|
| 135 |
audio_buffer = [full_audio[-keep_samples:]]
|
| 136 |
else:
|
| 137 |
audio_buffer = []
|
| 138 |
-
|
| 139 |
print(f"New state: {new_state}")
|
| 140 |
-
return new_state,
|
| 141 |
|
| 142 |
except Exception as e:
|
| 143 |
print(f"Error processing audio: {e}")
|
|
@@ -243,7 +242,8 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
|
|
| 243 |
placeholder="Transcription will appear here after clicking 'Transcribe Audio File'",
|
| 244 |
lines=10
|
| 245 |
)
|
| 246 |
-
|
|
|
|
| 247 |
state = gr.State("")
|
| 248 |
audio_buffer = gr.State(value=None)
|
| 249 |
last_processed_time = gr.State(value=0)
|
|
@@ -287,7 +287,7 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
|
|
| 287 |
) # Clear the real-time transcription
|
| 288 |
def clear_transcription():
|
| 289 |
print("Clearing real-time transcription")
|
| 290 |
-
return "", "", None, 0 #
|
| 291 |
|
| 292 |
# Clear the file transcription
|
| 293 |
def clear_file_transcription():
|
|
@@ -298,20 +298,36 @@ with gr.Blocks(title="Real-time Speech-to-Text with NeMo") as demo:
|
|
| 298 |
clear_btn.click(
|
| 299 |
fn=clear_transcription,
|
| 300 |
inputs=[],
|
| 301 |
-
outputs=[state,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
)
|
| 303 |
|
| 304 |
clear_file_btn.click(
|
| 305 |
fn=clear_file_transcription,
|
| 306 |
inputs=[],
|
| 307 |
outputs=[file_transcription]
|
| 308 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
|
| 310 |
-
# Update the main text output when the state changes
|
| 311 |
state.change(
|
| 312 |
-
fn=
|
| 313 |
inputs=[state],
|
| 314 |
-
outputs=[text_output]
|
|
|
|
| 315 |
|
| 316 |
|
| 317 |
|
|
|
|
| 135 |
audio_buffer = [full_audio[-keep_samples:]]
|
| 136 |
else:
|
| 137 |
audio_buffer = []
|
|
|
|
| 138 |
print(f"New state: {new_state}")
|
| 139 |
+
return new_state, transcription, audio_buffer, last_processed_time # Return last transcription for streaming_text
|
| 140 |
|
| 141 |
except Exception as e:
|
| 142 |
print(f"Error processing audio: {e}")
|
|
|
|
| 242 |
placeholder="Transcription will appear here after clicking 'Transcribe Audio File'",
|
| 243 |
lines=10
|
| 244 |
)
|
| 245 |
+
|
| 246 |
+
# State to store the ongoing transcription
|
| 247 |
state = gr.State("")
|
| 248 |
audio_buffer = gr.State(value=None)
|
| 249 |
last_processed_time = gr.State(value=0)
|
|
|
|
| 287 |
) # Clear the real-time transcription
|
| 288 |
def clear_transcription():
|
| 289 |
print("Clearing real-time transcription")
|
| 290 |
+
return "", "", None, 0 # Return empty values for state, text_output, audio_buffer, and last_processed_time
|
| 291 |
|
| 292 |
# Clear the file transcription
|
| 293 |
def clear_file_transcription():
|
|
|
|
| 298 |
clear_btn.click(
|
| 299 |
fn=clear_transcription,
|
| 300 |
inputs=[],
|
| 301 |
+
outputs=[state, text_output, audio_buffer, last_processed_time]
|
| 302 |
+
)
|
| 303 |
+
|
| 304 |
+
# Also clear streaming_text when clearing the transcription
|
| 305 |
+
clear_btn.click(
|
| 306 |
+
fn=lambda: "",
|
| 307 |
+
inputs=[],
|
| 308 |
+
outputs=[streaming_text]
|
| 309 |
)
|
| 310 |
|
| 311 |
clear_file_btn.click(
|
| 312 |
fn=clear_file_transcription,
|
| 313 |
inputs=[],
|
| 314 |
outputs=[file_transcription]
|
| 315 |
+
) # Update the main text output when the state changes
|
| 316 |
+
def update_output(transcript):
|
| 317 |
+
# For streaming_text, show just the last few words or chunks
|
| 318 |
+
words = transcript.split()
|
| 319 |
+
if len(words) > 15:
|
| 320 |
+
streaming_text = " ".join(words[-15:])
|
| 321 |
+
else:
|
| 322 |
+
streaming_text = transcript
|
| 323 |
+
|
| 324 |
+
return transcript, streaming_text
|
| 325 |
|
|
|
|
| 326 |
state.change(
|
| 327 |
+
fn=update_output,
|
| 328 |
inputs=[state],
|
| 329 |
+
outputs=[text_output, streaming_text]
|
| 330 |
+
)
|
| 331 |
|
| 332 |
|
| 333 |
|