| | |
| | import gradio as gr |
| | import shutil |
| | import os |
| | import subprocess |
| | import sys |
| |
|
# --- One-time environment setup --------------------------------------------
# Run the Wav2Lip setup script with the SAME interpreter running this app.
# (Previously a bare "python" with a relative path, which could resolve to a
# different interpreter than the one used for every other subprocess below.)
subprocess.run([sys.executable, os.path.abspath("./src/setup_wav2lip.py")])

# Make the local ./src modules importable before importing them.
sys.path.append(os.path.abspath("./src"))

from whisper_audio_transcriber import transcribe_audio, guardar_transcripcion
from call_openai_api import moni as rtff

# --- Project paths, resolved once relative to the working directory ---------
AUDIO_RECORD_PATH = os.path.abspath("./assets/audio/grabacion_gradio.wav")   # copy of the user's recording
VIDEO_PATH = os.path.abspath("./assets/video/data_video_sun.mp4")            # base avatar video
TRANSCRIPTION_TEXT_PATH = os.path.abspath("./results/transcripcion.txt")     # Whisper output
RESULT_AUDIO_TEMP_PATH = os.path.abspath("./results/audiov2.wav")            # raw TTS output
RESULT_AUDIO_FINAL_PATH = os.path.abspath("./assets/audio/audio.wav")        # TTS audio consumed by inference
RESULT_VIDEO_PATH = os.path.abspath("./results/result_voice.mp4")            # lip-synced result
TEXT_TO_SPEECH_PATH = os.path.abspath("./src/text_to_speech.py")
RUN_INFERENCE_PATH = os.path.abspath("./src/run_inference.py")
| |
|
| |
|
def transcribir_con_progreso(audio_path):
    """Transcribe *audio_path* with Whisper while reporting UI progress.

    Saves the transcription to TRANSCRIPTION_TEXT_PATH and returns the
    transcribed text.
    """
    avance = gr.Progress()
    avance(0, "Iniciando transcripción...")
    whisper_model = "openai/whisper-large"
    avance(25, "Cargando modelo Whisper...")
    texto = transcribe_audio(audio_path, whisper_model)
    avance(75, "Guardando transcripción...")
    guardar_transcripcion(texto, filename=TRANSCRIPTION_TEXT_PATH)
    avance(100, "Transcripción completada.")
    return texto
| |
|
| |
|
def generar_audio_desde_texto():
    """Run the text-to-speech script and publish its output audio.

    Executes TEXT_TO_SPEECH_PATH in a subprocess, then copies the temporary
    WAV it produces to RESULT_AUDIO_FINAL_PATH.

    Returns:
        The final audio path, or None if the script produced no file.

    Raises:
        RuntimeError: if the subprocess exits with a non-zero status.
    """
    print("Ejecutando text_to_speech...")
    proc = subprocess.run(
        [sys.executable, TEXT_TO_SPEECH_PATH],
        capture_output=True,
        text=True,
    )
    print("stdout:", proc.stdout)
    print("stderr:", proc.stderr)

    if proc.returncode != 0:
        raise RuntimeError(f"Error ejecutando text_to_speech.py: {proc.stderr}")

    # Guard clause: nothing to publish if the TTS script wrote no file.
    if not os.path.exists(RESULT_AUDIO_TEMP_PATH):
        print("Audio temporal no encontrado")
        return None

    os.makedirs(os.path.dirname(RESULT_AUDIO_FINAL_PATH), exist_ok=True)
    shutil.copy(RESULT_AUDIO_TEMP_PATH, RESULT_AUDIO_FINAL_PATH)
    print(f"Audio copiado a: {RESULT_AUDIO_FINAL_PATH}")
    return RESULT_AUDIO_FINAL_PATH
| |
|
| |
|
def procesar_video_audio():
    """Run lip-sync inference combining RESULT_AUDIO_FINAL_PATH and VIDEO_PATH.

    Executes RUN_INFERENCE_PATH in a subprocess and checks for the expected
    output file.

    Returns:
        RESULT_VIDEO_PATH if the video was generated, otherwise None.
    """
    print("Iniciando procesamiento de video...")
    print("Audio de entrada:", RESULT_AUDIO_FINAL_PATH)
    print("Video de entrada:", VIDEO_PATH)

    result = subprocess.run(
        [sys.executable, RUN_INFERENCE_PATH, "--audio", RESULT_AUDIO_FINAL_PATH, "--video", VIDEO_PATH],
        capture_output=True,
        text=True,
    )

    print("stdout:", result.stdout)
    print("stderr:", result.stderr)

    # Consistency with generar_audio_desde_texto: surface a failed subprocess
    # explicitly instead of relying only on the file-existence check below.
    # (Logged rather than raised so the caller's return shape is unchanged.)
    if result.returncode != 0:
        print(f"run_inference.py terminó con código {result.returncode}")

    if os.path.exists(RESULT_VIDEO_PATH):
        print("Video generado:", RESULT_VIDEO_PATH)
        return RESULT_VIDEO_PATH

    print("No se generó el video")
    return None
| |
|
| |
|
def flujo_completo(audio_file_path):
    """End-to-end pipeline: record -> transcribe -> LLM -> TTS -> lip-sync.

    Returns a 5-tuple matching the Gradio outputs:
    (status message, recorded audio path, transcription, TTS audio path,
    result video path). On any failure, the error is folded into the
    status/transcription slots and the media slots are None.
    """
    try:
        # Persist the uploaded/recorded clip at the pipeline's fixed location.
        os.makedirs(os.path.dirname(AUDIO_RECORD_PATH), exist_ok=True)
        shutil.copy(audio_file_path, AUDIO_RECORD_PATH)
        print("Audio grabado copiado a:", AUDIO_RECORD_PATH)

        texto = transcribir_con_progreso(AUDIO_RECORD_PATH)
        print("Texto transcrito:", texto)

        # The LLM step reads the transcription from disk, not from memory.
        respuesta = rtff(TRANSCRIPTION_TEXT_PATH)
        print("Respuesta de OpenAI:", respuesta)

        ruta_audio = generar_audio_desde_texto()
        ruta_video = procesar_video_audio()

        return "Grabación recibida", AUDIO_RECORD_PATH, texto, ruta_audio, ruta_video

    except Exception as e:
        # Top-level boundary: report the error through the UI outputs.
        mensaje = str(e)
        return (
            f"Error durante el flujo completo: {mensaje}",
            None,
            f"Error: {mensaje}",
            None,
            None,
        )
| |
|
| |
|
def interfaz():
    """Build and return the Gradio Blocks UI for the voice-avatar demo."""
    with gr.Blocks() as demo:
        with gr.Row():
            # Left column: looping avatar preview plus the recording input.
            with gr.Column():
                gr.Video(VIDEO_PATH, loop=True, autoplay=True, height=300, width=500)
                entrada_audio = gr.Audio(label="Graba tu voz", type="filepath", format="wav")
                estado = gr.Textbox(label="Estado", interactive=False)

            # Right column: every artifact the pipeline produces.
            with gr.Column():
                audio_grabado = gr.Audio(label="Audio grabado", interactive=False)
                audio_tts = gr.Audio(label="Audio TTS", interactive=False)
                video_final = gr.Video(label="Video procesado", interactive=False)
                transcripcion = gr.Textbox(label="Texto transcrito")

        # A new recording triggers the whole pipeline.
        entrada_audio.change(
            flujo_completo,
            inputs=entrada_audio,
            outputs=[estado, audio_grabado, transcripcion, audio_tts, video_final],
        )

    return demo
| |
|
| |
|
if __name__ == "__main__":
    # Build the UI and start the Gradio server.
    interfaz().launch()
| |
|
| |
|