Spaces:
Sleeping
Sleeping
import io
import os
import wave

import gradio as gr
import numpy as np
import requests
# Base URL of the TTS API. The default points at an ngrok tunnel; set the
# TTS_API_URL environment variable to target a different endpoint without
# editing this file. A trailing slash is stripped so that f"{API_URL}/path"
# never contains a double slash.
API_URL = (
    os.environ.get("TTS_API_URL") or "https://0608-44-220-51-89.ngrok-free.app"
).rstrip("/")
def check_api_health():
    """Check whether the TTS API service and its S3 storage are available.

    Sends a GET request to ``{API_URL}/health`` with a 5-second timeout.

    Returns:
        A ``(is_healthy, s3_available)`` tuple of booleans. ``is_healthy`` is
        True when the JSON body's ``"status"`` equals ``"healthy"``;
        ``s3_available`` is True when its ``"s3_storage"`` equals
        ``"available"``. Both are False on a non-200 response, a request
        error, or a body that is not a JSON object.
    """
    try:
        response = requests.get(f"{API_URL}/health", timeout=5)
        if response.status_code != 200:
            return False, False
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSONDecodeError does not derive from RequestException.
        print(f"Error checking API health: {str(e)}")
        return False, False

    # A JSON array/string/number has no .get(); treat it as unhealthy.
    if not isinstance(data, dict):
        return False, False
    return data.get("status") == "healthy", data.get("s3_storage") == "available"
def stream_text_to_speech(text, description, token):
    """Request speech generation from the TTS API and decode the WAV reply.

    Args:
        text: Text to synthesize. A falsy value short-circuits to ``None``.
        description: Voice description forwarded to the API.
        token: Authentication token forwarded to the API.

    Returns:
        ``None`` when ``text`` is empty; otherwise a ``(sample_rate, samples)``
        tuple where ``samples`` is a float32 NumPy array obtained by dividing
        the 16-bit PCM samples by 32767.0. The array has shape ``(frames,)``
        for mono audio and ``(frames, channels)`` otherwise.

    Raises:
        gr.Error: If the health check fails, the request fails or returns an
            HTTP error status, or the body is not a 16-bit PCM WAV file.
    """
    if not text:
        return None

    # Check if API is available
    is_healthy, _ = check_api_health()
    if not is_healthy:
        raise gr.Error("TTS API service is not available. Please ensure the FastAPI service is running.")

    # Prepare the request
    payload = {
        "text": text,
        "description": description,
        "token": token,
    }

    try:
        # (connect, read) timeout: fail fast if the host is unreachable, but
        # leave generous time for synthesis so the UI cannot hang forever.
        response = requests.post(
            f"{API_URL}/tts/stream", json=payload, timeout=(5, 300)
        )
        # Surface auth/server errors as such instead of as a WAV parse error.
        response.raise_for_status()

        with wave.open(io.BytesIO(response.content), 'rb') as wav_file:
            sample_rate = wav_file.getframerate()
            sample_width = wav_file.getsampwidth()
            n_channels = wav_file.getnchannels()
            frames = wav_file.readframes(wav_file.getnframes())

        if sample_width != 2:
            raise gr.Error(
                f"Request failed: expected 16-bit PCM audio, got {8 * sample_width}-bit"
            )

        # WAV PCM is little-endian regardless of the host byte order.
        samples = np.frombuffer(frames, dtype="<i2").astype(np.float32) / 32767.0
        if n_channels > 1:
            # De-interleave so each column holds one channel.
            samples = samples.reshape(-1, n_channels)
        return (sample_rate, samples)
    except (requests.exceptions.RequestException, wave.Error, EOFError, ValueError) as e:
        raise gr.Error(f"Request failed: {str(e)}") from e
def store_text_to_speech(text, description, token):
    """Request speech generation and storage from the TTS API.

    Args:
        text: Text to synthesize.
        description: Voice description forwarded to the API.
        token: Authentication token forwarded to the API.

    Returns:
        A human-readable status string: a success message containing the
        audio duration and URL reported by the API, or an error message when
        the input is empty, the health check fails, S3 storage is reported
        unavailable, the request fails, or the response is malformed. This
        function reports failures through its return value and does not
        raise for them.
    """
    if not text:
        return "Error: Text cannot be empty"

    # Check if API and S3 storage are available
    is_healthy, s3_available = check_api_health()
    if not is_healthy:
        return "Error: TTS API service is not available. Please ensure the FastAPI service is running."
    if not s3_available:
        return "Error: S3 storage is not available. Please check the API server configuration."

    # Prepare the request
    payload = {
        "text": text,
        "description": description,
        "token": token,
    }

    try:
        # (connect, read) timeout so a stalled server cannot hang the UI.
        response = requests.post(
            f"{API_URL}/tts/store", json=payload, timeout=(5, 300)
        )
        # Report HTTP errors directly rather than as a missing-key error.
        response.raise_for_status()
        data = response.json()
        return f"✅ Audio generated and stored! Audio duration: {data['duration_seconds']:.2f}s\n\nURL: {data['url']}"
    except requests.exceptions.RequestException as e:
        return f"⚠️ Request failed: {str(e)}"
    except (KeyError, TypeError, ValueError) as e:
        # Body was not JSON, or lacked/mistyped the expected fields.
        return f"⚠️ Request failed: unexpected response from the API ({e!r})"
# Create the Gradio interface: one tab per API mode (streaming playback vs.
# generate-and-store), each with its own text, token and voice-description
# inputs wired to the matching handler function.
with gr.Blocks(title="Baamtu TTS") as demo:
    gr.Markdown("# Wolof Text-to-Speech by Baamtu")
    gr.Markdown("### Generate speech from text in Wolof. You can use the streaming or the storage option.")

    with gr.Tabs():
        with gr.TabItem("Streaming"):
            with gr.Row():
                with gr.Column(scale=2):
                    stream_text_input = gr.Textbox(
                        label="Text to speak",
                        lines=4
                    )
                    # Masked input: the token is a credential and should not
                    # be displayed in clear text.
                    stream_token = gr.Textbox(
                        label="Token for authentication",
                        lines=1,
                        type="password"
                    )
                with gr.Column(scale=1):
                    stream_voice_desc = gr.Textbox(
                        label="Voice description",
                        lines=5,
                        value="Female speaks in a very distant-sounding voice, with a very noisy background, and a monotone delivery, speaking slowly."
                    )
            stream_btn = gr.Button("Generate Audio", variant="primary")
            audio_output = gr.Audio(
                label="Generated Speech",
                type="numpy",
                streaming=True,
                interactive=False
            )
            stream_btn.click(
                fn=stream_text_to_speech,
                inputs=[stream_text_input, stream_voice_desc, stream_token],
                outputs=audio_output
            )

        with gr.TabItem("Storage"):
            # Evaluated once while the UI is being built, so this warning
            # reflects S3 availability at startup only.
            _, s3_available = check_api_health()
            if not s3_available:
                gr.Markdown("⚠️ **S3 storage is not available.** Please check the API server configuration.")
            with gr.Row():
                with gr.Column(scale=2):
                    store_text_input = gr.Textbox(
                        label="Text to speak",
                        lines=4
                    )
                    # Masked input, same reason as on the Streaming tab.
                    store_token = gr.Textbox(
                        label="Token for authentication",
                        lines=1,
                        type="password"
                    )
                with gr.Column(scale=1):
                    store_voice_desc = gr.Textbox(
                        label="Voice description",
                        lines=5,
                        value="Female's speech is very close-sounding and very clear. She speaks fast with an expressive and animated voice."
                    )
            store_btn = gr.Button("Generate & Store in S3", variant="primary")
            store_result = gr.Textbox(
                label="Storage Result",
                lines=4,
                placeholder="Generated audio link will appear here...",
                interactive=False
            )
            store_btn.click(
                fn=store_text_to_speech,
                inputs=[store_text_input, store_voice_desc, store_token],
                outputs=store_result
            )

# Launch the app
demo.launch()