# baamtu-tts / app.py
# Page-header residue from the hosting site: aimerou; commit a254fde ("update ngrok url")
import gradio as gr
import requests
import numpy as np
import io
import wave
# Base URL of the TTS API; the /health, /tts/stream and /tts/store paths
# used in this file are appended to it.
# NOTE(review): this is an ngrok tunnel hostname, presumably temporary —
# confirm it is current before deploying.
API_URL = "https://0608-44-220-51-89.ngrok-free.app"
def check_api_health():
    """Check whether the TTS API and its S3 storage are available.

    Sends ``GET {API_URL}/health`` with a 5-second timeout and inspects the
    JSON body's ``status`` and ``s3_storage`` fields.

    Returns:
        tuple[bool, bool]: ``(api_healthy, s3_available)``. Both are ``False``
        when the request fails, the status code is not 200, or the body is
        not a JSON object. This function never raises for those cases, which
        matters because it is also called while the UI is being built.
    """
    try:
        response = requests.get(f"{API_URL}/health", timeout=5)
        if response.status_code != 200:
            return False, False
        data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose JSON
        # decode error is not a RequestException subclass.
        print(f"Error checking API health: {str(e)}")
        return False, False

    # A JSON array/string/number has no .get(); treat it as unhealthy.
    if not isinstance(data, dict):
        return False, False

    return data.get("status") == "healthy", data.get("s3_storage") == "available"
def stream_text_to_speech(text, description, token):
    """Request speech from the TTS API and return it as a NumPy waveform.

    Args:
        text: Text to synthesize. An empty value short-circuits to ``None``.
        description: Free-text voice description forwarded to the API.
        token: Authentication token forwarded to the API.

    Returns:
        ``None`` when ``text`` is empty, otherwise ``(sample_rate, samples)``
        where ``samples`` is a float32 array scaled into [-1, 1). Mono audio
        is 1-D; multi-channel audio is shaped ``(frames, channels)``.

    Raises:
        gr.Error: If the API is unhealthy, the request fails, the server
            answers with an error status, or the body is not 16-bit PCM WAV.
    """
    if not text:
        return None

    # Check if API is available
    is_healthy, _ = check_api_health()
    if not is_healthy:
        raise gr.Error("TTS API service is not available. Please ensure the FastAPI service is running.")

    # Prepare the request
    payload = {
        "text": text,
        "description": description,
        "token": token,
    }

    try:
        response = requests.post(f"{API_URL}/tts/stream", json=payload)
        # Fail on 4xx/5xx here; otherwise an error body would be handed to
        # wave.open and reported as a misleading "not a RIFF file" error.
        response.raise_for_status()

        with wave.open(io.BytesIO(response.content), 'rb') as wav_file:
            sample_rate = wav_file.getframerate()
            n_channels = wav_file.getnchannels()
            sample_width = wav_file.getsampwidth()
            frames = wav_file.readframes(wav_file.getnframes())

        # The int16 decoding below is only valid for 2-byte samples.
        if sample_width != 2:
            raise ValueError(
                f"unsupported WAV sample width: {sample_width * 8}-bit (expected 16-bit PCM)"
            )

        samples = np.frombuffer(frames, dtype=np.int16)
        if n_channels > 1:
            # WAV frames are interleaved; give each channel its own column.
            samples = samples.reshape(-1, n_channels)

        # Divide by 32768 so the most negative int16 maps to exactly -1.0.
        audio = samples.astype(np.float32) / 32768.0
        return (sample_rate, audio)
    except Exception as e:
        # UI boundary: surface any failure as a Gradio error, keeping the cause.
        raise gr.Error(f"Request failed: {str(e)}") from e
def store_text_to_speech(text, description, token):
    """Request speech generation and storage from the TTS API.

    Unlike the streaming variant, every outcome (including failures) is
    reported as a human-readable string for the result textbox; this
    function does not raise.

    Args:
        text: Text to synthesize; must be non-empty.
        description: Free-text voice description forwarded to the API.
        token: Authentication token forwarded to the API.

    Returns:
        str: A success message containing the audio duration and URL, or an
        error message explaining what went wrong.
    """
    if not text:
        return "Error: Text cannot be empty"

    # Check if API and S3 storage are available
    is_healthy, s3_available = check_api_health()
    if not is_healthy:
        return "Error: TTS API service is not available. Please ensure the FastAPI service is running."
    if not s3_available:
        return "Error: S3 storage is not available. Please check the API server configuration."

    # Prepare the request
    payload = {
        "text": text,
        "description": description,
        "token": token,
    }

    try:
        response = requests.post(f"{API_URL}/tts/store", json=payload)

        if not response.ok:
            # Report the server's own explanation instead of failing later
            # with a cryptic KeyError on the missing success fields.
            # NOTE(review): assumes error bodies may carry a "detail" field
            # (FastAPI convention) — falls back to the raw text otherwise.
            try:
                detail = response.json().get("detail", response.text)
            except (ValueError, AttributeError):
                detail = response.text
            return f"⚠️ Request failed: HTTP {response.status_code}: {detail}"

        data = response.json()
        return f"✅ Audio generated and stored! Audio duration: {data['duration_seconds']:.2f}s\n\nURL: {data['url']}"
    except KeyError as e:
        return f"⚠️ Request failed: unexpected API response, missing field {e}"
    except Exception as e:
        return f"⚠️ Request failed: {str(e)}"
# Create the Gradio interface: two tabs sharing the same input layout, one
# returning audio directly and one storing it and returning a link.
# NOTE(review): the nesting below was reconstructed from component order
# because the original indentation was lost — confirm the intended layout.
with gr.Blocks(title="Baamtu TTS") as demo:
    gr.Markdown("# Wolof Text-to-Speech by Baamtu")
    gr.Markdown("### Generate speech from text in Wolof. You can use the streaming or the storage option.")

    with gr.Tabs():
        with gr.TabItem("Streaming"):
            with gr.Row():
                with gr.Column(scale=2):
                    stream_text_input = gr.Textbox(
                        label="Text to speak",
                        lines=4
                    )
                    stream_token = gr.Textbox(
                        label="Token for authentication",
                        lines=1
                    )
                with gr.Column(scale=1):
                    stream_voice_desc = gr.Textbox(
                        label="Voice description",
                        lines=5,
                        value="Female speaks in a very distant-sounding voice, with a very noisy background, and a monotone delivery, speaking slowly."
                    )
            stream_btn = gr.Button("Generate Audio", variant="primary")
            audio_output = gr.Audio(
                label="Generated Speech",
                type="numpy",
                streaming=True,
                interactive=False
            )
            # Input order must match stream_text_to_speech(text, description, token).
            stream_btn.click(
                fn=stream_text_to_speech,
                inputs=[stream_text_input, stream_voice_desc, stream_token],
                outputs=audio_output
            )

        with gr.TabItem("Storage"):
            # Evaluated once, while the UI is built: the warning reflects the
            # API state at startup and is not refreshed afterwards.
            is_healthy, s3_available = check_api_health()
            if not s3_available:
                gr.Markdown("⚠️ **S3 storage is not available.** Please check the API server configuration.")
            with gr.Row():
                with gr.Column(scale=2):
                    store_text_input = gr.Textbox(
                        label="Text to speak",
                        lines=4
                    )
                    store_token = gr.Textbox(
                        label="Token for authentication",
                        lines=1
                    )
                with gr.Column(scale=1):
                    store_voice_desc = gr.Textbox(
                        label="Voice description",
                        lines=5,
                        value="Female's speech is very close-sounding and very clear. She speaks fast with an expressive and animated voice."
                    )
            store_btn = gr.Button("Generate & Store in S3", variant="primary")
            store_result = gr.Textbox(
                label="Storage Result",
                lines=4,
                placeholder="Generated audio link will appear here...",
                interactive=False
            )
            # Input order must match store_text_to_speech(text, description, token).
            store_btn.click(
                fn=store_text_to_speech,
                inputs=[store_text_input, store_voice_desc, store_token],
                outputs=store_result
            )
# Launch the app. This runs at module level (there is no
# `if __name__ == "__main__"` guard), so importing this file also starts it.
demo.launch()