# DeepCritical / deployments/modal_tts.py
# SeasonalFall84's picture
# Add TTS on-demand with UI credentials, improve UI layout, and fix References removal
# b4f9ff5
"""Deploy Kokoro TTS function to Modal.
This script deploys the TTS function to Modal so it can be called
from the main DeepCritical application.
Usage:
    modal deploy deployments/modal_tts.py
After deployment, the function will be available at:
App: deepcritical-tts
Function: kokoro_tts_function
"""
import modal
import numpy as np
# Modal application under which the TTS function is registered.
app = modal.App("deepcritical-tts")

# Python packages installed into the image before Kokoro itself.
KOKORO_DEPENDENCIES = [
    "torch>=2.0.0",
    "transformers>=4.30.0",
    "numpy<2.0",
]

# Container image for the TTS function, assembled one layer at a time:
# Debian slim with Python 3.11 -> git -> pinned dependencies -> Kokoro.
_base_image = modal.Image.debian_slim(python_version="3.11")
# git has to be present before pip can install a package from a GitHub URL.
_image_with_git = _base_image.apt_install("git")
_image_with_deps = _image_with_git.pip_install(*KOKORO_DEPENDENCIES)
tts_image = _image_with_deps.pip_install("git+https://github.com/hexgrad/kokoro.git")
# Per-container caches. Module globals outlive a single call when a warm
# container serves several invocations, so the model weights and the
# per-language pipelines are built once and reused afterwards.
_KOKORO_MODELS: dict = {}
_KOKORO_PIPELINES: dict = {}


@app.function(
    image=tts_image,
    gpu="T4",
    timeout=60,
)
def kokoro_tts_function(text: str, voice: str, speed: float) -> tuple[int, np.ndarray]:
    """Synthesize speech with Kokoro on a Modal GPU worker.

    Based on: https://huggingface.co/spaces/hexgrad/Kokoro-TTS

    Args:
        text: Text to synthesize.
        voice: Voice ID (e.g., af_heart, af_bella, am_michael). Its first
            character is used as the Kokoro language code.
        speed: Speech speed multiplier (0.5-2.0).

    Returns:
        Tuple of (sample_rate, audio_array). The sample rate is always 24000.
        When there is nothing to synthesize, the array is a single float32
        zero.

    Raises:
        RuntimeError: If Kokoro cannot be imported, or if synthesis fails for
            any other reason (including an empty voice ID). The underlying
            exception is chained as the cause.
    """
    import numpy as np

    sample_rate = 24000

    try:
        from kokoro import KModel, KPipeline

        if not voice:
            # Fail with a clear message instead of an IndexError on voice[0].
            raise ValueError("voice must be a non-empty voice ID")

        # Nothing to say: return the silence placeholder without paying for
        # model construction.
        if not text or not text.strip():
            return (sample_rate, np.zeros(1, dtype=np.float32))

        model = _KOKORO_MODELS.get("cuda")
        if model is None:
            model = KModel().to("cuda").eval()
            _KOKORO_MODELS["cuda"] = model

        lang_code = voice[0]
        pipeline = _KOKORO_PIPELINES.get(lang_code)
        if pipeline is None:
            # model=False limits the pipeline to text processing. Without it
            # the pipeline would load its own model and synthesize audio that
            # this function throws away before running `model` again.
            pipeline = KPipeline(lang_code=lang_code, model=False)
            _KOKORO_PIPELINES[lang_code] = pipeline
        pack = pipeline.load_voice(voice)

        # Generate audio - accumulate all chunks
        audio_chunks = []
        for _, ps, _ in pipeline(text, voice, speed):
            # The voice pack is indexed by phoneme count to pick the style
            # reference for this chunk.
            ref_s = pack[len(ps) - 1]
            audio = model(ps, ref_s, speed)
            # detach()/cpu() are no-ops for a CPU tensor without grad, and
            # make the NumPy conversion safe if the model returns a CUDA one.
            audio_chunks.append(audio.detach().cpu().numpy())

        if audio_chunks:
            return (sample_rate, np.concatenate(audio_chunks))

        # If no audio generated, return empty
        return (sample_rate, np.zeros(1, dtype=np.float32))
    except ImportError as e:
        raise RuntimeError(
            f"Kokoro not installed: {e}. "
            "Install with: pip install git+https://github.com/hexgrad/kokoro.git"
        ) from e
    except Exception as e:
        raise RuntimeError(f"TTS synthesis failed: {e}") from e
# Optional: Add a test entrypoint
@app.local_entrypoint()
def test() -> None:
    """Smoke-test the TTS function.

    Calls ``kokoro_tts_function`` remotely with a short English sentence, the
    ``af_heart`` voice and speed 1.0, then prints the returned sample rate and
    the shape of the audio array.
    """
    print("Testing Modal TTS function...")
    sample_rate, audio = kokoro_tts_function.remote("Hello, this is a test.", "af_heart", 1.0)
    print(f"Generated audio: {sample_rate}Hz, shape={audio.shape}")
    # The check mark had been mis-decoded into mojibake; restore the intended
    # U+2713 character.
    print("✓ TTS function works!")