Spaces:
Build error
Build error
| import gradio as gr | |
| import tensorflow as tf | |
| from tensorflow_tts.inference import TFAutoModel, AutoProcessor | |
| import soundfile as sf | |
# Pretrained model identifiers: the acoustic model and the text processor
# are loaded from the same identifier, the vocoder from its own.
_ACOUSTIC_MODEL_ID = "bookbot/lightspeech-mfa-sw-v4"
_VOCODER_MODEL_ID = "bookbot/mb-melgan-hifi-postnets-sw-v4"

# Load everything once at import time so each tts() call reuses the
# already-loaded objects.
lightspeech = TFAutoModel.from_pretrained(_ACOUSTIC_MODEL_ID)
processor = AutoProcessor.from_pretrained(_ACOUSTIC_MODEL_ID)
mb_melgan = TFAutoModel.from_pretrained(_VOCODER_MODEL_ID)
def tts(text, speaker_name="sw-TZ-Victoria"):
    """Synthesize speech for ``text`` and return the path of a WAV file.

    The text is converted to input IDs by ``processor``, turned into a
    mel-spectrogram by ``lightspeech``, and converted to a waveform by
    ``mb_melgan``. The waveform is written to a fresh temporary file.

    Args:
        text: Text to synthesize.
        speaker_name: Key into ``processor.speakers_map`` selecting the
            speaker.

    Returns:
        Path of a newly created 16-bit PCM WAV file holding the audio.

    Raises:
        ValueError: If ``text`` is empty or contains only whitespace.
        KeyError: If ``speaker_name`` is not in ``processor.speakers_map``.
    """
    import tempfile

    # Reject empty input up front instead of handing the models an empty
    # sequence.
    if not text or not text.strip():
        raise ValueError("text must not be empty")

    # Process input text
    input_ids = processor.text_to_sequence(text)
    speaker_id = processor.speakers_map[speaker_name]

    # Generate mel-spectrogram; every ratio is left at its neutral 1.0.
    neutral_ratio = tf.convert_to_tensor([1.0], dtype=tf.float32)
    mel, _, _ = lightspeech.inference(
        input_ids=tf.expand_dims(
            tf.convert_to_tensor(input_ids, dtype=tf.int32), 0
        ),
        speaker_ids=tf.convert_to_tensor([speaker_id], dtype=tf.int32),
        speed_ratios=neutral_ratio,
        f0_ratios=neutral_ratio,
        energy_ratios=neutral_ratio,
    )

    # Generate audio from mel-spectrogram: first batch item, first channel.
    audio = mb_melgan.inference(mel)[0, :, 0]

    # Write to a unique file per call: a fixed "output.wav" would let
    # concurrent requests overwrite each other's result.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        wav_path = handle.name

    # NOTE(review): 44100 is assumed to be the vocoder's output sample
    # rate -- confirm against the model's documentation.
    sf.write(wav_path, audio, 44100, "PCM_16")

    # Return the audio file for Gradio to play
    return wav_path
# Build the web UI: a single text input passed to tts() and an audio
# output that plays the file path it returns.
iface = gr.Interface(
    fn=tts,
    inputs="text",
    outputs="audio",
)

# Start serving the interface
iface.launch()