Text-to-Audio
Audiocraft
English
audiogen
styletts2
shift-tts
sound
audio-generation
text-to-speech
mimic3
Instructions to use dkounadis/artificial-styletts2 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Audiocraft
How to use dkounadis/artificial-styletts2 with Audiocraft:
```python
from audiocraft.models import AudioGen

model = AudioGen.get_pretrained("dkounadis/artificial-styletts2")
model.set_generation_params(duration=5)  # generate 5 seconds.
descriptions = ['dog barking', 'sirene of an emergency vehicle', 'footsteps in a corridor']
wav = model.generate(descriptions)  # generates 3 samples.
```
- Notebooks
- Google Colab
- Kaggle
| import numpy as np | |
| import soundfile | |
| import msinference # Prefer live_demo.py instead as this demo.py has no split to sentences to prevent OOM | |
| from audiocraft.builders import AudioGen # fixed bug for repeated calls | |
def _style_wav_name(voice):
    """Return the reference-wav file name derived from a voice id.

    '/' and '#' become '_', 'cmu-arctic' becomes 'cmu_arctic', every
    '_low' is removed and '.wav' is appended, e.g.
    'en_US/m-ailabs_low#mary_ann' -> 'en_US_m-ailabs_mary_ann.wav'.
    """
    return voice.replace(
        '/', '_').replace('#', '_').replace(
        'cmu-arctic', 'cmu_arctic').replace(
        '_low', '') + '.wav'


def tts_entry(text='A quick brown fox jumps over the lazy dog. Sweet dreams are made of this, I traveled the world and the seven seas.',
              voice='en_US/m-ailabs_low#mary_ann',  # Listen to voices https://huggingface.co/dkounadis/artificial-styletts2/discussions/1
              soundscape='birds fomig'):  # deliberately misspelled; per the original author this acts like a controllable top-p for AudioGen
    """Synthesize `text` as speech and optionally mix in a generated soundscape.

    Args:
        text: Text to speak.
        voice: Voice id. Ids containing 'en_US/' or 'en_UK/' use a reference
            wav from 'assets/wavs/style_vector/'; any other id containing '_'
            uses one from 'assets/wavs/mimic3_foreign_4x/'; everything else
            is passed as `lang` to `msinference.foreign`.
        soundscape: Prompt handed to AudioGen for the background sound, or
            None to return speech only.

    Returns:
        The waveform produced by msinference, peak-normalized and, when
        `soundscape` is given, mixed 60/40 with the generated background.
        NOTE(review): presumably a 1-D float numpy array at 16 kHz - the
        duration math below and the demo's write call both use 16000;
        confirm against msinference.
    """
    sample_rate = 16000  # rate used to turn len(x) into seconds

    # Choose where the style reference wav lives; None selects the
    # msinference.foreign path, which takes the voice as a language code.
    if ('en_US/' in voice) or ('en_UK/' in voice):
        style_dir = 'assets/wavs/style_vector/'
    elif '_' in voice:
        style_dir = 'assets/wavs/mimic3_foreign_4x/'
    else:
        style_dir = None

    if style_dir is None:
        x = msinference.foreign(text=text, lang=voice)
    else:
        style_vector = msinference.compute_style(
            style_dir + _style_wav_name(voice))
        x = msinference.inference(text, style_vector)

    # Scale so the peak sits just below full scale (about 1 / 1.02); the
    # epsilon avoids dividing by zero on an all-silent signal.
    x /= 1.02 * np.abs(x).max() + 1e-7

    if soundscape is not None:
        # The model is built on every call and is pinned to the first CUDA
        # device, so this branch needs a GPU.
        sound_gen = AudioGen().to('cuda:0').eval()
        # Ask for 0.74 s more than the speech lasts, then trim back to
        # len(x) so both signals have the same length when mixed.
        background = sound_gen.generate(
            soundscape,
            duration=len(x) / sample_rate + .74,  # sound duration in seconds
        ).detach().cpu().numpy()
        x = .6 * x + .4 * background[:len(x)]
    return x
# Script entry: synthesize the default sentence with the default voice and
# soundscape, then save it as demo.wav at 16000 Hz.
demo_audio = tts_entry()
soundfile.write('demo.wav', demo_audio, 16000)