| |
| import numpy as np |
| import argparse |
| import os |
| import re |
| import requests |
| from pathlib import Path |
# Ensure the output directory exists at import time; the CLI writes its
# results under ./out/.
os.makedirs('out/', exist_ok=True)
|
|
| |
| |
| |
| |
| |
| |
|
|
|
|
def alpha_num(f):
    """Reduce a string to ASCII letters, digits and spaces.

    Runs of spaces are collapsed to a single space first; afterwards every
    character that is not ``A-Z``, ``a-z``, ``0-9`` or a space is removed.
    Because the collapse happens before the removal, dropping a symbol that
    sat between two spaces can leave two adjacent spaces in the result.

    Args:
        f: The string to clean.

    Returns:
        The cleaned string.
    """
    single_spaced = re.sub(' +', ' ', f)
    return re.sub(r'[^A-Za-z0-9 ]+', '', single_spaced)
|
|
|
|
def command_line_args():
    """Build the argument parser for the TTS command-line client.

    Returns:
        argparse.ArgumentParser: A parser exposing ``--affective``,
        ``--device``, ``--text``, ``--soundscape``, ``--native``, ``--voice``,
        ``--image``, ``--video``, ``--out_file`` and ``--speed``. Defaults are
        shown in ``--help`` via ``ArgumentDefaultsHelpFormatter``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    # store_false: ``args.affective`` is True unless the flag is passed.
    parser.add_argument(
        '--affective',
        help="Select Emotional or non-emotional variant of Available voices: https://audeering.github.io/shift/",
        action='store_false',
    )
    parser.add_argument(
        '--device',
        help="Device ID",
        type=str,
        default='cpu',
    )
    parser.add_argument(
        '--text',
        help="Text to be synthesized.",
        default='sample.txt',
        type=str,
    )
    # nargs='?': absent -> None, bare ``--soundscape`` -> 'wind fjord',
    # ``--soundscape VALUE`` -> VALUE.
    parser.add_argument(
        '--soundscape',
        help='soundscape - MUST BE IN BRACKETS: \"forest\"',
        default=None,
        nargs='?',
        type=str,
        const='wind fjord',
    )
    # NOTE(review): with no nargs/const configured, argparse requires a value
    # after ``--native``; the "without argument" form described in the help
    # text is not accepted by this parser as written.
    parser.add_argument(
        '--native',
        help="""
        --native: (without argument) a flag to do voice cloning using the speech from --video,
        --native my_voice.wav: Voice cloning from user provided audio""",
    )
    parser.add_argument(
        '--voice',
        help="TTS voice - Available voices: https://audeering.github.io/shift/",
        default="en_US/m-ailabs_low#judy_bieber",
        type=str,
    )
    parser.add_argument(
        '--image',
        help="If provided is set as background for output video, see --text",
        type=str,
    )
    parser.add_argument(
        '--video',
        help="Video file for video translation. Voice cloned from the video",
        type=str,
    )
    parser.add_argument(
        '--out_file',
        help="Output file name.",
        type=str,
        default=None
    )
    # Parsed as float so a user-supplied value has the same type as the
    # default (previously type=str produced a str next to a float default).
    parser.add_argument(
        '--speed',
        help='speed of TTS (only used in Non English voices).',
        type=float,
        default=1.44,
    )
    return parser
|
|
def _open_optional(path, banner):
    """Open ``path`` for binary reading; return None if it is unset or missing."""
    if path is None:
        return None
    print(banner)
    try:
        return open(path, 'rb')
    except FileNotFoundError:
        # Best effort: the path is still sent in the form data, only the
        # upload is skipped. Say so instead of failing silently.
        print(f'WARNING: {path} not found locally; it will not be uploaded')
        return None


def send_to_server(args):
    """POST the synthesis request and its input files to the TTS server.

    The parsed options are sent as form fields, and the text file plus any
    image / video / native-voice files that exist locally are attached as
    multipart uploads keyed by their own path.

    Args:
        args: Parsed command-line namespace providing ``affective``,
            ``voice``, ``soundscape``, ``native``, ``text``, ``image``,
            ``video`` and ``speed``.

    Returns:
        requests.Response: The server's response.

    Raises:
        FileNotFoundError: If ``args.text`` does not exist. The other files
            are optional and only produce a warning when missing.
    """
    url = "http://192.168.88.209:5000"

    payload = {
        'affective': args.affective,
        'voice': args.voice,
        'soundscape': args.soundscape if args.soundscape != '' else None,
        'native': args.native,
        'text': args.text,
        'image': args.image,
        'video': args.video,
        'speed': args.speed,
    }

    opened = []
    try:
        text_file = open(args.text, 'rb')
        opened.append(text_file)
        attachments = [(args.text, text_file)]

        optional_inputs = (
            (args.image, '\nLOADING IMAGE\n'),
            (args.video, '\nLOADING vid\n'),
            (args.native, '\nLOADING natv\n'),
        )
        for path, banner in optional_inputs:
            handle = _open_optional(path, banner)
            if handle is not None:
                opened.append(handle)
            # Entries whose handle is None are kept so the request is built
            # exactly as before; requests skips file entries with a None value.
            attachments.append((path, handle))

        return requests.post(url, data=payload, files=attachments)
    finally:
        # The request body has been fully sent (or the call failed) by now,
        # so every handle opened above can be released.
        for handle in opened:
            handle.close()
|
|
|
|
def cli():
    """Parse the command line, request synthesis and save the result.

    When ``--out_file`` is not given, a name is derived from the sanitised
    text path, the voice and either the sanitised video path or a short
    random number. The response body is written to
    ``./out/<out_file>.<ext>``, where ``<ext>`` is the part after the last
    dot of the ``suffix-file-type`` response header.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        KeyError: If a successful response lacks the ``suffix-file-type``
            header.
    """
    parser = command_line_args()
    args = parser.parse_args()

    if args.out_file is None:
        vid = alpha_num(args.video) if args.video else f'{np.random.rand()*1e7}'[:6]
        args.out_file = alpha_num(args.text) + '_' + alpha_num(args.voice) + '_' + vid

    response = send_to_server(args)
    # Fail with the real HTTP error rather than an unrelated KeyError on the
    # header lookup below (or an error page saved as a media file).
    response.raise_for_status()

    extension = response.headers['suffix-file-type'].split('.')[-1]
    with open('./out/' + args.out_file + '.' + extension, 'wb') as f:
        f.write(response.content)
| |
|
|
|
|
# Run the command-line client only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    cli()
|
|
| |
| |
|
|