Commit ·
e28a050
1
Parent(s): b504537
Auto-pick voices by gender, sync voice picks to
script tags, add preview
Script prompt now requests a 'Character Genders:
' line which the parser strips out and uses to
pick matching-gender voices (Mom gets Cherry,
Wizard gets Chicago, etc.) without duplicates.
Adds a voice_selections state so changing a
Voice dropdown below the script live-updates every
'Speaker N - ...' tag in the editor. New
collapsible voice preview lets users sample each
voice before committing to a long generation.
app.py
CHANGED
|
@@ -236,7 +236,14 @@ CRITICAL — ONE SPEAKER PER TURN:
|
|
| 236 |
- NEVER embed another character's dialogue inside someone else's turn
|
| 237 |
- WRONG: "Speaker 1: We need magic. Mom: Hey kids, what's going on?"
|
| 238 |
- RIGHT: Every time the speaker changes, END the current turn, add a BLANK LINE, then start a NEW turn with "Speaker N:" on its own line
|
| 239 |
-
- Do NOT use character names as inline labels like "Mom:" or "Wizard:" mid-paragraph — always use "Speaker N:" on a fresh line
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 240 |
|
| 241 |
|
| 242 |
# Strip bracketed stage directions, parenthetical cues, and asterisk actions.
|
|
@@ -277,8 +284,59 @@ def sanitize_dialogue(text: str) -> str:
|
|
| 277 |
return text
|
| 278 |
|
| 279 |
|
| 280 |
-
|
| 281 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
system = SCRIPT_SYSTEM_PROMPT.format(max_words=SCRIPT_MAX_WORDS)
|
| 283 |
response = llm_client.chat_completion(
|
| 284 |
messages=[
|
|
@@ -297,6 +355,9 @@ def generate_script_from_prompt(prompt: str) -> tuple[list[dict], int, str]:
|
|
| 297 |
title = lines[0].split(":", 1)[1].strip()
|
| 298 |
raw = "\n".join(lines[1:])
|
| 299 |
|
|
|
|
|
|
|
|
|
|
| 300 |
turns = parse_script_to_turns(raw)
|
| 301 |
# Scrub stage directions from each turn, drop any turn that becomes empty
|
| 302 |
turns = [
|
|
@@ -311,7 +372,9 @@ def generate_script_from_prompt(prompt: str) -> tuple[list[dict], int, str]:
|
|
| 311 |
total_words = sum(len(t["text"].split()) for t in turns)
|
| 312 |
speaker_ids = {t["speaker"] for t in turns}
|
| 313 |
num_speakers = max(min(len(speaker_ids), 4), 1) if speaker_ids else 1
|
| 314 |
-
|
|
|
|
|
|
|
| 315 |
|
| 316 |
|
| 317 |
PARODY_SYSTEM_PROMPT = """You are a comedian narrator. The user will give you a scenario. Write a SHORT, funny behind-the-scenes narration of what's "really" happening while their audio is being generated. Be absurd, self-aware, and poke fun at AI.
|
|
@@ -543,6 +606,8 @@ def create_demo_interface():
|
|
| 543 |
turns_state = gr.State([])
|
| 544 |
script_title_state = gr.State("")
|
| 545 |
parody_lines_state = gr.State([]) # funny loading story for audio generation
|
|
|
|
|
|
|
| 546 |
|
| 547 |
# ---- BANNER ----
|
| 548 |
gr.HTML("""
|
|
@@ -595,8 +660,8 @@ def create_demo_interface():
|
|
| 595 |
duration_display = gr.HTML(value="")
|
| 596 |
|
| 597 |
with gr.Column(elem_classes="conversation-scroll"):
|
| 598 |
-
@gr.render(inputs=[turns_state,
|
| 599 |
-
def render_turns(turns,
|
| 600 |
if not turns:
|
| 601 |
gr.Markdown(
|
| 602 |
"Your conversation will appear here.\n\n"
|
|
@@ -606,12 +671,16 @@ def create_demo_interface():
|
|
| 606 |
)
|
| 607 |
return
|
| 608 |
|
| 609 |
-
#
|
|
|
|
| 610 |
speaker_choices = []
|
| 611 |
for i in range(4):
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
|
|
|
|
|
|
|
|
|
| 615 |
|
| 616 |
for idx, turn in enumerate(turns):
|
| 617 |
spk_num = turn["speaker"]
|
|
@@ -698,6 +767,39 @@ def create_demo_interface():
|
|
| 698 |
label="CFG Scale",
|
| 699 |
)
|
| 700 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 701 |
# ---- STEP 4: GENERATE ----
|
| 702 |
generate_btn = gr.Button(
|
| 703 |
"Generate Conference Audio", size="lg", variant="primary",
|
|
@@ -728,6 +830,19 @@ def create_demo_interface():
|
|
| 728 |
outputs=speaker_selections,
|
| 729 |
)
|
| 730 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 731 |
def add_turn(turns):
|
| 732 |
if len(turns) >= MAX_TURNS:
|
| 733 |
gr.Warning(f"Maximum {MAX_TURNS} turns reached.")
|
|
@@ -801,13 +916,13 @@ def create_demo_interface():
|
|
| 801 |
"One more revision, we promise...",
|
| 802 |
]
|
| 803 |
|
| 804 |
-
# outputs: turns, duration, status, title, audio, script_btn, gen_btn, parody, num_speakers, *4 voices
|
| 805 |
def _script_no_change(status_html):
|
| 806 |
return (gr.update(), gr.update(), status_html,
|
| 807 |
gr.update(), gr.update(),
|
| 808 |
gr.update(), gr.update(),
|
| 809 |
gr.update(),
|
| 810 |
-
gr.update(), *[gr.update()] * 4)
|
| 811 |
|
| 812 |
def _script_buttons_busy(status_html):
|
| 813 |
return (gr.update(), gr.update(), status_html,
|
|
@@ -815,7 +930,7 @@ def create_demo_interface():
|
|
| 815 |
gr.update(interactive=False, value="Writing..."),
|
| 816 |
gr.update(interactive=False),
|
| 817 |
gr.update(),
|
| 818 |
-
gr.update(), *[gr.update()] * 4)
|
| 819 |
|
| 820 |
def _script_buttons_ready(status_html=""):
|
| 821 |
return (gr.update(), gr.update(), status_html,
|
|
@@ -823,7 +938,7 @@ def create_demo_interface():
|
|
| 823 |
gr.update(interactive=True, value="Write Script with AI"),
|
| 824 |
gr.update(interactive=True),
|
| 825 |
gr.update(),
|
| 826 |
-
gr.update(), *[gr.update()] * 4)
|
| 827 |
|
| 828 |
def _make_title_html(title):
|
| 829 |
if title:
|
|
@@ -879,15 +994,20 @@ def create_demo_interface():
|
|
| 879 |
yield _script_buttons_ready(f"<em>Error: {msg[:200]}</em>")
|
| 880 |
return
|
| 881 |
|
| 882 |
-
turns, detected, title = result["data"]
|
| 883 |
if not turns:
|
| 884 |
yield _script_buttons_ready("<em>Empty result — try a more descriptive prompt.</em>")
|
| 885 |
return
|
| 886 |
|
| 887 |
-
|
|
|
|
| 888 |
while len(voices) < 4:
|
| 889 |
voices.append(None)
|
| 890 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 891 |
audio_label = title if title else AUDIO_LABEL_DEFAULT
|
| 892 |
yield (turns, estimate_duration(turns), "",
|
| 893 |
_make_title_html(title),
|
|
@@ -895,7 +1015,7 @@ def create_demo_interface():
|
|
| 895 |
gr.update(interactive=True, value="Write Script with AI"),
|
| 896 |
gr.update(interactive=True),
|
| 897 |
parody_result["lines"],
|
| 898 |
-
detected, *
|
| 899 |
|
| 900 |
generate_script_btn.click(
|
| 901 |
fn=on_generate_script,
|
|
@@ -904,13 +1024,13 @@ def create_demo_interface():
|
|
| 904 |
script_title_display, complete_audio_output,
|
| 905 |
generate_script_btn, generate_btn,
|
| 906 |
parody_lines_state,
|
| 907 |
-
num_speakers] + speaker_selections,
|
| 908 |
)
|
| 909 |
|
| 910 |
# --- Load examples ---
|
| 911 |
def load_example(idx):
|
| 912 |
if idx >= len(EXAMPLE_SCRIPTS):
|
| 913 |
-
return [], 2, "", "<h3 style='margin:0'>Script</h3>", gr.update(), *[None] * 4
|
| 914 |
|
| 915 |
title = example_names[idx]
|
| 916 |
script = EXAMPLE_SCRIPTS_NATURAL[idx]
|
|
@@ -924,14 +1044,15 @@ def create_demo_interface():
|
|
| 924 |
return (turns, num, estimate_duration(turns),
|
| 925 |
f"<h3 style='margin:0'>{title}</h3>",
|
| 926 |
gr.update(label=title),
|
| 927 |
-
*voices[:4])
|
| 928 |
|
| 929 |
for idx, btn in enumerate(example_buttons):
|
| 930 |
btn.click(
|
| 931 |
fn=lambda i=idx: load_example(i),
|
| 932 |
inputs=[],
|
| 933 |
outputs=[turns_state, num_speakers, duration_display,
|
| 934 |
-
script_title_display, complete_audio_output]
|
|
|
|
| 935 |
queue=False,
|
| 936 |
)
|
| 937 |
|
|
|
|
| 236 |
- NEVER embed another character's dialogue inside someone else's turn
|
| 237 |
- WRONG: "Speaker 1: We need magic. Mom: Hey kids, what's going on?"
|
| 238 |
- RIGHT: Every time the speaker changes, END the current turn, add a BLANK LINE, then start a NEW turn with "Speaker N:" on its own line
|
| 239 |
+
- Do NOT use character names as inline labels like "Mom:" or "Wizard:" mid-paragraph — always use "Speaker N:" on a fresh line
|
| 240 |
+
|
| 241 |
+
AFTER THE DIALOGUE — Character roster (REQUIRED):
|
| 242 |
+
- After the final dialogue turn, add a blank line, then a single line in this EXACT format:
|
| 243 |
+
Character Genders: Speaker 1: <F or M>, Speaker 2: <F or M>, Speaker 3: <F or M>, Speaker 4: <F or M>
|
| 244 |
+
- Only list speakers you actually used. Use "F" for feminine-presenting voices (women, girls, moms, queens, witches, female narrators) and "M" for masculine-presenting (men, boys, dads, kings, wizards-as-male, male narrators).
|
| 245 |
+
- For gender-ambiguous roles (robots, narrators, dragons), pick whichever fits the tone. Never use "N" or "?"
|
| 246 |
+
- Example: "Character Genders: Speaker 1: M, Speaker 2: M, Speaker 3: F" """
|
| 247 |
|
| 248 |
|
| 249 |
# Strip bracketed stage directions, parenthetical cues, and asterisk actions.
|
|
|
|
| 284 |
return text
|
| 285 |
|
| 286 |
|
| 287 |
+
_GENDER_LINE = re.compile(
|
| 288 |
+
r"character\s+genders\s*:\s*(.+?)$",
|
| 289 |
+
re.IGNORECASE | re.MULTILINE,
|
| 290 |
+
)
|
| 291 |
+
_GENDER_PAIR = re.compile(r"speaker\s+(\d+)\s*:\s*([FM])", re.IGNORECASE)
|
| 292 |
+
|
| 293 |
+
|
| 294 |
+
def _extract_genders(raw: str) -> tuple[str, dict[int, str]]:
|
| 295 |
+
"""Find and remove the 'Character Genders: ...' line. Returns (cleaned_text, genders_dict)."""
|
| 296 |
+
genders: dict[int, str] = {}
|
| 297 |
+
m = _GENDER_LINE.search(raw)
|
| 298 |
+
if not m:
|
| 299 |
+
return raw, genders
|
| 300 |
+
for pair in _GENDER_PAIR.finditer(m.group(1)):
|
| 301 |
+
try:
|
| 302 |
+
n = int(pair.group(1))
|
| 303 |
+
g = pair.group(2).upper()
|
| 304 |
+
if 1 <= n <= 4 and g in ("F", "M"):
|
| 305 |
+
genders[n] = g
|
| 306 |
+
except ValueError:
|
| 307 |
+
pass
|
| 308 |
+
cleaned = raw[: m.start()].rstrip() + "\n" + raw[m.end():].lstrip()
|
| 309 |
+
return cleaned, genders
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
def assign_voices_by_gender(genders: dict[int, str], num_speakers: int) -> list[str]:
    """Pick one voice display string per speaker slot, matching gender where known.

    Args:
        genders: Maps speaker number (1-based) to ``"F"`` or ``"M"``. Slots
            without an entry draw from every available voice.
        num_speakers: Number of speaker slots in use. Slots ``1..num_speakers``
            get an auto-picked voice; the rest keep their default.

    Returns:
        A list of exactly 4 entries. Auto-picked entries are formatted as
        ``"<voice> (<gender>)"``. Other entries come from
        ``DEFAULT_SPEAKERS_DISPLAY`` and are ``None`` where that list has no
        entry for the slot. Voices are not repeated until every available
        voice has been used.
    """
    pools = {
        "F": [v for v in AVAILABLE_VOICES if VOICE_GENDERS.get(v) == "F"],
        "M": [v for v in AVAILABLE_VOICES if VOICE_GENDERS.get(v) == "M"],
    }
    used: set[str] = set()
    chosen: list[str] = []

    for i in range(4):
        slot = i + 1
        # Unused slots keep their default. The same applies when there are no
        # voices to pick from, where indexing AVAILABLE_VOICES[0] would raise.
        if slot > num_speakers or not AVAILABLE_VOICES:
            chosen.append(DEFAULT_SPEAKERS_DISPLAY[i] if i < len(DEFAULT_SPEAKERS_DISPLAY) else None)
            continue

        # Unknown or missing gender draws from the full voice list.
        preferred = pools.get(genders.get(slot), AVAILABLE_VOICES)
        pick = next((v for v in preferred if v not in used), None)
        if pick is None:
            # Preferred pool exhausted: fall back to any unused voice.
            pick = next((v for v in AVAILABLE_VOICES if v not in used), None)
        if pick is None:
            # Every voice is already taken: reuse the first one.
            pick = AVAILABLE_VOICES[0]

        used.add(pick)
        chosen.append(f"{pick} ({VOICE_GENDERS.get(pick, '?')})")

    return chosen
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
def generate_script_from_prompt(prompt: str) -> tuple[list[dict], int, str, list[str]]:
|
| 339 |
+
"""Returns (turns, num_speakers, title, voice_selections)."""
|
| 340 |
system = SCRIPT_SYSTEM_PROMPT.format(max_words=SCRIPT_MAX_WORDS)
|
| 341 |
response = llm_client.chat_completion(
|
| 342 |
messages=[
|
|
|
|
| 355 |
title = lines[0].split(":", 1)[1].strip()
|
| 356 |
raw = "\n".join(lines[1:])
|
| 357 |
|
| 358 |
+
# Extract and strip the "Character Genders:" line before parsing turns
|
| 359 |
+
raw, genders = _extract_genders(raw)
|
| 360 |
+
|
| 361 |
turns = parse_script_to_turns(raw)
|
| 362 |
# Scrub stage directions from each turn, drop any turn that becomes empty
|
| 363 |
turns = [
|
|
|
|
| 372 |
total_words = sum(len(t["text"].split()) for t in turns)
|
| 373 |
speaker_ids = {t["speaker"] for t in turns}
|
| 374 |
num_speakers = max(min(len(speaker_ids), 4), 1) if speaker_ids else 1
|
| 375 |
+
|
| 376 |
+
voice_selections = assign_voices_by_gender(genders, num_speakers)
|
| 377 |
+
return turns, num_speakers, title, voice_selections
|
| 378 |
|
| 379 |
|
| 380 |
PARODY_SYSTEM_PROMPT = """You are a comedian narrator. The user will give you a scenario. Write a SHORT, funny behind-the-scenes narration of what's "really" happening while their audio is being generated. Be absurd, self-aware, and poke fun at AI.
|
|
|
|
| 606 |
turns_state = gr.State([])
|
| 607 |
script_title_state = gr.State("")
|
| 608 |
parody_lines_state = gr.State([]) # funny loading story for audio generation
|
| 609 |
+
# Current voice selection per speaker slot (list of 4 display strings like "Cherry (F)")
|
| 610 |
+
voice_selections_state = gr.State(list(DEFAULT_SPEAKERS_DISPLAY))
|
| 611 |
|
| 612 |
# ---- BANNER ----
|
| 613 |
gr.HTML("""
|
|
|
|
| 660 |
duration_display = gr.HTML(value="")
|
| 661 |
|
| 662 |
with gr.Column(elem_classes="conversation-scroll"):
|
| 663 |
+
@gr.render(inputs=[turns_state, voice_selections_state])
|
| 664 |
+
def render_turns(turns, voice_sels):
|
| 665 |
if not turns:
|
| 666 |
gr.Markdown(
|
| 667 |
"Your conversation will appear here.\n\n"
|
|
|
|
| 671 |
)
|
| 672 |
return
|
| 673 |
|
| 674 |
+
# Build speaker choice labels from the CURRENT voice selections,
|
| 675 |
+
# so changing a Voice dropdown below propagates to the tags above.
|
| 676 |
speaker_choices = []
|
| 677 |
for i in range(4):
|
| 678 |
+
sel = voice_sels[i] if voice_sels and i < len(voice_sels) else None
|
| 679 |
+
if sel:
|
| 680 |
+
# sel looks like "Cherry (F)" — display directly
|
| 681 |
+
speaker_choices.append(f"Speaker {i+1} - {sel}")
|
| 682 |
+
else:
|
| 683 |
+
speaker_choices.append(f"Speaker {i+1}")
|
| 684 |
|
| 685 |
for idx, turn in enumerate(turns):
|
| 686 |
spk_num = turn["speaker"]
|
|
|
|
| 767 |
label="CFG Scale",
|
| 768 |
)
|
| 769 |
|
| 770 |
+
# ---- Voice preview ----
|
| 771 |
+
with gr.Accordion("🔊 Preview voices before generating", open=False):
|
| 772 |
+
with gr.Row():
|
| 773 |
+
preview_voice = gr.Dropdown(
|
| 774 |
+
choices=VOICE_DISPLAY,
|
| 775 |
+
value=VOICE_DISPLAY[0] if VOICE_DISPLAY else None,
|
| 776 |
+
label="Pick a voice",
|
| 777 |
+
scale=2,
|
| 778 |
+
)
|
| 779 |
+
preview_audio = gr.Audio(
|
| 780 |
+
label="Sample",
|
| 781 |
+
value=os.path.join("public", "voices", f"{AVAILABLE_VOICES[0]}.mp3") if AVAILABLE_VOICES else None,
|
| 782 |
+
autoplay=False,
|
| 783 |
+
show_download_button=False,
|
| 784 |
+
scale=3,
|
| 785 |
+
)
|
| 786 |
+
|
| 787 |
+
def _load_preview(display: str):
    """Return an update for the preview audio player for the selected voice.

    The update's value is the path ``public/voices/<name>.mp3`` when
    ``display`` resolves to a voice name and that file exists; otherwise the
    value is ``None``.
    """
    sample_path = None
    if display:
        voice_name = voice_display_to_name(display)
        if voice_name:
            candidate = os.path.join("public", "voices", f"{voice_name}.mp3")
            if os.path.exists(candidate):
                sample_path = candidate
    return gr.update(value=sample_path)
|
| 795 |
+
|
| 796 |
+
preview_voice.change(
|
| 797 |
+
fn=_load_preview,
|
| 798 |
+
inputs=[preview_voice],
|
| 799 |
+
outputs=[preview_audio],
|
| 800 |
+
queue=False,
|
| 801 |
+
)
|
| 802 |
+
|
| 803 |
# ---- STEP 4: GENERATE ----
|
| 804 |
generate_btn = gr.Button(
|
| 805 |
"Generate Conference Audio", size="lg", variant="primary",
|
|
|
|
| 830 |
outputs=speaker_selections,
|
| 831 |
)
|
| 832 |
|
| 833 |
+
# Two-way sync: when a Voice dropdown changes, update voice_selections_state
|
| 834 |
+
# so the script turn tags re-render with the new voice label.
|
| 835 |
+
def _sync_voice_state(*voices):
|
| 836 |
+
return list(voices)
|
| 837 |
+
|
| 838 |
+
for sel in speaker_selections:
|
| 839 |
+
sel.change(
|
| 840 |
+
fn=_sync_voice_state,
|
| 841 |
+
inputs=speaker_selections,
|
| 842 |
+
outputs=[voice_selections_state],
|
| 843 |
+
queue=False,
|
| 844 |
+
)
|
| 845 |
+
|
| 846 |
def add_turn(turns):
|
| 847 |
if len(turns) >= MAX_TURNS:
|
| 848 |
gr.Warning(f"Maximum {MAX_TURNS} turns reached.")
|
|
|
|
| 916 |
"One more revision, we promise...",
|
| 917 |
]
|
| 918 |
|
| 919 |
+
# outputs: turns, duration, status, title, audio, script_btn, gen_btn, parody, num_speakers, *4 voices, voice_selections_state
|
| 920 |
def _script_no_change(status_html):
    """Build a 14-item output tuple that only changes the status slot.

    Every position is a no-op ``gr.update()`` except index 2, which carries
    ``status_html``.
    """
    outputs = [gr.update() for _ in range(14)]
    outputs[2] = status_html
    return tuple(outputs)
|
| 926 |
|
| 927 |
def _script_buttons_busy(status_html):
|
| 928 |
return (gr.update(), gr.update(), status_html,
|
|
|
|
| 930 |
gr.update(interactive=False, value="Writing..."),
|
| 931 |
gr.update(interactive=False),
|
| 932 |
gr.update(),
|
| 933 |
+
gr.update(), *[gr.update()] * 4, gr.update())
|
| 934 |
|
| 935 |
def _script_buttons_ready(status_html=""):
|
| 936 |
return (gr.update(), gr.update(), status_html,
|
|
|
|
| 938 |
gr.update(interactive=True, value="Write Script with AI"),
|
| 939 |
gr.update(interactive=True),
|
| 940 |
gr.update(),
|
| 941 |
+
gr.update(), *[gr.update()] * 4, gr.update())
|
| 942 |
|
| 943 |
def _make_title_html(title):
|
| 944 |
if title:
|
|
|
|
| 994 |
yield _script_buttons_ready(f"<em>Error: {msg[:200]}</em>")
|
| 995 |
return
|
| 996 |
|
| 997 |
+
turns, detected, title, voice_picks = result["data"]
|
| 998 |
if not turns:
|
| 999 |
yield _script_buttons_ready("<em>Empty result — try a more descriptive prompt.</em>")
|
| 1000 |
return
|
| 1001 |
|
| 1002 |
+
# voice_picks is a list of 4 display strings from assign_voices_by_gender
|
| 1003 |
+
voices = list(voice_picks)
|
| 1004 |
while len(voices) < 4:
|
| 1005 |
voices.append(None)
|
| 1006 |
|
| 1007 |
+
# Strip "Speaker N - " style prefixes so the Voice dropdowns get clean values.
|
| 1008 |
+
# assign_voices_by_gender already returns display strings like "Cherry (F)".
|
| 1009 |
+
clean_voices = voices[:4]
|
| 1010 |
+
|
| 1011 |
audio_label = title if title else AUDIO_LABEL_DEFAULT
|
| 1012 |
yield (turns, estimate_duration(turns), "",
|
| 1013 |
_make_title_html(title),
|
|
|
|
| 1015 |
gr.update(interactive=True, value="Write Script with AI"),
|
| 1016 |
gr.update(interactive=True),
|
| 1017 |
parody_result["lines"],
|
| 1018 |
+
detected, *clean_voices, clean_voices)
|
| 1019 |
|
| 1020 |
generate_script_btn.click(
|
| 1021 |
fn=on_generate_script,
|
|
|
|
| 1024 |
script_title_display, complete_audio_output,
|
| 1025 |
generate_script_btn, generate_btn,
|
| 1026 |
parody_lines_state,
|
| 1027 |
+
num_speakers] + speaker_selections + [voice_selections_state],
|
| 1028 |
)
|
| 1029 |
|
| 1030 |
# --- Load examples ---
|
| 1031 |
def load_example(idx):
|
| 1032 |
if idx >= len(EXAMPLE_SCRIPTS):
|
| 1033 |
+
return [], 2, "", "<h3 style='margin:0'>Script</h3>", gr.update(), *[None] * 4, list(DEFAULT_SPEAKERS_DISPLAY)
|
| 1034 |
|
| 1035 |
title = example_names[idx]
|
| 1036 |
script = EXAMPLE_SCRIPTS_NATURAL[idx]
|
|
|
|
| 1044 |
return (turns, num, estimate_duration(turns),
|
| 1045 |
f"<h3 style='margin:0'>{title}</h3>",
|
| 1046 |
gr.update(label=title),
|
| 1047 |
+
*voices[:4], voices[:4])
|
| 1048 |
|
| 1049 |
for idx, btn in enumerate(example_buttons):
|
| 1050 |
btn.click(
|
| 1051 |
fn=lambda i=idx: load_example(i),
|
| 1052 |
inputs=[],
|
| 1053 |
outputs=[turns_state, num_speakers, duration_display,
|
| 1054 |
+
script_title_display, complete_audio_output]
|
| 1055 |
+
+ speaker_selections + [voice_selections_state],
|
| 1056 |
queue=False,
|
| 1057 |
)
|
| 1058 |
|