import gradio as gr
import chardet
import time

from core.summarizer import NarrativeSummarizer
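
# NarrativeSummarizer (local module) is expected to provide, judging by its use below:
#   - chunk_text_tokenwise(text, max_tokens, overlap) -> list[str]
#   - summarize_batch(texts, prompt_type, custom_prompt) -> list[str]
#   - a .tokenizer attribute exposing encode() and model_max_length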

# Models available
MODEL_OPTIONS = [
    "facebook/bart-large-cnn",
    "sshleifer/distilbart-cnn-12-6",
    "google/pegasus-cnn_dailymail",
    "google/long-t5-local-base",
    "t5-small",
    "t5-base",
    "mistralai/Mistral-7B-v0.1",
    "Lewdiculous/Captain-Eris_Violet-V0.420-12B-GGUF-ARM-Imatrix",
    "Lewdiculous/Lumimaid-v0.2-12B-GGUF-IQ-Imatrix",
    "Lewdiculous/Lumimaid-v0.2-8B-GGUF-IQ-Imatrix",
    "Lewdiculous/L3-8B-Stheno-v3.1-GGUF-IQ-Imatrix",
    "Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix",
    "Lewdiculous/Nyanade_Stunna-Maid-7B-v0.2-GGUF-IQ-Imatrix",
    "Lewdiculous/InfinityRP-v1-7B-GGUF-IQ-Imatrix",
    "Lewdiculous/Kunoichi-DPO-v2-7B-GGUF-Imatrix",
    "Lewdiculous/BuRP_7B-GGUF-IQ-Imatrix",
    "Lewdiculous/Layris_9B-GGUF-IQ-Imatrix",
]
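# Note: the GGUF entries assume NarrativeSummarizer can load GGUF checkpoints
# (e.g. via a llama.cpp-based backend); they are not standard transformers checkpoints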

# Prompt presets; the "Bread"/"Butter" templates are presumably defined inside NarrativeSummarizer
PROMPT_OPTIONS = [
    "Bread only",
    "Butter only",
    "Bread and Butter",
    "Custom Prompt",
]


def run_app(file_obj, text_input, model_name, local_model_path, prompt_type,
            custom_prompt_text, iterations, batch_size, progress=gr.Progress()):
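    """Summarize an uploaded file or pasted text.

    Returns a 4-tuple matching the Gradio outputs:
    (summary text, process log, download file path or None, re-enabled Summarize button).
    """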
    start_time = time.time()

    # Check if a custom prompt was selected but not provided
    if prompt_type == "Custom Prompt" and not custom_prompt_text:
        return ("❌ Error: 'Custom Prompt' selected but no custom prompt provided.",
                "", None, gr.Button(value="Summarize", interactive=True))
    # Determine the input source: file or direct text
    if file_obj is not None:
        progress(0, desc="Reading file and detecting encoding...")
        try:
            # gr.File may pass a tempfile-like object (.name) or a plain path, depending on Gradio version
            file_path = getattr(file_obj, "name", file_obj)
            with open(file_path, 'rb') as f:
                raw_data = f.read()
            detected = chardet.detect(raw_data)
            # Fall back to UTF-8 when chardet cannot identify the encoding
            encoding = detected['encoding'] or 'utf-8'
            text = raw_data.decode(encoding, errors='replace')
        except Exception as e:
            return (f"❌ Unable to read the file: {e}", "", None,
                    gr.Button(value="Summarize", interactive=True))
    elif text_input:
        text = text_input
    else:
        return ("❌ Please upload a file or enter text to summarize.", "", None,
                gr.Button(value="Summarize", interactive=True))
    input_word_count = len(text.split())

    # Override the model name with a local path if one was provided
    actual_model_name = local_model_path if local_model_path else model_name

    # Slider values can arrive as floats; range() and slicing need ints
    iterations = int(iterations)
    batch_size = int(batch_size)

    # Initialize up front so the except block below can always reference it
    log_messages = []

    # Instantiate the summarizer and process the text
    try:
        progress(0.1, desc=f"Loading model: {actual_model_name}...")
        summarizer = NarrativeSummarizer(model_name=actual_model_name)
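        # Token-wise chunking: the 50-token overlap presumably carries context across
        # chunk boundaries so sentences aren't cut mid-thought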
        chunks = summarizer.chunk_text_tokenwise(text, max_tokens=512, overlap=50)
        total_chunks = len(chunks)
        log_messages = [
            f"✅ Input ready. Word Count: {input_word_count}",
            f"✅ Using model: {actual_model_name}",
            f"✅ Split into {total_chunks} chunks.",
            f"✅ Beginning summarization with {iterations} passes...",
        ]
        condensed_chunks = []
        total_batches = (total_chunks + batch_size - 1) // batch_size  # ceiling division
        for i in range(0, total_chunks, batch_size):
            batch = chunks[i:i + batch_size]
            progress(i / total_chunks, desc=f"Processing batch {i // batch_size + 1} of {total_batches}...")
            # Summarize this batch; each extra iteration re-summarizes the
            # previous pass's output, trading detail for brevity
            for _ in range(iterations):
                batch_summaries = summarizer.summarize_batch(
                    batch,
                    prompt_type,
                    custom_prompt_text if prompt_type == "Custom Prompt" else None,
                )
                batch = batch_summaries
            condensed_chunks.extend(batch)
        log_messages.append("✅ Chunk summarization complete. Combining summaries...")

        # Second pass for global compression
        combined = " ".join(condensed_chunks)
        final_summary = combined
        # If the combined text is too long to return as-is, run one more global pass
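        # (the 0.8 factor triggers the extra pass once the text nears the tokenizer's model_max_length)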
        if len(summarizer.tokenizer.encode(combined)) > summarizer.tokenizer.model_max_length * 0.8:
            log_messages.append("✅ Final text is large, performing global summarization...")
            final_summary = summarizer.summarize_batch(
                [combined],
                prompt_type,
                custom_prompt_text if prompt_type == "Custom Prompt" else None,
            )[0]

        end_time = time.time()
        duration = round(end_time - start_time, 2)
        summary_word_count = len(final_summary.split())
        log_messages.append(f"✅ Summarization complete in {duration} seconds.")
        log_messages.append(f"✅ Final Summary Word Count: {summary_word_count}")
        # Write the summary to disk; returning the path populates the gr.File download component
        output_file_path = "summary.txt"
        with open(output_file_path, "w", encoding="utf-8") as f:
            f.write(final_summary)

        return final_summary, "\n".join(log_messages), output_file_path, gr.Button(value="Summarize", interactive=True)
    except Exception as e:
        log_messages.append(f"❌ An error occurred: {e}")
        return (f"An error occurred during summarization: {e}", "\n".join(log_messages),
                None, gr.Button(value="Summarize", interactive=True))


# Gradio Interface
model_tip = gr.Markdown(
    """
**Model Selection Tips:**
- **facebook/bart-large-cnn:** Fast, general-purpose summarization for short to medium texts.
- **sshleifer/distilbart-cnn-12-6:** A smaller, faster version of BART.
- **google/pegasus-cnn_dailymail:** Excellent for abstractive summarization.
- **google/long-t5-local-base:** Designed for long documents; better context handling.
- **t5-small / t5-base:** Efficient and versatile for various tasks.
- **mistralai/Mistral-7B-v0.1:** High-quality, nuanced summaries; resource-intensive.
- **Lewdiculous/Captain-Eris_Violet-V0.420-12B-GGUF-ARM-Imatrix:** Great for **world-building and immersive roleplay** with coherent, dynamic narratives.
- **Lewdiculous/Lumimaid-v0.2-12B-GGUF-IQ-Imatrix:** High-quality **world generation** with **creative control** for better safety and versatility.
- **Lewdiculous/Lumimaid-v0.2-8B-GGUF-IQ-Imatrix:** Versatile model for **roleplay and world-building**, with an emphasis on creative flexibility.
- **Lewdiculous/L3-8B-Stheno-v3.1-GGUF-IQ-Imatrix:** **Optimized for 1-on-1 roleplay**, handling **scenarios, RPGs**, and **storywriting**.
- **Lewdiculous/Llama-3-Lumimaid-8B-v0.1-OAS-GGUF-IQ-Imatrix:** **Versatile for roleplay** with **unrestricted generation**, perfect for dynamic worlds.
- **Lewdiculous/Nyanade_Stunna-Maid-7B-v0.2-GGUF-IQ-Imatrix:** **Multimodal model** with **vision capabilities**, ideal for **image-based and text-based roleplay**.
- **Lewdiculous/InfinityRP-v1-7B-GGUF-IQ-Imatrix:** Designed for a **cozy roleplay experience** with reduced hallucinations, focused on more grounded conversations.
- **Lewdiculous/Kunoichi-DPO-v2-7B-GGUF-Imatrix:** **Small, smart model** for **quick roleplay interactions** with thoughtful responses.
- **Lewdiculous/BuRP_7B-GGUF-IQ-Imatrix:** Great for **unconventional roleplay** with **minimal refusals** and the ability to handle unique dialogue formats.
- **Lewdiculous/Layris_9B-GGUF-IQ-Imatrix:** A **merged model** with **chaotic and unhinged creativity**, perfect for **dynamic and free-form roleplay**.
"""
)

with gr.Blocks(css="#status-log { overflow-y: scroll; max-height: 200px; }") as demo:
    gr.Markdown("# Narrative Summarizer")
    gr.Markdown("Upload your text file OR enter text below to get a summarized version.")
    with gr.Row():
        file_input = gr.File(label="Upload Text File (.txt)", file_types=['.txt'])
        text_input = gr.Textbox(label="Or, paste your text here", lines=10)
    gr.Markdown("---")
    with gr.Accordion("Model & Prompt Settings", open=True):
        with gr.Row():
            model_dropdown = gr.Dropdown(choices=MODEL_OPTIONS, label="Choose Model", value=MODEL_OPTIONS[0])
            local_model_path_input = gr.Textbox(label="Local Model Path (optional)")
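        # model_tip was created above, outside the Blocks context; .render() attaches it here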
        model_tip.render()
        with gr.Row():
            prompt_dropdown = gr.Dropdown(choices=PROMPT_OPTIONS, label="Choose Prompt Type", value=PROMPT_OPTIONS[0])
            custom_prompt_input = gr.Textbox(label="Custom Prompt (use {chunk} placeholder)")
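            # {chunk} is presumably substituted with each text chunk by the summarizer's prompt template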
    gr.Markdown("---")
    with gr.Accordion("Advanced Parameters", open=False):
        with gr.Row():
            iterations_slider = gr.Slider(minimum=1, maximum=5, step=1, label="Summarization Iterations", value=1)
            batch_size_slider = gr.Slider(minimum=1, maximum=8, step=1, label="Batch Size (for GPU)", value=4)
    summarize_button = gr.Button("Summarize")
    with gr.Row():
        output_text = gr.Textbox(label="Summary Output", lines=15)
        status_log = gr.Textbox(label="Process Log", lines=15, interactive=False, elem_id="status-log")
    download_button = gr.File(label="Download Summary", file_types=['.txt'])

    def update_ui_on_click():
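        # Disable the button so a running job can't be double-submitted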
        return gr.Button(interactive=False)
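
    # Two-step chain: disable the button first, then run the job; run_app re-enables it on return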
    summarize_button.click(
        fn=update_ui_on_click,
        outputs=summarize_button
    ).then(
        fn=run_app,
        inputs=[file_input, text_input, model_dropdown, local_model_path_input,
                prompt_dropdown, custom_prompt_input, iterations_slider, batch_size_slider],
        outputs=[output_text, status_log, download_button, summarize_button]
    )

# queue() ensures gr.Progress updates reach the client (queuing is required for progress on older Gradio versions)
demo.queue().launch()