import sys
import os
import re
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
from huggingface_hub import login
from dotenv import load_dotenv
# --- FIX: Add project root to Python's path ---
project_root = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, project_root)
# --- Updated Spaces import for Zero-GPU compatibility ---
try:
    import spaces
    print("'spaces' module imported successfully.")
except ImportError:
    print("Warning: 'spaces' module not found. Using dummy decorator for local execution.")
    class DummySpaces:
        def GPU(self, *args, **kwargs):
            # Support both bare `@spaces.GPU` and parameterised `@spaces.GPU(...)` usage.
            if len(args) == 1 and callable(args[0]) and not kwargs:
                print(f"Note: Dummy @GPU decorator used for function '{args[0].__name__}'.")
                return args[0]
            def decorator(func):
                print(f"Note: Dummy @GPU decorator used for function '{func.__name__}'.")
                return func
            return decorator
    spaces = DummySpaces()
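# Illustrative usage note (a sketch, not part of the original file): with the fallback above,
# both decoration styles leave the wrapped function unchanged, so the same code runs locally
# and on a Zero-GPU Space. The names `f` and `g` below are hypothetical.
#
#   @spaces.GPU                   # bare usage, as on the translation function later in this file
#   def f(): ...
#
#   @spaces.GPU(duration=120)     # parameterised usage; arguments are ignored by the dummy
#   def g(): ...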
# --- Step 1: Hugging Face Authentication ---
load_dotenv()
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("FATAL: Hugging Face token not found. Please set the HF_TOKEN environment variable.")
print("--- Logging in to Hugging Face Hub ---")
login(token=HF_TOKEN)
# --- Step 2: Initialize Model and Tokenizer ---
MODEL_NAME = "Gregniuki/ERNIE-4.5-0.3B-PT-Translator-EN-PL-EN"
print(f"--- Loading model from Hugging Face Hub: {MODEL_NAME} ---")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.bfloat16 if device.type == "cuda" else torch.float32
print(f"--- Using device: {device}, dtype: {dtype} ---")
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=dtype, trust_remote_code=True).to(device)
    model.eval()
    print("--- Model and Tokenizer Loaded Successfully ---")
except Exception as e:
    raise RuntimeError(f"FATAL: Could not load components. Error: {e}") from e
# --- Helper Functions ---
def chunk_text(text: str, max_size: int) -> list[str]:
    """Split text into chunks of at most max_size characters, preferring to break at sentence ends."""
    if not text:
        return []
    chunks, start_index = [], 0
    while start_index < len(text):
        end_index = start_index + max_size
        if end_index >= len(text):
            chunks.append(text[start_index:].strip())
            break
        # Prefer to split at the last full stop inside the current window.
        split_pos = text.rfind('.', start_index, end_index)
        if split_pos != -1:
            chunk, start_index = text[start_index:split_pos + 1], split_pos + 1
        else:
            chunk, start_index = text[start_index:end_index], end_index
        chunks.append(chunk.strip())
    return [c for c in chunks if c]
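# Illustrative example (comment only, not executed): chunking prefers the last '.' inside
# each max_size window and falls back to a hard cut when no full stop is found, e.g.
#   chunk_text("First sentence. Second one.", 20)
#   -> ["First sentence.", "Second one."]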
# --- Step 3: Core Translation Function (Now with Token-by-Token Streaming) ---
@spaces.GPU
@torch.no_grad()
def translate_with_chunks(input_text: str, chunk_size: int, temperature: float, top_p: float, top_k: int, progress=gr.Progress()) -> str:
"""
Processes text by translating each chunk independently and streams the
results back token-by-token for a smooth, real-time user experience.
"""
progress(0, desc="Starting...")
if not input_text:
yield "Input text is empty. Please enter some text to translate."
return
text_chunks = chunk_text(input_text, chunk_size) if len(input_text) > chunk_size else [input_text]
num_chunks = len(text_chunks)
print(f"Processing {num_chunks} independent chunk(s).")
full_output = ""
for i, chunk in enumerate(text_chunks):
progress(0.1 + (i / num_chunks) * 0.9, desc=f"Translating chunk {i+1}/{num_chunks}")
messages = [{"role": "user", "content": chunk}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([prompt], add_special_tokens=False, return_tensors="pt").to(device)
# Use TextIteratorStreamer for real-time token generation
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
# Set up generation arguments
generation_kwargs = dict(
**model_inputs,
streamer=streamer,
max_new_tokens=2048,
do_sample=True,
temperature=temperature,
top_p=top_p,
top_k=top_k
)
# Run the generation in a separate thread to avoid blocking the UI
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()
# Yield new tokens as they are generated
for new_token in streamer:
full_output += new_token
yield full_output
# Add a space after each chunk for better readability
full_output += " "
yield full_output.strip()
progress(1.0, desc="Done!")
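# Local smoke-test sketch (assumption: called outside a Gradio event, where gr.Progress is
# effectively a no-op). The function is a generator, so each yield carries the translation
# accumulated so far:
#
#   for partial in translate_with_chunks("Hello world.", 2048, 0.7, 0.95, 50):
#       print(partial)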
# --- Step 4: Create and Launch the Gradio App ---
print("\n--- Initializing Gradio Interface ---")
app = gr.Interface(
    fn=translate_with_chunks,
    inputs=[
        gr.Textbox(lines=15, label="Input Text", placeholder="Enter long text to process here..."),
        gr.Slider(
            minimum=256,
            maximum=2048,
            value=2048,
            step=64,
            label="Character Chunk Size",
            info="Text will be split into chunks of this size for translation."
        ),
        gr.Slider(
            minimum=0.01,  # Temperature cannot be 0 for sampling
            maximum=2.0,
            value=0.7,
            step=0.01,
            label="Temperature",
            info="Controls randomness. Higher values mean more random outputs."
        ),
        gr.Slider(
            minimum=0.0,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (Nucleus Sampling)",
            info="Selects from tokens with a cumulative probability mass up to this value."
        ),
        gr.Slider(
            minimum=0,
            maximum=100,
            value=50,
            step=1,
            label="Top-k",
            info="Selects from the top 'k' most likely tokens at each step."
        )
    ],
    outputs=gr.Textbox(lines=15, label="Model Output", interactive=False),
    title="ERNIE 4.5 Text Translator (Real-Time Streaming)",
    description="Processes long text by splitting it into independent chunks and streams the translation in real-time.",
    allow_flagging="never"
)
if __name__ == "__main__":
    app.queue().launch()