import gradio as gr
import spacy
from spacy.cli import download
from transformers import T5Tokenizer, AutoModelForSeq2SeqLM, pipeline

# Ensure the spaCy English model is available; download it on first run.
try:
    nlp_spacy = spacy.load("en_core_web_sm")
except OSError:
    download("en_core_web_sm")
    nlp_spacy = spacy.load("en_core_web_sm")

# Load the T5 checkpoint for question generation.
# NOTE(review): "valhalla/t5-base-e2e-qg" is an *end-to-end* QG model whose
# canonical prompt is "generate questions: <context>" with <sep>-separated
# outputs; the answer-aware "answer: ... context: ..." prompt used below is
# the format for the highlight/answer-aware valhalla checkpoints. It still
# produces output here, but verify the prompt against the model card.
model_name = "valhalla/t5-base-e2e-qg"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
qg_pipeline = pipeline("text2text-generation", model=model, tokenizer=tokenizer)


def extract_answers(context):
    """Return up to 5 candidate answer spans extracted from ``context``.

    Named entities are preferred; when the text contains none, multi-word
    noun chunks are used as a fallback. Duplicates are removed while
    preserving document order (``dict.fromkeys``) so the first-5 selection
    is deterministic — ``list(set(...))`` ordering varies between runs due
    to string-hash randomization.
    """
    doc = nlp_spacy(context)
    answers = list(dict.fromkeys(ent.text for ent in doc.ents))
    if not answers:
        answers = list(dict.fromkeys(
            chunk.text
            for chunk in doc.noun_chunks
            if len(chunk.text.split()) > 1
        ))
    return answers[:5]


def generate_questions_answers(context):
    """Generate question/answer pairs for a paragraph.

    For each candidate answer from :func:`extract_answers`, prompts the T5
    pipeline with an answer-aware input and pairs the generated question
    with that answer. Sampling (``do_sample=True``) makes the questions
    non-deterministic by design.

    Returns a single display string of "Q: ...\\nA: ..." pairs separated by
    blank lines, or a fallback message when no pairs could be produced.
    """
    answers = extract_answers(context)
    qa_pairs = []
    for ans in answers:
        input_text = f"answer: {ans} context: {context}"
        result = qg_pipeline(
            input_text, max_length=128, do_sample=True, temperature=0.7
        )
        question = result[0]['generated_text']
        qa_pairs.append(f"Q: {question}\nA: {ans}")
    return "\n\n".join(qa_pairs) if qa_pairs else "Could not generate any QA pairs."


iface = gr.Interface(
    fn=generate_questions_answers,
    inputs=gr.Textbox(lines=6, label="Paste Paragraph"),
    outputs="textbox",
    title="Q&A Generator from Paragraph",
    description="Get both questions and answers using spaCy + T5!"
)

iface.queue()
iface.launch()