from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA


def process_pdf(file_path: str) -> list[str]:
    """Extract text from a PDF and split it into overlapping chunks.

    Args:
        file_path: Path to the PDF file on disk.

    Returns:
        List of text chunks (<= 1000 chars each, 200-char overlap) suitable
        for embedding into a vector store.
    """
    reader = PdfReader(file_path)
    # extract_text() can return None for image-only pages; substitute "".
    text = "\n".join(page.extract_text() or "" for page in reader.pages)
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(text)


def get_groq_response(query, vector_db, model_name="llama3-70b-8192"):
    """Answer *query* via a RetrievalQA chain backed by a Groq-hosted LLM.

    Args:
        query: The user's question.
        vector_db: A vector store exposing ``as_retriever()``.
        model_name: Groq model identifier. Defaults to the current
            recommended model; check Groq's docs for the latest names.

    Returns:
        The chain's answer string, or a human-readable error message —
        this function never raises (boundary-level error handling).
    """
    try:
        # BUGFIX: the model_name parameter was previously ignored in favor
        # of a hardcoded model; it is now honored. The old default
        # ("mixtral-8x7b-32768") was decommissioned, so the default now
        # matches the model the function actually used.
        llm = ChatGroq(
            temperature=0.1,
            model_name=model_name,
            max_tokens=2048,
        )
        qa = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            # "stuff" concatenates the top-k docs into one prompt; k=4 keeps
            # the context within the model's window.
            retriever=vector_db.as_retriever(search_kwargs={"k": 4}),
        )
        return qa.run(query)
    except Exception as e:
        # Boundary handler: convert failures into a user-facing message
        # rather than propagating. Special-case Groq's model-retirement error.
        error_msg = str(e)
        if "model_decommissioned" in error_msg:
            return "Error: Please update the model name in rag_utils.py - check Groq's latest docs"
        return f"Error processing request: {error_msg}"