Spaces:

asoni9
/

canHeal

Sleeping

canHeal / rag_pipeline.py

Anirudha Soni

Add timeout

f4cc727 8 months ago

1.11 kB

	# rag_pipeline.py
	import os
	import time
	from huggingface_hub import InferenceClient
	from retriever import retrieve

	import os

	# Read the API token from the environment once, at import time, and fail
	# fast with a clear message when it is missing or empty.
	HF_TOKEN = os.getenv("HF_TOKEN")
	if not HF_TOKEN:
	    raise ValueError(
	        "Hugging Face token not found. Please set HF_TOKEN in your environment variables."
	    )

	# Module-level inference client bound to the LLaMA 3 8B Instruct model,
	# authenticated with HF_TOKEN and configured with timeout=60.
	hf_client = InferenceClient(
	    model="meta-llama/Meta-Llama-3-8B-Instruct",
	    token=HF_TOKEN,
	    timeout=60,
	)

	def rag_answer(query, top_k=5, *, max_tokens=300, temperature=0.7):
	    """Answer *query* with the chat model, using retrieved snippets as context.

	    Args:
	        query: The user's question; passed to ``retrieve`` and embedded in
	            the user message sent to the model.
	        top_k: Forwarded to ``retrieve`` as ``top_k``.
	        max_tokens: Forwarded to ``chat_completion`` as ``max_tokens``.
	        temperature: Forwarded to ``chat_completion`` as ``temperature``.

	    Returns:
	        A ``(answer, results)`` tuple: the content of the first choice's
	        message, and the value returned by ``retrieve`` unchanged.
	    """
	    results = retrieve(query, top_k=top_k)
	    # Each retrieved item is indexed by "snippet"; the snippets are joined
	    # with a blank line between them to form the prompt context.
	    context = "\n\n".join(r["snippet"] for r in results)

	    # Build messages for the conversational (chat completion) endpoint.
	    messages = [
	        {"role": "system", "content": "You are a helpful assistant. Use the provided context to answer."},
	        {"role": "user", "content": f"Context:\n{context}\n\nQuestion:\n{query}"},
	    ]

	    response = hf_client.chat_completion(
	        messages=messages,
	        max_tokens=max_tokens,
	        temperature=temperature,
	    )

	    # Read the answer via attribute access, the form used in the
	    # huggingface_hub chat_completion documentation.
	    return response.choices[0].message.content, results