| |
| import os |
| import time |
| from huggingface_hub import InferenceClient |
| from retriever import retrieve |
|
|
| import os |
|
|
# Read the Hugging Face API token from the environment and fail fast at import
# time when it is missing or empty, so misconfiguration surfaces immediately.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("Hugging Face token not found. Please set HF_TOKEN in your environment variables.")

# Module-wide inference client shared by every call in this file.
hf_client = InferenceClient(
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    token=HF_TOKEN,
    timeout=60,
)
|
|
def rag_answer(query, top_k=5, *, max_tokens=300, temperature=0.7):
    """Answer *query* with the chat model, grounded in retrieved snippets.

    The function calls ``retrieve(query, top_k=top_k)``, joins the
    ``"snippet"`` field of every returned item into one context string, and
    sends that context together with the question to ``hf_client``.

    Args:
        query: The user's question; also passed to ``retrieve``.
        top_k: Forwarded to ``retrieve`` as its ``top_k`` argument.
        max_tokens: Forwarded to ``chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``chat_completion`` as ``temperature``.

    Returns:
        A ``(answer, results)`` tuple: the content of the first completion
        choice, and the unmodified value returned by ``retrieve``.

    Raises:
        KeyError: If a retrieved item has no ``"snippet"`` key.
        IndexError: If the completion response contains no choices.
    """
    results = retrieve(query, top_k=top_k)

    # Snippets are separated by a blank line so they stay distinguishable
    # inside the prompt.
    context = "\n\n".join(item["snippet"] for item in results)

    messages = [
        {"role": "system", "content": "You are a helpful assistant. Use the provided context to answer."},
        {"role": "user", "content": f"Context:\n{context}\n\nQuestion:\n{query}"},
    ]

    response = hf_client.chat_completion(
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
    )

    # Use attribute access: the output objects are dataclasses, and item access
    # (message["content"]) only works through a backward-compatibility layer.
    answer = response.choices[0].message.content
    return answer, results
|
|