Spaces: Runtime error
```python
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

model_name_or_path = "TheBloke/Llama-2-13B-chat-GGML"
model_basename = "llama-2-13b-chat.ggmlv3.q5_1.bin"  # the weights ship as a single .bin file

# Download the model file from the Hugging Face Hub
model_path = hf_hub_download(repo_id=model_name_or_path, filename=model_basename)

# Initialize the Llama model with settings suited to partial GPU offloading
lcpp_llm = Llama(
    model_path=model_path,
    n_threads=2,      # CPU threads to use
    n_batch=512,      # batch size for prompt processing; adjust to your VRAM
    n_gpu_layers=32   # layers to offload to the GPU, depending on its VRAM
)
```
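One hedged guess at the runtime error, since the post includes no traceback: the repo above ships GGML weights, and llama-cpp-python removed GGML support in v0.1.79 in favour of the GGUF format, so a Space that installs an unpinned llama-cpp-python will fail at model load. A sketch of two possible fixes follows; whether this is the actual cause is an assumption, and the GGUF filename should be verified against the repo's file list:

```python
# Fix 1 (assumption): pin the last GGML-compatible release in requirements.txt
#     llama-cpp-python==0.1.78

# Fix 2 (assumption): switch to TheBloke's parallel GGUF repo instead
model_name_or_path = "TheBloke/Llama-2-13B-chat-GGUF"
model_basename = "llama-2-13b-chat.Q5_K_M.gguf"  # verify the exact filename in the repo
```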
```python
def generate_email_response(email_prompt):
    # Log the input received by the function
    print("Received prompt:", email_prompt)

    # Whether the input is a shorthand command ("email to ...") or pasted email
    # content, both cases are formatted with the same template for now.
    formatted_prompt = f'''
Email received: "{email_prompt}"
Respond to this email, ensuring a professional tone, providing a concise update, and addressing any potential concerns the sender might have.
Response:
'''

    # Generate the response with the Llama-2 model
    try:
        response = lcpp_llm(
            prompt=formatted_prompt,
            max_tokens=256,
            temperature=0.5,
            top_p=0.95,
            repeat_penalty=1.2,
            top_k=150,
            echo=True  # output includes the prompt, which is stripped below
        )
        generated_response = response["choices"][0]["text"]
        # Remove the echoed prompt from the output if it is included
        if formatted_prompt in generated_response:
            generated_response = generated_response.replace(formatted_prompt, '').strip()
        print("Generated response:", generated_response)
        return generated_response
    except Exception as e:
        print("Error in response generation:", str(e))
        return "Failed to generate response, please check the console for errors."
```
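For reference, a minimal way to exercise the function once the model has loaded; the email text is an invented placeholder:

```python
# Example call; the prompt below is a made-up placeholder
reply = generate_email_response("email to the client about the delayed shipment")
print(reply)
```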