Spaces:

DevsDoCode
/

Groq-Inference

Sleeping

Groq-Inference / groq_LPU_inference.py

Upload 4 files

864c556 verified over 1 year ago

2.14 kB

	from groq import Groq
	from dotenv import load_dotenv
	import os
	import time

	def get_groq_api_keys():
	    """Load the .env file and collect the numbered Groq API keys.

	    Calls ``load_dotenv()`` and then reads the environment variables
	    ``GROQ_API_KEY_1`` through ``GROQ_API_KEY_255``.

	    Returns:
	        A list of 255 entries in numeric order. An entry is ``None`` when
	        the corresponding variable is not set.
	    """
	    load_dotenv()
	    variable_names = (f"GROQ_API_KEY_{slot}" for slot in range(1, 256))
	    return list(map(os.getenv, variable_names))

	# Index into Groq_Api_Keys of the key the next request will use.
	# Groq_Inference rebinds it (via ``global``) when a request fails.
	API_KEY = 0

	# Pool of API keys, read once at import time by get_groq_api_keys().
	Groq_Api_Keys = get_groq_api_keys()

	def Groq_Inference(query, model="mistral", system="Be Helpful and Friendly", assistant="", temp=0.7, max_tokens=300):
	    """Send one chat completion request to Groq and return the reply text.

	    A single attempt is made with the key selected by the module-level
	    ``API_KEY`` index. Unset slots in ``Groq_Api_Keys`` are skipped. If the
	    attempt fails, the index moves on to the next slot (wrapping around at
	    the end of the pool) so the caller's next call tries a different key.

	    Args:
	        query: Content of the user message.
	        model: A model id, or any string containing "mistral", "gemma" or
	            "llama" (matched case-insensitively), which is replaced by the
	            corresponding id from the alias table below.
	        system: Content of the system message.
	        assistant: Content of the trailing assistant message.
	        temp: Value passed as ``temperature``.
	        max_tokens: Value passed as ``max_tokens``.

	    Returns:
	        The message content of the first choice, or ``None`` when the
	        request failed or no API key is configured.

	    Side effects:
	        Prints the reply (or the error) and rebinds the global ``API_KEY``.
	    """
	    global API_KEY

	    # Resolve a friendly alias to a concrete model id. The first matching
	    # alias wins, in the order mistral, gemma, llama.
	    # NOTE(review): confirm these model ids are still served by Groq.
	    model_aliases = (
	        ("mistral", "mixtral-8x7b-32768"),
	        ("gemma", "gemma-7b-it"),
	        ("llama", "llama2-70b-4096"),
	    )
	    requested = model.lower()
	    for alias, model_id in model_aliases:
	        if alias in requested:
	            model = model_id
	            break

	    # Pick the first configured key at or after the current index. The pool
	    # may contain None for unset variables; sending str(None) as a key would
	    # only waste a request on a guaranteed authentication failure.
	    key_count = len(Groq_Api_Keys)
	    for offset in range(key_count):
	        slot = (API_KEY + offset) % key_count
	        if Groq_Api_Keys[slot]:
	            API_KEY = slot
	            break
	    else:
	        print("No Groq API key is configured (expected GROQ_API_KEY_1, GROQ_API_KEY_2, ...)")
	        return None

	    try:
	        client = Groq(api_key=Groq_Api_Keys[API_KEY])
	        completion = client.chat.completions.create(
	            model=model,
	            messages=[
	                {"role": "system", "content": system},
	                {"role": "user", "content": query},
	                # NOTE(review): presumably acts as a response prefill; it is
	                # sent even when empty, as before -- confirm this is intended.
	                {"role": "assistant", "content": assistant},
	            ],
	            temperature=temp,
	            max_tokens=max_tokens,
	            top_p=1,
	            stream=False,
	            stop=None,
	        )
	    except Exception as e:
	        # Deliberately broad: any failure rotates to the next key instead of
	        # propagating. The error class is matched by name because only the
	        # Groq client class is imported in this file.
	        print(e)
	        if type(e).__name__ == "AuthenticationError":
	            print("Wrong API Key. Appending API key")
	        else:
	            print("Upgrading API Key. Limit Reached..\nKindly Request Again in 3 Seconds")
	        # Wrap explicitly rather than running off the end of the pool.
	        API_KEY = (API_KEY + 1) % key_count
	        return None

	    reply = completion.choices[0].message.content
	    # Print the completion returned by the LLM.
	    print(reply)
	    return reply



	if __name__ == "__main__":
	    # Demo: send the same greeting repeatedly and report how long each
	    # call took. Runs until the process is interrupted.
	    while True:
	        began = time.time()
	        Groq_Inference("hi, how are you")
	        elapsed = time.time() - began
	        print(elapsed, "Seconds")