Spaces:

nonhuman
/

nnnn

Runtime error

App Files Files Community

nnnn / litellm /__init__.py

nonhuman

Upload 165 files

395201c over 2 years ago

raw

history blame

14.1 kB

	### INIT VARIABLES ###
	import threading, requests
	from typing import Callable, List, Optional, Dict, Union, Any
	from litellm.caching import Cache
	import httpx

	# Callback hooks. Per their annotations, the first three lists accept
	# strings or callables; the remaining ones accept callables only.
	input_callback: List[Union[str, Callable]] = []
	success_callback: List[Union[str, Callable]] = []
	failure_callback: List[Union[str, Callable]] = []
	callbacks: List[Callable] = []
	# internal variable - async custom callbacks are routed here.
	_async_success_callback: List[Callable] = []
	pre_call_rules: List[Callable] = []
	post_call_rules: List[Callable] = []
	set_verbose = False
	# `email` and `token` are not used anymore and will be removed in the next
	# MAJOR release - https://github.com/BerriAI/litellm/discussions/648
	email: Optional[str] = None
	token: Optional[str] = None
	telemetry = True
	max_tokens = 256  # OpenAI Defaults
	drop_params = False
	retry = True
	# Provider-specific key / project / location settings; all None until set.
	api_key: Optional[str] = None
	openai_key: Optional[str] = None
	azure_key: Optional[str] = None
	anthropic_key: Optional[str] = None
	replicate_key: Optional[str] = None
	cohere_key: Optional[str] = None
	maritalk_key: Optional[str] = None
	ai21_key: Optional[str] = None
	openrouter_key: Optional[str] = None
	huggingface_key: Optional[str] = None
	vertex_project: Optional[str] = None
	vertex_location: Optional[str] = None
	togetherai_api_key: Optional[str] = None
	baseten_key: Optional[str] = None
	aleph_alpha_key: Optional[str] = None
	nlp_cloud_key: Optional[str] = None
	use_client: bool = False
	logging: bool = True
	# `caching` and `caching_with_models` are not used anymore and will be removed
	# in the next MAJOR release - https://github.com/BerriAI/litellm/discussions/648
	caching: bool = False
	caching_with_models: bool = False
	# cache object <- use this - https://docs.litellm.ai/docs/caching
	cache: Optional[Cache] = None
	model_alias_map: Dict[str, str] = {}
	# set the max budget across all providers
	max_budget: float = 0.0
	# private variable, used if max budget is set
	_current_cost = 0
	error_logs: Dict = {}
	# if function calling not supported by api, append function call details to system prompt
	add_function_to_prompt: bool = False
	client_session: Optional[httpx.Client] = None
	aclient_session: Optional[httpx.AsyncClient] = None
	# Deprecated for 'litellm.fallbacks'
	model_fallbacks: Optional[List] = None
	# Passed to get_model_cost_map() when the module is imported.
	model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
	suppress_debug_info = False
	#### RELIABILITY ####
	# NOTE(review): the unit of `request_timeout` is not stated here - presumably seconds; confirm.
	request_timeout: Optional[float] = 6000
	num_retries: Optional[int] = None
	fallbacks: Optional[List] = None
	context_window_fallbacks: Optional[List] = None
	allowed_fails: int = 0
	####### SECRET MANAGERS #####################
	# list of instantiated key management clients - e.g. azure kv, infisical, etc.
	secret_manager_client: Optional[Any] = None
	#############################################

	def get_model_cost_map(url: str):
	    """Return the model cost map as parsed JSON, with a packaged fallback.

	    An HTTP GET is attempted on ``url`` with a 5 second timeout. If anything
	    in that attempt raises (the request itself, an unsuccessful status, or
	    decoding the body as JSON), the
	    ``model_prices_and_context_window_backup.json`` resource shipped inside
	    the ``litellm`` package is loaded instead.

	    Args:
	        url: Location of the JSON document to download.

	    Returns:
	        The decoded JSON content, from the network when the download
	        succeeds, otherwise from the packaged backup file.
	    """
	    try:
	        # set a 5 second timeout for the get request
	        with requests.get(url, timeout=5) as resp:
	            # Raise an exception if the request is unsuccessful
	            resp.raise_for_status()
	            return resp.json()
	    except Exception:
	        # Deliberate best-effort: any failure above falls back to the local copy.
	        import importlib.resources
	        import json

	        with importlib.resources.open_text(
	            "litellm", "model_prices_and_context_window_backup.json"
	        ) as backup_file:
	            return json.load(backup_file)
	# Loaded once when this module is imported: get_model_cost_map() tries the
	# URL first and falls back to the JSON file packaged with litellm.
	model_cost = get_model_cost_map(url=model_cost_map_url)
	# Starts empty; keys are strings and values are dicts per the annotation.
	custom_prompt_dict:Dict[str, dict] = {}
	####### THREAD-SPECIFIC DATA ###################
	class MyLocal(threading.local):
	    """Thread-local holder whose ``user`` attribute defaults to "Hello World".

	    Because this subclasses ``threading.local``, ``__init__`` is run again
	    for each thread that first touches an instance, so every thread starts
	    from the same default and later assignments stay private to that thread.
	    """

	    def __init__(self) -> None:
	        self.user = "Hello World"


	# Module-level MyLocal instance; identify() stores the current user on it.
	_thread_context = MyLocal()


	def identify(event_details):
	    """Remember the user named in ``event_details`` on the thread-local context.

	    Args:
	        event_details: Container tested with ``"user" in event_details`` and
	            then indexed with ``"user"`` (presumably a dict - confirm
	            against callers).

	    When there is no ``"user"`` entry, the stored value is left untouched.
	    """
	    if "user" not in event_details:
	        return
	    # Store user in thread local data
	    _thread_context.user = event_details["user"]


	####### ADDITIONAL PARAMS ###################
	# configurable params if you use proxy models like Helicone, map spend to
	# org id, etc. All of them start out unset.
	api_base = None
	headers = None
	api_version = None
	organization = None
	config_path = None
	####### COMPLETION MODELS ###################
	# Per-provider model name lists; they start empty and are filled from
	# `model_cost` by the loop below.
	open_ai_chat_completion_models: List = []
	open_ai_text_completion_models: List = []
	cohere_models: List = []
	anthropic_models: List = []
	openrouter_models: List = []
	vertex_chat_models: List = []
	vertex_code_chat_models: List = []
	vertex_text_models: List = []
	vertex_code_text_models: List = []
	ai21_models: List = []
	nlp_cloud_models: List = []
	aleph_alpha_models: List = []
	bedrock_models: List = []
	deepinfra_models: List = []
	perplexity_models: List = []

	# Maps each recognised `litellm_provider` value to the list collecting its models.
	_provider_model_lists: Dict[str, List] = {
	    'openai': open_ai_chat_completion_models,
	    'text-completion-openai': open_ai_text_completion_models,
	    'cohere': cohere_models,
	    'anthropic': anthropic_models,
	    'openrouter': openrouter_models,
	    'vertex_ai-text-models': vertex_text_models,
	    'vertex_ai-code-text-models': vertex_code_text_models,
	    'vertex_ai-chat-models': vertex_chat_models,
	    'vertex_ai-code-chat-models': vertex_code_chat_models,
	    'ai21': ai21_models,
	    'nlp_cloud': nlp_cloud_models,
	    'aleph_alpha': aleph_alpha_models,
	    'bedrock': bedrock_models,
	    'deepinfra': deepinfra_models,
	    'perplexity': perplexity_models,
	}
	for key, value in model_cost.items():
	    _bucket = _provider_model_lists.get(value.get('litellm_provider'))
	    if _bucket is None:
	        # Entry has no provider, or one that is not tracked in a list here.
	        continue
	    if _bucket is openrouter_models:
	        # openrouter names are stored without everything up to the first '/'.
	        split_string = key.split('/', 1)
	        _bucket.append(split_string[1])
	    else:
	        _bucket.append(key)

	# known openai compatible endpoints - we'll eventually move this list to
	# the model_prices_and_context_window.json dictionary
	openai_compatible_endpoints: List = [
	    "api.perplexity.ai",
	    "api.endpoints.anyscale.com/v1",
	    "api.deepinfra.com/v1/openai",
	]


	# well supported replicate llms
	# Each entry has the form "<owner>/<name>:<64-character hex id>".
	# Every element needs its own trailing comma: adjacent string literals are
	# concatenated by Python, which silently merges two models into one
	# invalid name.
	replicate_models: List = [
	    # llama replicate supported LLMs
	    "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
	    "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",
	    "meta/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db",
	    # Vicuna
	    "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b",
	    "joehoover/instructblip-vicuna13b:c4c54e3c8c97cd50c2d2fec9be3b6065563ccf7d43787fb99f84151b867178fe",
	    # Flan T-5
	    "daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f",
	    # Others
	    "replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5",
	    "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
	]

	# these have been tested on extensively. But by default all
	# text2text-generation and text-generation models are supported by liteLLM.
	# - https://docs.litellm.ai/docs/providers
	huggingface_models: List = [
	    "meta-llama/Llama-2-7b-hf",
	    "meta-llama/Llama-2-7b-chat-hf",
	    "meta-llama/Llama-2-13b-hf",
	    "meta-llama/Llama-2-13b-chat-hf",
	    "meta-llama/Llama-2-70b-hf",
	    "meta-llama/Llama-2-70b-chat-hf",
	    "meta-llama/Llama-2-7b",
	    "meta-llama/Llama-2-7b-chat",
	    "meta-llama/Llama-2-13b",
	    "meta-llama/Llama-2-13b-chat",
	    "meta-llama/Llama-2-70b",
	    "meta-llama/Llama-2-70b-chat",
	]

	# supports all together ai models, just pass in the model id
	# e.g. completion(model="together_computer/replit_code_3b",...)
	together_ai_models: List = [
	    # llama llms - chat
	    "togethercomputer/llama-2-70b-chat",
	    # llama llms - language / instruct
	    "togethercomputer/llama-2-70b",
	    "togethercomputer/LLaMA-2-7B-32K",
	    "togethercomputer/Llama-2-7B-32K-Instruct",
	    "togethercomputer/llama-2-7b",
	    # falcon llms
	    "togethercomputer/falcon-40b-instruct",
	    "togethercomputer/falcon-7b-instruct",
	    # alpaca
	    "togethercomputer/alpaca-7b",
	    # chat llms
	    "HuggingFaceH4/starchat-alpha",
	    # code llms
	    "togethercomputer/CodeLlama-34b",
	    "togethercomputer/CodeLlama-34b-Instruct",
	    "togethercomputer/CodeLlama-34b-Python",
	    "defog/sqlcoder",
	    "NumbersStation/nsql-llama-2-7B",
	    "WizardLM/WizardCoder-15B-V1.0",
	    "WizardLM/WizardCoder-Python-34B-V1.0",
	    # language llms
	    "NousResearch/Nous-Hermes-Llama2-13b",
	    "Austism/chronos-hermes-13b",
	    "upstage/SOLAR-0-70b-16bit",
	    "WizardLM/WizardLM-70B-V1.0",
	]


	# The original trailing comment lists "FALCON 7B", "WizardLM", "Mosaic ML"
	# in this order - presumably one label per id below; confirm.
	baseten_models: List = [
	    "qvv0xeq",  # FALCON 7B
	    "q841o8w",  # WizardLM
	    "31dxrj3",  # Mosaic ML
	]

	petals_models = ["petals-team/StableBeluga2"]

	ollama_models = ["llama2"]

	maritalk_models = ["maritalk"]

	# Flat list of known model names, built as a new list from the provider
	# lists in the order shown.
	# NOTE(review): vertex_code_chat_models, vertex_code_text_models and
	# petals_models are not part of this list - confirm that is intended.
	model_list = [
	    *open_ai_chat_completion_models,
	    *open_ai_text_completion_models,
	    *cohere_models,
	    *anthropic_models,
	    *replicate_models,
	    *openrouter_models,
	    *huggingface_models,
	    *vertex_chat_models,
	    *vertex_text_models,
	    *ai21_models,
	    *together_ai_models,
	    *baseten_models,
	    *aleph_alpha_models,
	    *nlp_cloud_models,
	    *ollama_models,
	    *bedrock_models,
	    *deepinfra_models,
	    *perplexity_models,
	    *maritalk_models,
	]

	# Provider identifiers known to this module.
	provider_list: List = [
	    "openai",
	    "custom_openai",
	    "cohere",
	    "anthropic",
	    "replicate",
	    "huggingface",
	    "together_ai",
	    "openrouter",
	    "vertex_ai",
	    "palm",
	    "ai21",
	    "baseten",
	    "azure",
	    "sagemaker",
	    "bedrock",
	    "vllm",
	    "nlp_cloud",
	    "petals",
	    "oobabooga",
	    "ollama",
	    "deepinfra",
	    "perplexity",
	    "anyscale",
	    "maritalk",
	    # custom apis
	    "custom",
	]

	# Provider name -> list of model names. The "openai" and "vertex_ai" values
	# are new combined lists; every other value is the provider list object
	# itself (shared, not copied).
	# NOTE(review): the "vertex_ai" entry leaves out vertex_code_chat_models and
	# vertex_code_text_models - confirm that is intended.
	models_by_provider: dict = {
	    "openai": [*open_ai_chat_completion_models, *open_ai_text_completion_models],
	    "cohere": cohere_models,
	    "anthropic": anthropic_models,
	    "replicate": replicate_models,
	    "huggingface": huggingface_models,
	    "together_ai": together_ai_models,
	    "baseten": baseten_models,
	    "openrouter": openrouter_models,
	    "vertex_ai": [*vertex_chat_models, *vertex_text_models],
	    "ai21": ai21_models,
	    "bedrock": bedrock_models,
	    "petals": petals_models,
	    "ollama": ollama_models,
	    "deepinfra": deepinfra_models,
	    "perplexity": perplexity_models,
	    "maritalk": maritalk_models,
	}

	# mapping for those models which have larger equivalents
	# (key: model name, value: the larger-context model to use instead)
	longer_context_model_fallback_dict: dict = {
	    # openai chat completion models
	    "gpt-3.5-turbo": "gpt-3.5-turbo-16k",
	    "gpt-3.5-turbo-0301": "gpt-3.5-turbo-16k-0301",
	    "gpt-3.5-turbo-0613": "gpt-3.5-turbo-16k-0613",
	    "gpt-4": "gpt-4-32k",
	    "gpt-4-0314": "gpt-4-32k-0314",
	    "gpt-4-0613": "gpt-4-32k-0613",
	    # anthropic
	    "claude-instant-1": "claude-2",
	    "claude-instant-1.2": "claude-2",
	    # vertexai
	    "chat-bison": "chat-bison-32k",
	    "chat-bison@001": "chat-bison-32k",
	    "codechat-bison": "codechat-bison-32k",
	    "codechat-bison@001": "codechat-bison-32k",
	    # openrouter
	    "openrouter/openai/gpt-3.5-turbo": "openrouter/openai/gpt-3.5-turbo-16k",
	    "openrouter/anthropic/claude-instant-v1": "openrouter/anthropic/claude-2",
	}

	####### EMBEDDING MODELS ###################
	open_ai_embedding_models: List = ["text-embedding-ada-002"]
	cohere_embedding_models: List = [
	    "embed-english-v3.0",
	    "embed-english-light-v3.0",
	    "embed-multilingual-v3.0",
	    "embed-english-v2.0",
	    "embed-english-light-v2.0",
	    "embed-multilingual-v2.0",
	]
	bedrock_embedding_models: List = ["amazon.titan-embed-text-v1"]

	# New combined list: openai entries first, then cohere, then bedrock.
	all_embedding_models = [
	    *open_ai_embedding_models,
	    *cohere_embedding_models,
	    *bedrock_embedding_models,
	]

	from .timeout import timeout
	from .utils import (
	client,
	exception_type,
	get_optional_params,
	modify_integration,
	token_counter,
	cost_per_token,
	completion_cost,
	get_litellm_params,
	Logging,
	acreate,
	get_model_list,
	get_max_tokens,
	get_model_info,
	register_prompt_template,
	validate_environment,
	check_valid_key,
	get_llm_provider,
	completion_with_config,
	register_model,
	encode,
	decode,
	_calculate_retry_after,
	_should_retry,
	get_secret
	)
	from .llms.huggingface_restapi import HuggingfaceConfig
	from .llms.anthropic import AnthropicConfig
	from .llms.replicate import ReplicateConfig
	from .llms.cohere import CohereConfig
	from .llms.ai21 import AI21Config
	from .llms.together_ai import TogetherAIConfig
	from .llms.palm import PalmConfig
	from .llms.nlp_cloud import NLPCloudConfig
	from .llms.aleph_alpha import AlephAlphaConfig
	from .llms.petals import PetalsConfig
	from .llms.vertex_ai import VertexAIConfig
	from .llms.sagemaker import SagemakerConfig
	from .llms.ollama import OllamaConfig
	from .llms.maritalk import MaritTalkConfig
	from .llms.bedrock import AmazonTitanConfig, AmazonAI21Config, AmazonAnthropicConfig, AmazonCohereConfig, AmazonLlamaConfig
	from .llms.openai import OpenAIConfig, OpenAITextCompletionConfig
	from .llms.azure import AzureOpenAIConfig
	from .main import * # type: ignore
	from .integrations import *
	from .exceptions import (
	AuthenticationError,
	InvalidRequestError,
	BadRequestError,
	RateLimitError,
	ServiceUnavailableError,
	OpenAIError,
	ContextWindowExceededError,
	BudgetExceededError,
	APIError,
	Timeout,
	APIConnectionError,
	APIResponseValidationError
	)
	from .budget_manager import BudgetManager
	from .proxy.proxy_cli import run_server
	from .router import Router