| |
| import threading, requests |
| from typing import Callable, List, Optional, Dict, Union, Any |
| from litellm.caching import Cache |
| import httpx |
|
|
# Callback registries. The first three accept either a string or a callable
# per entry; the remaining ones hold callables only. All start empty.
input_callback: List[Union[str, Callable]] = []
success_callback: List[Union[str, Callable]] = []
failure_callback: List[Union[str, Callable]] = []
callbacks: List[Callable] = []
_async_success_callback: List[Callable] = []

# Rule hooks, kept in separate lists for the pre-call and post-call phases.
pre_call_rules: List[Callable] = []
post_call_rules: List[Callable] = []
# General switches and identity fields.
set_verbose = False
email: Optional[str] = None
token: Optional[str] = None
telemetry = True
max_tokens = 256
drop_params = False
retry = True

# Provider credentials and provider-specific settings; all unset by default.
api_key: Optional[str] = None
openai_key: Optional[str] = None
azure_key: Optional[str] = None
anthropic_key: Optional[str] = None
replicate_key: Optional[str] = None
cohere_key: Optional[str] = None
maritalk_key: Optional[str] = None
ai21_key: Optional[str] = None
openrouter_key: Optional[str] = None
huggingface_key: Optional[str] = None
vertex_project: Optional[str] = None
vertex_location: Optional[str] = None
togetherai_api_key: Optional[str] = None
baseten_key: Optional[str] = None
aleph_alpha_key: Optional[str] = None
nlp_cloud_key: Optional[str] = None

# Feature toggles. NOTE: `logging` here is a plain bool attribute of this
# module; it shares its name with the stdlib `logging` module.
use_client: bool = False
logging: bool = True
caching: bool = False
caching_with_models: bool = False
# Cache handle, model aliasing and budget bookkeeping.
cache: Optional[Cache] = None
model_alias_map: Dict[str, str] = {}
max_budget: float = 0.0
_current_cost = 0
error_logs: Dict = {}
add_function_to_prompt: bool = False

# Optional caller-supplied httpx sessions (sync and async).
client_session: Optional[httpx.Client] = None
aclient_session: Optional[httpx.AsyncClient] = None

model_fallbacks: Optional[List] = None
# Passed to get_model_cost_map() when this module is imported.
model_cost_map_url: str = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
suppress_debug_info = False

# Request, retry and fallback defaults.
request_timeout: Optional[float] = 6000
num_retries: Optional[int] = None
fallbacks: Optional[List] = None
context_window_fallbacks: Optional[List] = None
allowed_fails: int = 0

# Secret manager client; typed as Any, unset by default.
secret_manager_client: Optional[Any] = None
| |
|
|
def get_model_cost_map(url: str):
    """Return the model cost map as parsed JSON.

    The map is fetched from ``url`` with a 5 second timeout. If anything in
    that attempt raises (request failure, non-success HTTP status, invalid
    JSON), the ``model_prices_and_context_window_backup.json`` resource
    bundled in the ``litellm`` package is loaded and returned instead.

    Args:
        url: Location of the JSON document to download.

    Returns:
        The decoded JSON content, from the remote document or the backup.
    """
    try:
        with requests.get(url, timeout=5) as response:
            response.raise_for_status()
            return response.json()
    except Exception:
        # Deliberate best-effort: never fail on the remote fetch; fall back
        # to the copy shipped with the package.
        import importlib.resources
        import json

        with importlib.resources.open_text(
            "litellm", "model_prices_and_context_window_backup.json"
        ) as backup_file:
            return json.load(backup_file)
# Resolved once, when this module is imported.
model_cost = get_model_cost_map(model_cost_map_url)

# Starts empty; maps a string key to a dict.
custom_prompt_dict: Dict[str, dict] = {}
| |
class MyLocal(threading.local):
    """Thread-local container whose ``user`` attribute starts as "Hello World"."""

    def __init__(self):
        self.user = "Hello World"


# Module-level thread-local context; identify() writes to it.
_thread_context = MyLocal()


def identify(event_details):
    """Record ``event_details["user"]`` on the thread-local context.

    Nothing is changed when ``event_details`` has no "user" key.

    Args:
        event_details: Container supporting ``in`` and item lookup.
    """
    if "user" not in event_details:
        return
    _thread_context.user = event_details["user"]
|
|
|
|
| |
# Request-level overrides; none are set by default.
api_base = headers = api_version = organization = config_path = None
| |
# Per-provider model name lists, filled from `model_cost` below.
open_ai_chat_completion_models: List = []
open_ai_text_completion_models: List = []
cohere_models: List = []
anthropic_models: List = []
openrouter_models: List = []
vertex_chat_models: List = []
vertex_code_chat_models: List = []
vertex_text_models: List = []
vertex_code_text_models: List = []
ai21_models: List = []
nlp_cloud_models: List = []
aleph_alpha_models: List = []
bedrock_models: List = []
deepinfra_models: List = []
perplexity_models: List = []

# Maps each entry's "litellm_provider" value to the list that collects it.
_provider_model_lists = {
    "openai": open_ai_chat_completion_models,
    "text-completion-openai": open_ai_text_completion_models,
    "cohere": cohere_models,
    "anthropic": anthropic_models,
    "openrouter": openrouter_models,
    "vertex_ai-text-models": vertex_text_models,
    "vertex_ai-code-text-models": vertex_code_text_models,
    "vertex_ai-chat-models": vertex_chat_models,
    "vertex_ai-code-chat-models": vertex_code_chat_models,
    "ai21": ai21_models,
    "nlp_cloud": nlp_cloud_models,
    "aleph_alpha": aleph_alpha_models,
    "bedrock": bedrock_models,
    "deepinfra": deepinfra_models,
    "perplexity": perplexity_models,
}

for key, value in model_cost.items():
    _provider = value.get("litellm_provider")
    # Only string provider tags can match; anything else is skipped.
    if not isinstance(_provider, str):
        continue
    _target = _provider_model_lists.get(_provider)
    if _target is None:
        continue
    if _provider == "openrouter":
        # For openrouter, store what follows the first "/" of the key.
        split_string = key.split("/", 1)
        _target.append(split_string[1])
    else:
        _target.append(key)
|
|
| |
# Endpoint strings for the perplexity, anyscale and deepinfra hosts.
openai_compatible_endpoints: List = [
    "api.perplexity.ai",
    "api.endpoints.anyscale.com/v1",
    "api.deepinfra.com/v1/openai",
]
|
|
|
|
| |
# Replicate model identifiers in "<owner>/<model>:<version hash>" form.
# Every entry needs its own trailing comma: adjacent string literals are
# silently concatenated by Python, which previously fused the flan-t5 and
# dolly entries into a single invalid identifier.
replicate_models: List = [
    # Llama family
    "replicate/llama-2-70b-chat:2796ee9483c3fd7aa2e171d38f4ca12251a30609463dcfd4cd76703f22e96cdf",
    "a16z-infra/llama-2-13b-chat:2a7f981751ec7fdf87b5b91ad4db53683a98082e9ff7bfd12c8cd5ea85980a52",
    "meta/codellama-13b:1c914d844307b0588599b8393480a3ba917b660c7e9dfae681542b5325f228db",
    # Vicuna
    "replicate/vicuna-13b:6282abe6a492de4145d7bb601023762212f9ddbbe78278bd6771c8b3b2f2a13b",
    "joehoover/instructblip-vicuna13b:c4c54e3c8c97cd50c2d2fec9be3b6065563ccf7d43787fb99f84151b867178fe",
    # Flan-T5
    "daanelson/flan-t5-large:ce962b3f6792a57074a601d3979db5839697add2e4e02696b3ced4c022d4767f",
    # Others
    "replicate/dolly-v2-12b:ef0e1aefc61f8e096ebe4db6b2bacc297daf2ef6899f0f7e001ec445893500e5",
    "replit/replit-code-v1-3b:b84f4c074b807211cd75e3e8b1589b6399052125b4c27106e43d47189e8415ad",
]
|
|
# meta-llama Llama-2 repositories: the "-hf" variants first, then the
# names without that suffix.
huggingface_models: List = [
    # "-hf" variants
    "meta-llama/Llama-2-7b-hf",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-13b-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    "meta-llama/Llama-2-70b-hf",
    "meta-llama/Llama-2-70b-chat-hf",
    # names without the "-hf" suffix
    "meta-llama/Llama-2-7b",
    "meta-llama/Llama-2-7b-chat",
    "meta-llama/Llama-2-13b",
    "meta-llama/Llama-2-13b-chat",
    "meta-llama/Llama-2-70b",
    "meta-llama/Llama-2-70b-chat",
]
|
|
# Together AI model names, grouped by family.
together_ai_models: List = [
    # Llama 2 (chat)
    "togethercomputer/llama-2-70b-chat",
    # Llama 2 (base / long-context / instruct)
    "togethercomputer/llama-2-70b",
    "togethercomputer/LLaMA-2-7B-32K",
    "togethercomputer/Llama-2-7B-32K-Instruct",
    "togethercomputer/llama-2-7b",
    # Falcon
    "togethercomputer/falcon-40b-instruct",
    "togethercomputer/falcon-7b-instruct",
    # Alpaca
    "togethercomputer/alpaca-7b",
    # StarChat
    "HuggingFaceH4/starchat-alpha",
    # Code-oriented models
    "togethercomputer/CodeLlama-34b",
    "togethercomputer/CodeLlama-34b-Instruct",
    "togethercomputer/CodeLlama-34b-Python",
    "defog/sqlcoder",
    "NumbersStation/nsql-llama-2-7B",
    "WizardLM/WizardCoder-15B-V1.0",
    "WizardLM/WizardCoder-Python-34B-V1.0",
    # Other language models
    "NousResearch/Nous-Hermes-Llama2-13b",
    "Austism/chronos-hermes-13b",
    "upstage/SOLAR-0-70b-16bit",
    "WizardLM/WizardLM-70B-V1.0",
]
|
|
|
|
# Short model lists for providers with only a handful of entries.
baseten_models: List = [
    "qvv0xeq",
    "q841o8w",
    "31dxrj3",
]

petals_models = ["petals-team/StableBeluga2"]

ollama_models = ["llama2"]

maritalk_models = ["maritalk"]
|
|
# Flat list of known model names: a new list holding the entries of each
# provider list, in the order given here.
# NOTE(review): petals_models, vertex_code_chat_models and
# vertex_code_text_models are not part of this list - confirm that is intended.
model_list = [
    *open_ai_chat_completion_models,
    *open_ai_text_completion_models,
    *cohere_models,
    *anthropic_models,
    *replicate_models,
    *openrouter_models,
    *huggingface_models,
    *vertex_chat_models,
    *vertex_text_models,
    *ai21_models,
    *together_ai_models,
    *baseten_models,
    *aleph_alpha_models,
    *nlp_cloud_models,
    *ollama_models,
    *bedrock_models,
    *deepinfra_models,
    *perplexity_models,
    *maritalk_models,
]
|
|
# Provider identifiers, in their original order.
provider_list: List = [
    "openai", "custom_openai", "cohere", "anthropic", "replicate",
    "huggingface", "together_ai", "openrouter", "vertex_ai", "palm",
    "ai21", "baseten", "azure", "sagemaker", "bedrock",
    "vllm", "nlp_cloud", "petals", "oobabooga", "ollama",
    "deepinfra", "perplexity", "anyscale", "maritalk", "custom",
]
|
|
# Provider name -> list of model names. Single-source entries reference the
# provider's list object directly; "openai" and "vertex_ai" are new lists
# built from two source lists each.
# NOTE(review): the vertex code lists, nlp_cloud_models and aleph_alpha_models
# have no entry here - confirm that is intended.
models_by_provider: dict = {
    "openai": [*open_ai_chat_completion_models, *open_ai_text_completion_models],
    "cohere": cohere_models,
    "anthropic": anthropic_models,
    "replicate": replicate_models,
    "huggingface": huggingface_models,
    "together_ai": together_ai_models,
    "baseten": baseten_models,
    "openrouter": openrouter_models,
    "vertex_ai": [*vertex_chat_models, *vertex_text_models],
    "ai21": ai21_models,
    "bedrock": bedrock_models,
    "petals": petals_models,
    "ollama": ollama_models,
    "deepinfra": deepinfra_models,
    "perplexity": perplexity_models,
    "maritalk": maritalk_models,
}
|
|
| |
# Maps a model name to the model name listed as its longer-context fallback.
longer_context_model_fallback_dict: dict = {
    # gpt-3.5 / gpt-4
    "gpt-3.5-turbo": "gpt-3.5-turbo-16k",
    "gpt-3.5-turbo-0301": "gpt-3.5-turbo-16k-0301",
    "gpt-3.5-turbo-0613": "gpt-3.5-turbo-16k-0613",
    "gpt-4": "gpt-4-32k",
    "gpt-4-0314": "gpt-4-32k-0314",
    "gpt-4-0613": "gpt-4-32k-0613",
    # claude
    "claude-instant-1": "claude-2",
    "claude-instant-1.2": "claude-2",
    # bison
    "chat-bison": "chat-bison-32k",
    "chat-bison@001": "chat-bison-32k",
    "codechat-bison": "codechat-bison-32k",
    "codechat-bison@001": "codechat-bison-32k",
    # openrouter-prefixed names
    "openrouter/openai/gpt-3.5-turbo": "openrouter/openai/gpt-3.5-turbo-16k",
    "openrouter/anthropic/claude-instant-v1": "openrouter/anthropic/claude-2",
}
|
|
| |
# Embedding model names per provider.
open_ai_embedding_models: List = ["text-embedding-ada-002"]
cohere_embedding_models: List = [
    # v3.0
    "embed-english-v3.0",
    "embed-english-light-v3.0",
    "embed-multilingual-v3.0",
    # v2.0
    "embed-english-v2.0",
    "embed-english-light-v2.0",
    "embed-multilingual-v2.0",
]
bedrock_embedding_models: List = ["amazon.titan-embed-text-v1"]

# New list combining the three provider lists, in this order.
all_embedding_models = [
    *open_ai_embedding_models,
    *cohere_embedding_models,
    *bedrock_embedding_models,
]
|
|
| from .timeout import timeout |
| from .utils import ( |
| client, |
| exception_type, |
| get_optional_params, |
| modify_integration, |
| token_counter, |
| cost_per_token, |
| completion_cost, |
| get_litellm_params, |
| Logging, |
| acreate, |
| get_model_list, |
| get_max_tokens, |
| get_model_info, |
| register_prompt_template, |
| validate_environment, |
| check_valid_key, |
| get_llm_provider, |
| completion_with_config, |
| register_model, |
| encode, |
| decode, |
| _calculate_retry_after, |
| _should_retry, |
| get_secret |
| ) |
| from .llms.huggingface_restapi import HuggingfaceConfig |
| from .llms.anthropic import AnthropicConfig |
| from .llms.replicate import ReplicateConfig |
| from .llms.cohere import CohereConfig |
| from .llms.ai21 import AI21Config |
| from .llms.together_ai import TogetherAIConfig |
| from .llms.palm import PalmConfig |
| from .llms.nlp_cloud import NLPCloudConfig |
| from .llms.aleph_alpha import AlephAlphaConfig |
| from .llms.petals import PetalsConfig |
| from .llms.vertex_ai import VertexAIConfig |
| from .llms.sagemaker import SagemakerConfig |
| from .llms.ollama import OllamaConfig |
| from .llms.maritalk import MaritTalkConfig |
| from .llms.bedrock import AmazonTitanConfig, AmazonAI21Config, AmazonAnthropicConfig, AmazonCohereConfig, AmazonLlamaConfig |
| from .llms.openai import OpenAIConfig, OpenAITextCompletionConfig |
| from .llms.azure import AzureOpenAIConfig |
| from .main import * |
| from .integrations import * |
| from .exceptions import ( |
| AuthenticationError, |
| InvalidRequestError, |
| BadRequestError, |
| RateLimitError, |
| ServiceUnavailableError, |
| OpenAIError, |
| ContextWindowExceededError, |
| BudgetExceededError, |
| APIError, |
| Timeout, |
| APIConnectionError, |
| APIResponseValidationError |
| ) |
| from .budget_manager import BudgetManager |
| from .proxy.proxy_cli import run_server |
| from .router import Router |