CaffeinatedCoding committed on
Commit
edb155f
·
verified ·
1 Parent(s): ed20917

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. src/llm.py +25 -67
src/llm.py CHANGED
@@ -1,12 +1,5 @@
1
  """
2
- LLM module. HuggingFace Inference API as primary.
3
- Works natively from HF Spaces — same infrastructure.
4
- OpenRouter and Groq as fallback providers.
5
-
6
- WHY HF Inference API?
7
- HF Spaces can always reach HuggingFace's own APIs.
8
- No network routing issues. Uses existing HF_TOKEN.
9
- Same Llama 3.3 70B model as others.
10
  """
11
 
12
  import os
@@ -17,39 +10,18 @@ from tenacity import retry, stop_after_attempt, wait_exponential
17
  load_dotenv()
18
  logger = logging.getLogger(__name__)
19
 
20
- # ── HuggingFace Inference API ─────────────────────────────
21
- _hf_client = None
22
-
23
- # ── OpenRouter (free tier, reliable fallback) ──────────────
24
  _openrouter_client = None
25
 
26
- # ── Groq fallback (works locally, may be blocked on HF Spaces) ──
27
- _groq_client = None
28
-
29
-
30
- def _init_hf():
31
- global _hf_client
32
- token = os.getenv("HF_TOKEN")
33
- if not token:
34
- logger.warning("HF_TOKEN not set — HF Inference API disabled")
35
- return False
36
- try:
37
- from huggingface_hub import InferenceClient
38
- _hf_client = InferenceClient(
39
- model="meta-llama/Llama-3.3-70B-Instruct",
40
- token=token
41
- )
42
- logger.info("HF Inference API ready (Llama-3.3-70B)")
43
- return True
44
- except Exception as e:
45
- logger.error(f"HF Inference API init failed: {e}")
46
- return False
47
 
48
 
49
  def _init_openrouter():
50
  global _openrouter_client
51
  api_key = os.getenv("OPENROUTER_API_KEY")
52
  if not api_key:
 
53
  return False
54
  try:
55
  from openai import OpenAI
@@ -57,45 +29,38 @@ def _init_openrouter():
57
  base_url="https://openrouter.ai/api/v1",
58
  api_key=api_key,
59
  )
60
- logger.info("OpenRouter ready as fallback")
61
  return True
62
  except Exception as e:
63
  logger.error(f"OpenRouter init failed: {e}")
64
  return False
65
 
66
 
67
- def _init_groq():
68
- global _groq_client
69
- api_key = os.getenv("GROQ_API_KEY")
70
- if not api_key:
 
71
  return False
72
  try:
73
- from groq import Groq
74
- _groq_client = Groq(api_key=api_key)
75
- logger.info("Groq ready as fallback")
 
 
 
76
  return True
77
  except Exception as e:
78
- logger.error(f"Groq init failed: {e}")
79
  return False
80
 
81
 
82
- _hf_ready = _init_hf()
83
  _openrouter_ready = _init_openrouter()
84
- _groq_ready = _init_groq()
85
-
86
-
87
- def _call_hf(messages: list) -> str:
88
- """Call HuggingFace Inference API."""
89
- response = _hf_client.chat_completion(
90
- messages=messages,
91
- max_tokens=1500,
92
- temperature=0.3,
93
- )
94
- return response.choices[0].message.content
95
 
96
 
97
  def _call_openrouter(messages: list) -> str:
98
- """Call OpenRouter free tier."""
99
  response = _openrouter_client.chat.completions.create(
100
  model="meta-llama/llama-3.3-70b-instruct:free",
101
  messages=messages,
@@ -105,25 +70,18 @@ def _call_openrouter(messages: list) -> str:
105
  return response.choices[0].message.content
106
 
107
 
108
- def _call_groq(messages: list) -> str:
109
- """Call Groq as fallback."""
110
- response = _groq_client.chat.completions.create(
111
- model="llama-3.3-70b-versatile",
112
  messages=messages,
 
113
  temperature=0.3,
114
- max_tokens=1500
115
  )
116
  return response.choices[0].message.content
117
 
118
 
119
  def _call_with_fallback(messages: list) -> str:
120
- """Try Groq first, then OpenRouter, then HF."""
121
- if _groq_ready and _groq_client:
122
- try:
123
- return _call_groq(messages)
124
- except Exception as e:
125
- logger.warning(f"Groq failed: {e}, trying OpenRouter")
126
-
127
  if _openrouter_ready and _openrouter_client:
128
  try:
129
  return _call_openrouter(messages)
 
1
  """
2
+ LLM module. OpenRouter as primary, HuggingFace Inference API as fallback.
 
 
 
 
 
 
 
3
  """
4
 
5
  import os
 
10
  load_dotenv()
11
  logger = logging.getLogger(__name__)
12
 
13
+ # ── OpenRouter (primary) ──────────────────────────────────
 
 
 
14
  _openrouter_client = None
15
 
16
+ # ── HuggingFace Inference API (fallback) ──────────────────
17
+ _hf_client = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
 
20
  def _init_openrouter():
21
  global _openrouter_client
22
  api_key = os.getenv("OPENROUTER_API_KEY")
23
  if not api_key:
24
+ logger.warning("OPENROUTER_API_KEY not set — OpenRouter disabled")
25
  return False
26
  try:
27
  from openai import OpenAI
 
29
  base_url="https://openrouter.ai/api/v1",
30
  api_key=api_key,
31
  )
32
+ logger.info("OpenRouter ready (primary)")
33
  return True
34
  except Exception as e:
35
  logger.error(f"OpenRouter init failed: {e}")
36
  return False
37
 
38
 
39
+ def _init_hf():
40
+ global _hf_client
41
+ token = os.getenv("HF_TOKEN")
42
+ if not token:
43
+ logger.warning("HF_TOKEN not set — HF Inference API disabled")
44
  return False
45
  try:
46
+ from huggingface_hub import InferenceClient
47
+ _hf_client = InferenceClient(
48
+ model="meta-llama/Llama-3.3-70B-Instruct",
49
+ token=token
50
+ )
51
+ logger.info("HF Inference API ready (fallback)")
52
  return True
53
  except Exception as e:
54
+ logger.error(f"HF Inference API init failed: {e}")
55
  return False
56
 
57
 
 
58
  _openrouter_ready = _init_openrouter()
59
+ _hf_ready = _init_hf()
 
 
 
 
 
 
 
 
 
 
60
 
61
 
62
  def _call_openrouter(messages: list) -> str:
63
+ """Call OpenRouter."""
64
  response = _openrouter_client.chat.completions.create(
65
  model="meta-llama/llama-3.3-70b-instruct:free",
66
  messages=messages,
 
70
  return response.choices[0].message.content
71
 
72
 
73
+ def _call_hf(messages: list) -> str:
74
+ """Call HuggingFace Inference API."""
75
+ response = _hf_client.chat_completion(
 
76
  messages=messages,
77
+ max_tokens=1500,
78
  temperature=0.3,
 
79
  )
80
  return response.choices[0].message.content
81
 
82
 
83
  def _call_with_fallback(messages: list) -> str:
84
+ """Try OpenRouter first, then HF."""
 
 
 
 
 
 
85
  if _openrouter_ready and _openrouter_client:
86
  try:
87
  return _call_openrouter(messages)