Nexari-Research committed
Commit 3bca02a · verified · 1 Parent(s): 05875d7

Update app.py

Files changed (1): app.py (+10, -14)
app.py CHANGED
@@ -1,4 +1,4 @@
-# app.py - UPDATED: show "Reasoning..." during planning stage and "Generating (attempt N)..." for LLM generation
+# app.py - UPDATED: explicit "Reasoning (planner)..." and "Generating — LLM (attempt N)..." status labels
 import re
 import json
 import asyncio
@@ -185,7 +185,7 @@ def extract_and_sanitize_plan(text: str, max_plan_chars: int = 240) -> (str, str
     return None, text

 # -------------------------
-# Streaming generator with Reasoning indicator + regeneration
+# Streaming generator with explicit Reasoning + Generating labels
 # -------------------------
 async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600, temperature=0.85):
     try:
@@ -204,14 +204,13 @@ async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600
         yield "data: [DONE]\n\n"
         return

-    # initial quick ack
+    # Quick initial indicator (keeps UI responsive)
     yield f"data: {json.dumps({'status': 'Thinking...'})}\n\n"
     await asyncio.sleep(0)

     intent = analyze_intent(last_user_msg) or "general"

-    # ---------- PLANNING STAGE (Reasoning...) ----------
-    # Compute flow context and vibe and plan requirements BEFORE calling the LLM
+    # ---------- PLANNING STAGE (Reasoning - planner) ----------
     try:
         flow_context = analyze_flow(messages)
     except Exception as e:
@@ -223,12 +222,10 @@ async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600
     min_words = plan_req["min_words"]
     strictness = plan_req["strictness"]

-    # Inform UI that we finished planning and are now reasoning (planning-model stage)
-    # This is the explicit "Reasoning..." indicator the user requested.
-    yield f"data: {json.dumps({'status': 'Reasoning...'})}\n\n"
+    # explicit planner status the UI expects
+    yield f"data: {json.dumps({'status': 'Reasoning (planner)...'})}\n\n"
     await asyncio.sleep(0)

-    # adjust tokens/temperature if strict
     if strictness:
         temperature = min(temperature + 0.05, 0.95)
         max_tokens = max(max_tokens, min_words // 2 + 120)
@@ -254,13 +251,12 @@ async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600

     final_system_prompt = f"{base_system_instruction}\n{flow_desc}\n{vibe_block}\n{time_data}\n{strategy_data}"

-    # ensure system message present
     if messages and messages[0].get("role") == "system":
         messages[0]["content"] = final_system_prompt
     else:
         messages.insert(0, {"role":"system","content": final_system_prompt})

-    # web search (if needed)
+    # web search if needed
     tool_data_struct = None
     if intent == "internet_search":
         yield f"data: {json.dumps({'status': 'Searching the web...'})}\n\n"
@@ -302,15 +298,15 @@ async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600
     except Exception:
         text_prompt = _build_prompt_from_messages(messages)

-    # ---------- GENERATION STAGE (Generating...) ----------
+    # ---------- GENERATION STAGE (Generating — LLM (attempt N)) ----------
     max_attempts = 2
     attempts = 0
     last_meta = {}
     generated_text = ""
     while attempts < max_attempts:
         attempts += 1
-        # Inform UI we're now invoking the LLM: explicit "Generating (attempt N)..."
-        yield f"data: {json.dumps({'status': f'Generating (attempt {attempts})...'})}\n\n"
+        # Clear, explicit generation label for UI
+        yield f"data: {json.dumps({'status': f'Generating — LLM (attempt {attempts})...'})}\n\n"
         await asyncio.sleep(0)

         model_inputs = tokenizer(text_prompt, return_tensors="pt", truncation=True, max_length=4096).to(next(model.parameters()).device)
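A note on the strictness branch in the planning hunk: it nudges temperature up by 0.05 (capped at 0.95) and floors the token budget at min_words // 2 + 120. As a worked example with the function's default max_tokens=600 and a plan demanding min_words = 400: max(600, 400 // 2 + 120) = max(600, 320) = 600, so the floor only overrides the default once the plan asks for more than roughly 960 words.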
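The status updates in the diff all follow the same two-step pattern: yield one SSE-framed chunk ("data: <json>\n\n"), then await asyncio.sleep(0) so control returns to the event loop and the chunk can be flushed before the next, potentially slow, stage begins. A minimal self-contained sketch of that pattern, stripped of the app's planning and model logic:

import asyncio
import json

async def staged_stream():
    # Same framing as app.py: one "data: <json>\n\n" chunk per stage,
    # followed by a zero-length sleep that lets the event loop flush it.
    for label in ("Thinking...", "Reasoning (planner)...", "Generating — LLM (attempt 1)..."):
        yield f"data: {json.dumps({'status': label})}\n\n"
        await asyncio.sleep(0)  # cede control so the chunk is sent promptly
    yield "data: [DONE]\n\n"

async def main():
    async for chunk in staged_stream():
        print(chunk, end="")

asyncio.run(main())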
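On the consuming side these are ordinary server-sent-events frames: each message is a "data: <json>" line followed by a blank line, and a literal "data: [DONE]" closes the stream. A minimal client sketch follows; the route and request body are assumptions for illustration, since the diff does not show the HTTP endpoint:

import json
import httpx  # any streaming HTTP client works; httpx is shown here

def follow_status(url="http://localhost:7860/api/chat"):  # hypothetical route
    with httpx.stream("POST", url, json={"messages": []}, timeout=None) as resp:
        for line in resp.iter_lines():
            if not line.startswith("data: "):
                continue  # skip the blank separators between events
            payload = line[len("data: "):]
            if payload == "[DONE]":
                break  # end-of-stream sentinel from generate_response_stream
            event = json.loads(payload)
            if "status" in event:
                # e.g. "Thinking...", "Reasoning (planner)...",
                # "Generating — LLM (attempt N)...", "Searching the web..."
                print("status:", event["status"])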