Nexari-Research committed on
Commit 8754ff8 · verified · 1 Parent(s): 9f47f84

Update app.py

Files changed (1)
  1. app.py +21 -14
app.py CHANGED
@@ -1,4 +1,5 @@
-# app.py - UPDATED: explicit "Reasoning (planner)..." and "Generating — LLM (attempt N)..." status labels
+# app.py - FINAL: ensure "Reasoning (planner)..." shows during planning (before heavy analysis),
+# then show "Generating — LLM (attempt N)..." only when invoking the LLM.
 import re
 import json
 import asyncio
@@ -105,7 +106,7 @@ async def startup_event():
     tokenizer, model = None, None
 
 # -------------------------
-# Prompt builder
+# Prompt builder & utils
 # -------------------------
 def _build_prompt_from_messages(messages: List[Dict[str, str]]) -> str:
     parts = []
@@ -185,7 +186,9 @@ def extract_and_sanitize_plan(text: str, max_plan_chars: int = 240) -> (str, str
     return None, text
 
 # -------------------------
-# Streaming generator with explicit Reasoning + Generating labels
+# Streaming generator with corrected ordering:
+# Emit "Reasoning (planner)..." first, THEN run planning analysis,
+# then emit "Generating — LLM (attempt N)..." for model attempts.
 # -------------------------
 async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600, temperature=0.85):
     try:
@@ -204,13 +207,18 @@ async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600
             yield "data: [DONE]\n\n"
             return
 
-        # Quick initial indicator (keeps UI responsive)
+        # Quick initial indicator to keep UI responsive
         yield f"data: {json.dumps({'status': 'Thinking...'})}\n\n"
         await asyncio.sleep(0)
 
         intent = analyze_intent(last_user_msg) or "general"
 
-        # ---------- PLANNING STAGE (Reasoning - planner) ----------
+        # Emit Reasoning indicator BEFORE heavy planning so UI shows it during planning
+        yield f"data: {json.dumps({'status': 'Reasoning (planner)...'})}\n\n"
+        # small pause to allow UI to render the status before we start analysis
+        await asyncio.sleep(0.15)
+
+        # ---------- PLANNING WORK (now executed while UI shows Reasoning) ----------
         try:
             flow_context = analyze_flow(messages)
         except Exception as e:
@@ -222,10 +230,7 @@ async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600
         min_words = plan_req["min_words"]
         strictness = plan_req["strictness"]
 
-        # explicit planner status the UI expects
-        yield f"data: {json.dumps({'status': 'Reasoning (planner)...'})}\n\n"
-        await asyncio.sleep(0)
-
+        # adjust tokens/temperature if strict
         if strictness:
             temperature = min(temperature + 0.05, 0.95)
             max_tokens = max(max_tokens, min_words // 2 + 120)
@@ -298,16 +303,17 @@ async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600
         except Exception:
             text_prompt = _build_prompt_from_messages(messages)
 
-        # ---------- GENERATION STAGE (Generating — LLM (attempt N)) ----------
+        # ---------- GENERATION STAGE ----------
         max_attempts = 2
        attempts = 0
         last_meta = {}
         generated_text = ""
         while attempts < max_attempts:
             attempts += 1
-            # Clear, explicit generation label for UI
-            yield f"data: {json.dumps({'status': f'Generating — LLM {attempts})...'})}\n\n"
-            await asyncio.sleep(0)
+            # Emit explicit generating label (after planning completed)
+            yield f"data: {json.dumps({'status': f'Generating — LLM (attempt {attempts})...'})}\n\n"
+            # tiny sleep to let UI update
+            await asyncio.sleep(0.06)
 
             model_inputs = tokenizer(text_prompt, return_tensors="pt", truncation=True, max_length=4096).to(next(model.parameters()).device)
 
@@ -363,7 +369,8 @@ async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600
                     text_prompt = _build_prompt_from_messages(messages)
                 except Exception:
                     text_prompt = _build_prompt_from_messages(messages)
-                await asyncio.sleep(0.01)
+                # allow a short break so UI shows the attempted generate label
+                await asyncio.sleep(0.02)
                 continue
 
         if not generated_text:
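For reference, the corrected ordering can be reduced to a short, self-contained sketch. The fake_planning and fake_llm_call functions below are stand-in stubs rather than the real analyze_flow / tokenizer / model.generate helpers from app.py; only the SSE framing and the sequence of status events mirror the diff above.

import asyncio
import json

async def fake_planning(messages):
    # stands in for analyze_flow() and plan building (assumption, not the real helper)
    await asyncio.sleep(0.3)
    return {"min_words": 120, "strictness": False}

async def fake_llm_call(prompt):
    # stands in for tokenizer + model.generate (assumption, not the real helper)
    await asyncio.sleep(0.5)
    return "stub response"

async def status_stream(messages):
    # 1. quick indicator so the UI is never blank
    yield f"data: {json.dumps({'status': 'Thinking...'})}\n\n"
    await asyncio.sleep(0)

    # 2. announce planning BEFORE the heavy analysis runs
    yield f"data: {json.dumps({'status': 'Reasoning (planner)...'})}\n\n"
    await asyncio.sleep(0.15)  # let the UI paint the label first
    plan = await fake_planning(messages)

    # 3. announce each generation attempt right before invoking the model
    for attempt in range(1, 3):
        yield f"data: {json.dumps({'status': f'Generating — LLM (attempt {attempt})...'})}\n\n"
        await asyncio.sleep(0.06)
        text = await fake_llm_call(f"prompt built from messages, aiming for {plan['min_words']} words")
        if text:
            yield f"data: {json.dumps({'content': text})}\n\n"
            break

    yield "data: [DONE]\n\n"

if __name__ == "__main__":
    async def main():
        async for event in status_stream([{"role": "user", "content": "hi"}]):
            print(event, end="")
    asyncio.run(main())

Run as a plain script, it prints the events in order: "Thinking...", "Reasoning (planner)...", "Generating — LLM (attempt 1)...", the content chunk, then [DONE], which is the sequence the UI relies on after this commit.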