Spaces:

Nexari-Research
/

Nexari-Server

Sleeping

App Files Community

Nexari-Research committed 10 days ago

Commit

8754ff8

verified ·

1 Parent(s): 9f47f84

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -14

app.py CHANGED Viewed

@@ -1,4 +1,5 @@
-# app.py - UPDATED: explicit "Reasoning (planner)..." and "Generating — LLM (attempt N)..." status labels
 import re
 import json
 import asyncio
@@ -105,7 +106,7 @@ async def startup_event():
         tokenizer, model = None, None
 # -------------------------
-# Prompt builder
 # -------------------------
 def _build_prompt_from_messages(messages: List[Dict[str, str]]) -> str:
     parts = []
@@ -185,7 +186,9 @@ def extract_and_sanitize_plan(text: str, max_plan_chars: int = 240) -> (str, str
     return None, text
 # -------------------------
-# Streaming generator with explicit Reasoning + Generating labels
 # -------------------------
 async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600, temperature=0.85):
     try:
@@ -204,13 +207,18 @@ async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600
             yield "data: [DONE]\n\n"
             return
-        # Quick initial indicator (keeps UI responsive)
         yield f"data: {json.dumps({'status': 'Thinking...'})}\n\n"
         await asyncio.sleep(0)
         intent = analyze_intent(last_user_msg) or "general"
-        # ---------- PLANNING STAGE (Reasoning - planner) ----------
         try:
             flow_context = analyze_flow(messages)
         except Exception as e:
@@ -222,10 +230,7 @@ async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600
         min_words = plan_req["min_words"]
         strictness = plan_req["strictness"]
-        # explicit planner status the UI expects
-        yield f"data: {json.dumps({'status': 'Reasoning (planner)...'})}\n\n"
-        await asyncio.sleep(0)
         if strictness:
             temperature = min(temperature + 0.05, 0.95)
             max_tokens = max(max_tokens, min_words // 2 + 120)
@@ -298,16 +303,17 @@ async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600
         except Exception:
             text_prompt = _build_prompt_from_messages(messages)
-        # ---------- GENERATION STAGE (Generating — LLM (attempt N)) ----------
         max_attempts = 2
         attempts = 0
         last_meta = {}
         generated_text = ""
         while attempts < max_attempts:
             attempts += 1
-            # Clear, explicit generation label for UI
-            yield f"data: {json.dumps({'status': f'Generating — LLM {attempts})...'})}\n\n"
-            await asyncio.sleep(0)
             model_inputs = tokenizer(text_prompt, return_tensors="pt", truncation=True, max_length=4096).to(next(model.parameters()).device)
@@ -363,7 +369,8 @@ async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600
                         text_prompt = _build_prompt_from_messages(messages)
                 except Exception:
                     text_prompt = _build_prompt_from_messages(messages)
-                await asyncio.sleep(0.01)
                 continue
         if not generated_text:

+# app.py - FINAL: ensure "Reasoning (planner)..." shows during planning (before heavy analysis),
+# then show "Generating — LLM (attempt N)..." only when invoking the LLM.
 import re
 import json
 import asyncio
         tokenizer, model = None, None
 # -------------------------
+# Prompt builder & utils
 # -------------------------
 def _build_prompt_from_messages(messages: List[Dict[str, str]]) -> str:
     parts = []
     return None, text
 # -------------------------
+# Streaming generator with corrected ordering:
+# Emit "Reasoning (planner)..." first, THEN run planning analysis,
+# then emit "Generating — LLM (attempt N)..." for model attempts.
 # -------------------------
 async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600, temperature=0.85):
     try:
             yield "data: [DONE]\n\n"
             return
+        # Quick initial indicator to keep UI responsive
         yield f"data: {json.dumps({'status': 'Thinking...'})}\n\n"
         await asyncio.sleep(0)
         intent = analyze_intent(last_user_msg) or "general"
+        # Emit Reasoning indicator BEFORE heavy planning so UI shows it during planning
+        yield f"data: {json.dumps({'status': 'Reasoning (planner)...'})}\n\n"
+        # small pause to allow UI to render the status before we start analysis
+        await asyncio.sleep(0.15)
+        # ---------- PLANNING WORK (now executed while UI shows Reasoning) ----------
         try:
             flow_context = analyze_flow(messages)
         except Exception as e:
         min_words = plan_req["min_words"]
         strictness = plan_req["strictness"]
+        # adjust tokens/temperature if strict
         if strictness:
             temperature = min(temperature + 0.05, 0.95)
             max_tokens = max(max_tokens, min_words // 2 + 120)
         except Exception:
             text_prompt = _build_prompt_from_messages(messages)
+        # ---------- GENERATION STAGE ----------
         max_attempts = 2
         attempts = 0
         last_meta = {}
         generated_text = ""
         while attempts < max_attempts:
             attempts += 1
+            # Emit explicit generating label (after planning completed)
+            yield f"data: {json.dumps({'status': f'Generating — LLM (attempt {attempts})...'})}\n\n"
+            # tiny sleep to let UI update
+            await asyncio.sleep(0.06)
             model_inputs = tokenizer(text_prompt, return_tensors="pt", truncation=True, max_length=4096).to(next(model.parameters()).device)
                         text_prompt = _build_prompt_from_messages(messages)
                 except Exception:
                     text_prompt = _build_prompt_from_messages(messages)
+                # allow a short break so UI shows the attempted generate label
+                await asyncio.sleep(0.02)
                 continue
         if not generated_text: