Spaces:

Nexari-Research
/

Nexari-Server

Sleeping

App Files Files Community

Nexari-Research committed 10 days ago

Commit

3bca02a

verified ·

1 Parent(s): 05875d7

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -14

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# app.py - UPDATED: show "Reasoning..." during planning stage and "Generating (attempt N)..." for LLM generation
 import re
 import json
 import asyncio
@@ -185,7 +185,7 @@ def extract_and_sanitize_plan(text: str, max_plan_chars: int = 240) -> (str, str
     return None, text
 # -------------------------
-# Streaming generator with Reasoning indicator + regeneration
 # -------------------------
 async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600, temperature=0.85):
     try:
@@ -204,14 +204,13 @@ async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600
             yield "data: [DONE]\n\n"
             return
-        # initial quick ack
         yield f"data: {json.dumps({'status': 'Thinking...'})}\n\n"
         await asyncio.sleep(0)
         intent = analyze_intent(last_user_msg) or "general"
-        # ---------- PLANNING STAGE (Reasoning...) ----------
-        # Compute flow context and vibe and plan requirements BEFORE calling the LLM
         try:
             flow_context = analyze_flow(messages)
         except Exception as e:
@@ -223,12 +222,10 @@ async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600
         min_words = plan_req["min_words"]
         strictness = plan_req["strictness"]
-        # Inform UI that we finished planning and are now reasoning (planning-model stage)
-        # This is the explicit "Reasoning..." indicator the user requested.
-        yield f"data: {json.dumps({'status': 'Reasoning...'})}\n\n"
         await asyncio.sleep(0)
-        # adjust tokens/temperature if strict
         if strictness:
             temperature = min(temperature + 0.05, 0.95)
             max_tokens = max(max_tokens, min_words // 2 + 120)
@@ -254,13 +251,12 @@ async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600
         final_system_prompt = f"{base_system_instruction}\n{flow_desc}\n{vibe_block}\n{time_data}\n{strategy_data}"
-        # ensure system message present
         if messages and messages[0].get("role") == "system":
             messages[0]["content"] = final_system_prompt
         else:
             messages.insert(0, {"role":"system","content": final_system_prompt})
-        # web search (if needed)
         tool_data_struct = None
         if intent == "internet_search":
             yield f"data: {json.dumps({'status': 'Searching the web...'})}\n\n"
@@ -302,15 +298,15 @@ async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600
         except Exception:
             text_prompt = _build_prompt_from_messages(messages)
-        # ---------- GENERATION STAGE (Generating...) ----------
         max_attempts = 2
         attempts = 0
         last_meta = {}
         generated_text = ""
         while attempts < max_attempts:
             attempts += 1
-            # Inform UI we're now invoking the LLM: explicit "Generating (attempt N)..."
-            yield f"data: {json.dumps({'status': f'Generating (attempt {attempts})...'})}\n\n"
             await asyncio.sleep(0)
             model_inputs = tokenizer(text_prompt, return_tensors="pt", truncation=True, max_length=4096).to(next(model.parameters()).device)

+# app.py - UPDATED: explicit "Reasoning (planner)..." and "Generating — LLM (attempt N)..." status labels
 import re
 import json
 import asyncio
     return None, text
 # -------------------------
+# Streaming generator with explicit Reasoning + Generating labels
 # -------------------------
 async def generate_response_stream(messages: List[Dict[str,str]], max_tokens=600, temperature=0.85):
     try:
             yield "data: [DONE]\n\n"
             return
+        # Quick initial indicator (keeps UI responsive)
         yield f"data: {json.dumps({'status': 'Thinking...'})}\n\n"
         await asyncio.sleep(0)
         intent = analyze_intent(last_user_msg) or "general"
+        # ---------- PLANNING STAGE (Reasoning - planner) ----------
         try:
             flow_context = analyze_flow(messages)
         except Exception as e:
         min_words = plan_req["min_words"]
         strictness = plan_req["strictness"]
+        # explicit planner status the UI expects
+        yield f"data: {json.dumps({'status': 'Reasoning (planner)...'})}\n\n"
         await asyncio.sleep(0)
         if strictness:
             temperature = min(temperature + 0.05, 0.95)
             max_tokens = max(max_tokens, min_words // 2 + 120)
         final_system_prompt = f"{base_system_instruction}\n{flow_desc}\n{vibe_block}\n{time_data}\n{strategy_data}"
         if messages and messages[0].get("role") == "system":
             messages[0]["content"] = final_system_prompt
         else:
             messages.insert(0, {"role":"system","content": final_system_prompt})
+        # web search if needed
         tool_data_struct = None
         if intent == "internet_search":
             yield f"data: {json.dumps({'status': 'Searching the web...'})}\n\n"
         except Exception:
             text_prompt = _build_prompt_from_messages(messages)
+        # ---------- GENERATION STAGE (Generating — LLM (attempt N)) ----------
         max_attempts = 2
         attempts = 0
         last_meta = {}
         generated_text = ""
         while attempts < max_attempts:
             attempts += 1
+            # Clear, explicit generation label for UI
+            yield f"data: {json.dumps({'status': f'Generating — LLM (attempt {attempts})...'})}\n\n"
             await asyncio.sleep(0)
             model_inputs = tokenizer(text_prompt, return_tensors="pt", truncation=True, max_length=4096).to(next(model.parameters()).device)