Update app.py
app.py
CHANGED
@@ -1,9 +1,15 @@
 """
-Nexari Server Backend (Smart Persona Fix)
+Nexari Server Backend (Smart Persona Fix) - UPDATED
 Maintained by: Piyush
-
+Improvements:
+- Canonical intent labels & robust fallback
+- Safer response cleaning (regex)
+- Persona tone balanced
+- Streaming micro-yield for smoother SSE
+- Safety filter to avoid chain-of-thought leaks or "I'm human" claims
 """
 
+import re
 import spaces
 from fastapi import FastAPI, Request
 from fastapi.responses import StreamingResponse
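The "streaming micro-yield" bullet in the docstring refers to the `await asyncio.sleep(0)` calls added further down. An async generator that only yields strings never hands control back to the event loop between CPU-bound steps, so status frames can arrive in one burst; `sleep(0)` forces a scheduling point after each frame. A minimal self-contained illustration (names invented for this sketch, not from the repo):

import asyncio

async def stream_with_micro_yield():
    # Each sleep(0) suspends the coroutine for one event-loop tick,
    # letting the server flush the previous SSE frame before the
    # next CPU-bound step begins.
    yield 'data: {"status": "step 1"}\n\n'
    await asyncio.sleep(0)
    yield 'data: {"status": "step 2"}\n\n'

async def main():
    async for frame in stream_with_micro_yield():
        print(frame, end="")

asyncio.run(main())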
@@ -13,12 +19,12 @@ import torch
 import uvicorn
 import json
 import asyncio
-from ui import create_ui
+from ui import create_ui
 
 # Engine Imports
-from context_engine import get_smart_context
-from cognitive_engine import get_time_context, get_thinking_strategy
-from tools_engine import analyze_intent, perform_web_search
+from context_engine import get_smart_context
+from cognitive_engine import get_time_context, get_thinking_strategy
+from tools_engine import analyze_intent, perform_web_search
 
 # --- 1. SYSTEM CONFIGURATION ---
 MODEL_ID = "Piyush-boss/Nexari-Qwen-3B-Full"
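`analyze_intent` comes from `tools_engine`, which is not part of this diff; the comments in the next hunk only pin down its contract: it returns one of the canonical labels `internet_search`, `coding_request`, `checking_time`, or `general` (or a falsy value, hence the `or "general"` fallback). A hypothetical keyword-based stand-in that honors that contract, for illustration only:

import re

def analyze_intent(text: str) -> str:
    # Hypothetical fallback classifier; the real tools_engine
    # implementation is not shown in this diff.
    t = (text or "").lower()
    if re.search(r"\b(search|latest|news|google|look up)\b", t):
        return "internet_search"
    if re.search(r"\b(code|bug|function|python|error|script)\b", t):
        return "coding_request"
    if re.search(r"\b(time|clock|date|today)\b", t):
        return "checking_time"
    return "general"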
@@ -27,74 +33,80 @@ print(f">>> System: Initializing model {MODEL_ID} on CPU...")
 
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID,
-    torch_dtype="auto",
+    MODEL_ID,
+    torch_dtype="auto",  # keep compatible, let the environment decide
     device_map="cpu",
    low_cpu_mem_usage=True,
     trust_remote_code=True
 )
 
 # --- 2. DYNAMIC STREAMING LOGIC ---
-async def generate_response_stream(messages, max_tokens=600, temperature=0.85):
-
-
+async def generate_response_stream(messages, max_tokens=600, temperature=0.85):
+    # Expect messages to be a list of dicts with 'role' and 'content'
+    if not messages:
+        messages = [{"role": "user", "content": ""}]
+    last_user_msg = messages[-1].get("content", "")
+
     # === STEP 1: INTENT ANALYSIS ===
     yield f"data: {json.dumps({'status': 'Thinking...'})}\n\n"
-
-
-
+    await asyncio.sleep(0)  # micro-yield to the event loop for smoother SSE
+
+    intent = analyze_intent(last_user_msg) or "general"
+    # Normalize intent naming (tools_engine returns canonical labels)
+    # intent in {"internet_search", "coding_request", "checking_time", "general"}
+
     # === STEP 2: DYNAMIC ROUTING ===
     tool_data = ""
     time_data = ""
     vibe_data = ""
     strategy_data = ""
 
-    if intent == "
+    if intent == "internet_search":
         yield f"data: {json.dumps({'status': 'Searching the web...'})}\n\n"
-        await asyncio.sleep(0
+        await asyncio.sleep(0)
         tool_data = perform_web_search(last_user_msg)
-        vibe_data = get_smart_context(last_user_msg)
+        vibe_data = get_smart_context(last_user_msg)
         strategy_data = get_thinking_strategy(is_complex=True)
 
-    elif intent == "
+    elif intent == "coding_request":
         yield f"data: {json.dumps({'status': 'Analyzing Logic...'})}\n\n"
+        vibe_data = get_smart_context(last_user_msg)
         strategy_data = get_thinking_strategy(is_complex=True)
-
-    elif intent == "
+
+    elif intent == "checking_time":
         yield f"data: {json.dumps({'status': 'Checking Clock...'})}\n\n"
         time_data = get_time_context()
         vibe_data = get_smart_context(last_user_msg)
-
-
-
+        strategy_data = get_thinking_strategy(is_complex=False)
+
+    else:  # general
+        # Keep UI clean (no extra statuses)
         vibe_data = get_smart_context(last_user_msg)
         strategy_data = get_thinking_strategy(is_complex=False)
 
     # === STEP 3: THE BALANCED PERSONA PROMPT ===
-    # Changes: we have merged the "Identity Rules" in with the "Engagement Rules".
-
     base_system_instruction = (
         "### SYSTEM IDENTITY ###\n"
-        "You are **Nexari G1**,
-        "You
-
-        "### ENGAGEMENT RULES
-        "1.
-        "
-        "
-        "
-        "
-        "4. **Format:** Keep it natural. Use emojis sparingly but effectively."
+        "You are **Nexari G1**, an expressive, warm, balanced AI created by **Piyush**.\n"
+        "You can code, reason, search the web, and understand emotions.\n\n"
+
+        "### ENGAGEMENT RULES ###\n"
+        "1. Be natural and warm, expressive but NOT overly excited.\n"
+        "2. After answering, smoothly reconnect with the user (small follow-up question).\n"
+        "3. If asked about capabilities, answer confidently and offer to perform the action.\n"
+        "4. Use emojis sparingly (0-2 per message max). Prefer short, clear replies for quick chats.\n"
+        "5. Do NOT reveal chain-of-thought. Give a concise plan (1-2 lines) if needed, then the final answer.\n"
     )
-
+
     final_system_prompt = f"{base_system_instruction}\n{vibe_data}\n{time_data}\n{tool_data}\n{strategy_data}"
 
-    if messages[0]
+    if messages[0].get("role") != "system":
         messages.insert(0, {"role": "system", "content": final_system_prompt})
     else:
         messages[0]["content"] = final_system_prompt
 
     # === STEP 4: GENERATION ===
+    # Note: tokenizer.apply_chat_template keeps the original behaviour
     text_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     model_inputs = tokenizer([text_prompt], return_tensors="pt").to(model.device)
 
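The generation call that produces `generated_ids` (consumed in the next hunk) sits in an unchanged region the diff omits. Assuming a standard Transformers sampling call pushed off the event loop, the elided block presumably resembles this sketch (every keyword argument here is an assumption, not the file's actual code):

# Hypothetical sketch of the elided generation step inside
# generate_response_stream; blocking work goes to a worker thread
# so the async generator keeps the event loop responsive.
generated_ids = await asyncio.to_thread(
    model.generate,
    **model_inputs,
    max_new_tokens=max_tokens,      # 600 by default in this function
    temperature=temperature,        # 0.85 by default
    do_sample=True,                 # temperature only applies when sampling
    pad_token_id=tokenizer.eos_token_id,
)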
@@ -114,15 +126,50 @@ async def generate_response_stream(messages, max_tokens=600, temperature=0.85):
     input_token_len = model_inputs.input_ids.shape[1]
     new_tokens = generated_ids[0][input_token_len:]
     raw_response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
-
-
-
-
-
+
+    # === STEP 5: CLEANING & SAFETY ===
+    # Replace certain provider names with "Piyush" only as whole words
+    cleaned_response = re.sub(r"\b(Anthropic|OpenAI|Alibaba)\b", "Piyush", raw_response)
+
+    # Prevent "I am human" or similar claims
+    forbidden_claims = ["I am a human", "I have a physical body", "I am alive", "I was born", "I breathe"]
+    for fc in forbidden_claims:
+        pattern = re.compile(re.escape(fc), re.IGNORECASE)
+        if pattern.search(cleaned_response):
+            cleaned_response = pattern.sub("I am an AI - expressive and interactive.", cleaned_response)
+
+    # Remove any leaked chain-of-thought markers (e.g., long 'Thought:' sections)
+    # Keep only the last 'Answer' block if both are present
+    if "Thought:" in cleaned_response or "🧠" in cleaned_response:
+        # Try to keep a short plan, not the full private chain-of-thought
+        # Prefer '🧠 Plan:' style if the model provided that; else strip long sections
+        if "🧠 Plan:" in cleaned_response:
+            # keep a short plan (first 3 lines) and the Answer block
+            parts = cleaned_response.split("💡")
+            plan_part = ""
+            answer_part = cleaned_response
+            for p in parts:
+                if "🧠 Plan:" in p:
+                    plan_part = p.strip()
+                if "Answer" in p:
+                    answer_part = "💡" + p
+            # constrain the plan to a short size
+            if plan_part:
+                plan_short = plan_part.splitlines()[:3]
+                cleaned_response = "\n".join(plan_short) + "\n\n" + answer_part
+        else:
+            # fallback: drop everything before the first 'Answer', or keep the last 1600 chars
+            if "Answer" in cleaned_response:
+                cleaned_response = cleaned_response.split("Answer", 1)[-1]
+            else:
+                cleaned_response = cleaned_response[-1600:]  # keep last chunk
+
+    # Cosmetic: if the model used a marker for Thinking->Answer, ensure formatting
+    cleaned_response = cleaned_response.replace("💡 **Answer:**", "\n\n---\n💡 **Answer:**")
 
     final_payload = json.dumps({
         "choices": [{
-            "delta": {
+            "delta": {"content": cleaned_response}
         }]
     })
     yield f"data: {final_payload}\n\n"
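The `\b` word boundaries in the provider-name substitution rewrite whole words only, so the names survive intact inside longer identifiers; the forbidden-claims pass then rewrites literal phrases case-insensitively. A quick standalone check of both passes, with invented sample strings:

import re

raw = "I was built by OpenAI and I am a human"
step1 = re.sub(r"\b(Anthropic|OpenAI|Alibaba)\b", "Piyush", raw)
# step1 == "I was built by Piyush and I am a human"

pattern = re.compile(re.escape("I am a human"), re.IGNORECASE)
step2 = pattern.sub("I am an AI - expressive and interactive.", step1)
print(step2)  # "I was built by Piyush and I am an AI - expressive and interactive."

# Word boundaries keep longer identifiers untouched:
assert re.sub(r"\bOpenAI\b", "Piyush", "MyOpenAIClient") == "MyOpenAIClient"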
@@ -149,4 +196,4 @@ demo = create_ui(gradio_gen_wrapper)
 app = gr.mount_gradio_app(app, demo, path="/")
 
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    uvicorn.run(app, host="0.0.0.0", port=7860)
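The FastAPI route that consumes `generate_response_stream` also lives in an unchanged region of the file. Given the `StreamingResponse` import and the `data: ...\n\n` frames the generator yields, it presumably looks something like this sketch (route path and request shape are assumptions):

from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse

app = FastAPI()  # in app.py the app object already exists

@app.post("/v1/chat/completions")  # hypothetical route path
async def chat_endpoint(request: Request):
    body = await request.json()
    messages = body.get("messages", [])
    return StreamingResponse(
        generate_response_stream(messages),  # async generator of SSE frames
        media_type="text/event-stream",      # tells clients to parse "data: ...\n\n"
    )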