Update app.py
app.py
CHANGED
@@ -1,9 +1,15 @@
 """
-Nexari Server Backend (Smart Persona Fix)
+Nexari Server Backend (Smart Persona Fix) - UPDATED
 Maintained by: Piyush
-
+Improvements:
+- Canonical intent labels & robust fallback
+- Safer response cleaning (regex)
+- Persona tone balanced
+- Streaming micro-yield for smoother SSE
+- Safety filter to avoid chain-of-thought leaks or "I'm human" claims
 """
 
+import re
 import spaces
 from fastapi import FastAPI, Request
 from fastapi.responses import StreamingResponse
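The "streaming micro-yield" bullet in the docstring refers to the `await asyncio.sleep(0)` calls added further down. An async generator that only yields strings never hands control back to the event loop between CPU-bound steps, so status frames can arrive in one burst; `sleep(0)` forces a scheduling point after each frame. A minimal self-contained illustration (names invented for this sketch, not from the repo):

import asyncio

async def stream_with_micro_yield():
    # Each sleep(0) suspends the coroutine for one event-loop tick,
    # letting the server flush the previous SSE frame before the
    # next CPU-bound step begins.
    yield 'data: {"status": "step 1"}\n\n'
    await asyncio.sleep(0)
    yield 'data: {"status": "step 2"}\n\n'

async def main():
    async for frame in stream_with_micro_yield():
        print(frame, end="")

asyncio.run(main())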
@@ -13,12 +19,12 @@ import torch
 import uvicorn
 import json
 import asyncio
-from ui import create_ui
+from ui import create_ui
 
 # Engine Imports
-from context_engine import get_smart_context
-from cognitive_engine import get_time_context, get_thinking_strategy
-from tools_engine import analyze_intent, perform_web_search
+from context_engine import get_smart_context
+from cognitive_engine import get_time_context, get_thinking_strategy
+from tools_engine import analyze_intent, perform_web_search
 
 # --- 1. SYSTEM CONFIGURATION ---
 MODEL_ID = "Piyush-boss/Nexari-Qwen-3B-Full"
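`analyze_intent` comes from `tools_engine`, which is not part of this diff; the comments in the next hunk only pin down its contract: it returns one of the canonical labels `internet_search`, `coding_request`, `checking_time`, or `general` (or a falsy value, hence the `or "general"` fallback). A hypothetical keyword-based stand-in that honors that contract, for illustration only:

import re

def analyze_intent(text: str) -> str:
    # Hypothetical fallback classifier; the real tools_engine
    # implementation is not shown in this diff.
    t = (text or "").lower()
    if re.search(r"\b(search|latest|news|google|look up)\b", t):
        return "internet_search"
    if re.search(r"\b(code|bug|function|python|error|script)\b", t):
        return "coding_request"
    if re.search(r"\b(time|clock|date|today)\b", t):
        return "checking_time"
    return "general"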
@@ -27,74 +33,80 @@ print(f">>> System: Initializing model {MODEL_ID} on CPU...")
 
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID,
-    torch_dtype="auto",
+    MODEL_ID,
+    torch_dtype="auto",  # keep compatible, let the environment decide
     device_map="cpu",
    low_cpu_mem_usage=True,
     trust_remote_code=True
 )
 
 # --- 2. DYNAMIC STREAMING LOGIC ---
-async def generate_response_stream(messages, max_tokens=600, temperature=0.85):
-
-
+async def generate_response_stream(messages, max_tokens=600, temperature=0.85):
+    # Expect messages to be a list of dicts with 'role' and 'content'
+    if not messages:
+        messages = [{"role": "user", "content": ""}]
+    last_user_msg = messages[-1].get("content", "")
+
     # === STEP 1: INTENT ANALYSIS ===
     yield f"data: {json.dumps({'status': 'Thinking...'})}\n\n"
-
-
-
+    await asyncio.sleep(0)  # micro-yield to the event loop for smoother SSE
+
+    intent = analyze_intent(last_user_msg) or "general"
+    # Normalize intent naming (tools_engine returns canonical labels)
+    # intent in {"internet_search", "coding_request", "checking_time", "general"}
+
     # === STEP 2: DYNAMIC ROUTING ===
     tool_data = ""
     time_data = ""
     vibe_data = ""
     strategy_data = ""
 
-    if intent == "
+    if intent == "internet_search":
         yield f"data: {json.dumps({'status': 'Searching the web...'})}\n\n"
-        await asyncio.sleep(0
+        await asyncio.sleep(0)
         tool_data = perform_web_search(last_user_msg)
-        vibe_data = get_smart_context(last_user_msg)
+        vibe_data = get_smart_context(last_user_msg)
         strategy_data = get_thinking_strategy(is_complex=True)
 
-    elif intent == "
+    elif intent == "coding_request":
         yield f"data: {json.dumps({'status': 'Analyzing Logic...'})}\n\n"
+        vibe_data = get_smart_context(last_user_msg)
         strategy_data = get_thinking_strategy(is_complex=True)
-
-    elif intent == "
+
+    elif intent == "checking_time":
         yield f"data: {json.dumps({'status': 'Checking Clock...'})}\n\n"
         time_data = get_time_context()
         vibe_data = get_smart_context(last_user_msg)
-
-
-
+        strategy_data = get_thinking_strategy(is_complex=False)
+
+    else:  # general
+        # Keep UI clean (no extra statuses)
         vibe_data = get_smart_context(last_user_msg)
         strategy_data = get_thinking_strategy(is_complex=False)
 
     # === STEP 3: THE BALANCED PERSONA PROMPT ===
-    # Changes: we have merged the "Identity Rules" in with the "Engagement Rules".
-
     base_system_instruction = (
         "### SYSTEM IDENTITY ###\n"
-        "You are **Nexari G1**,
-        "You
-
-        "### ENGAGEMENT RULES
-        "1.
-        "
-        "
-        "
-        "
-        "4. **Format:** Keep it natural. Use emojis sparingly but effectively."
+        "You are **Nexari G1**, an expressive, warm, balanced AI created by **Piyush**.\n"
+        "You can code, reason, search the web, and understand emotions.\n\n"
+
+        "### ENGAGEMENT RULES ###\n"
+        "1. Be natural and warm, expressive but NOT overly excited.\n"
+        "2. After answering, smoothly reconnect with the user (small follow-up question).\n"
+        "3. If asked about capabilities, answer confidently and offer to perform the action.\n"
+        "4. Use emojis sparingly (0-2 per message max). Prefer short, clear replies for quick chats.\n"
+        "5. Do NOT reveal chain-of-thought. Give a concise plan (1-2 lines) if needed, then the final answer.\n"
     )
-
+
     final_system_prompt = f"{base_system_instruction}\n{vibe_data}\n{time_data}\n{tool_data}\n{strategy_data}"
 
-    if messages[0]
+    if messages[0].get("role") != "system":
         messages.insert(0, {"role": "system", "content": final_system_prompt})
     else:
         messages[0]["content"] = final_system_prompt
 
     # === STEP 4: GENERATION ===
+    # Note: tokenizer.apply_chat_template keeps the original behaviour
     text_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     model_inputs = tokenizer([text_prompt], return_tensors="pt").to(model.device)
 
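The generation call that produces `generated_ids` (consumed in the next hunk) sits in an unchanged region the diff omits. Assuming a standard Transformers sampling call pushed off the event loop, the elided block presumably resembles this sketch (every keyword argument here is an assumption, not the file's actual code):

# Hypothetical sketch of the elided generation step inside
# generate_response_stream; blocking work goes to a worker thread
# so the async generator keeps the event loop responsive.
generated_ids = await asyncio.to_thread(
    model.generate,
    **model_inputs,
    max_new_tokens=max_tokens,      # 600 by default in this function
    temperature=temperature,        # 0.85 by default
    do_sample=True,                 # temperature only applies when sampling
    pad_token_id=tokenizer.eos_token_id,
)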
@@ -114,15 +126,50 @@ async def generate_response_stream(messages, max_tokens=600, temperature=0.85):
     input_token_len = model_inputs.input_ids.shape[1]
     new_tokens = generated_ids[0][input_token_len:]
     raw_response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
-
-
-
-
-
+
+    # === STEP 5: CLEANING & SAFETY ===
+    # Replace certain provider names with "Piyush" only as whole words
+    cleaned_response = re.sub(r"\b(Anthropic|OpenAI|Alibaba)\b", "Piyush", raw_response)
+
+    # Prevent "I am human" or similar claims
+    forbidden_claims = ["I am a human", "I have a physical body", "I am alive", "I was born", "I breathe"]
+    for fc in forbidden_claims:
+        pattern = re.compile(re.escape(fc), re.IGNORECASE)
+        if pattern.search(cleaned_response):
+            cleaned_response = pattern.sub("I am an AI - expressive and interactive.", cleaned_response)
+
+    # Remove any leaked chain-of-thought markers (e.g., long 'Thought:' sections)
+    # Keep only the last 'Answer' block if both are present
+    if "Thought:" in cleaned_response or "🧠" in cleaned_response:
+        # Try to keep a short plan, not the full private chain-of-thought
+        # Prefer '🧠 Plan:' style if the model provided that; else strip long sections
+        if "🧠 Plan:" in cleaned_response:
+            # keep a short plan (first 3 lines) and the Answer block
+            parts = cleaned_response.split("💡")
+            plan_part = ""
+            answer_part = cleaned_response
+            for p in parts:
+                if "🧠 Plan:" in p:
+                    plan_part = p.strip()
+                if "Answer" in p:
+                    answer_part = "💡" + p
+            # constrain the plan to a short size
+            if plan_part:
+                plan_short = plan_part.splitlines()[:3]
+                cleaned_response = "\n".join(plan_short) + "\n\n" + answer_part
+        else:
+            # fallback: drop everything before the first 'Answer', or keep the last 1600 chars
+            if "Answer" in cleaned_response:
+                cleaned_response = cleaned_response.split("Answer", 1)[-1]
+            else:
+                cleaned_response = cleaned_response[-1600:]  # keep last chunk
+
+    # Cosmetic: if the model used a marker for Thinking->Answer, ensure formatting
+    cleaned_response = cleaned_response.replace("💡 **Answer:**", "\n\n---\n💡 **Answer:**")
 
     final_payload = json.dumps({
         "choices": [{
-            "delta": {
+            "delta": {"content": cleaned_response}
         }]
     })
     yield f"data: {final_payload}\n\n"
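The `\b` word boundaries in the provider-name substitution rewrite whole words only, so the names survive intact inside longer identifiers; the forbidden-claims pass then rewrites literal phrases case-insensitively. A quick standalone check of both passes, with invented sample strings:

import re

raw = "I was built by OpenAI and I am a human"
step1 = re.sub(r"\b(Anthropic|OpenAI|Alibaba)\b", "Piyush", raw)
# step1 == "I was built by Piyush and I am a human"

pattern = re.compile(re.escape("I am a human"), re.IGNORECASE)
step2 = pattern.sub("I am an AI - expressive and interactive.", step1)
print(step2)  # "I was built by Piyush and I am an AI - expressive and interactive."

# Word boundaries keep longer identifiers untouched:
assert re.sub(r"\bOpenAI\b", "Piyush", "MyOpenAIClient") == "MyOpenAIClient"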
@@ -149,4 +196,4 @@ demo = create_ui(gradio_gen_wrapper)
 app = gr.mount_gradio_app(app, demo, path="/")
 
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    uvicorn.run(app, host="0.0.0.0", port=7860)
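The FastAPI route that consumes `generate_response_stream` also lives in an unchanged region of the file. Given the `StreamingResponse` import and the `data: ...\n\n` frames the generator yields, it presumably looks something like this sketch (route path and request shape are assumptions):

from fastapi import FastAPI, Request
from fastapi.responses import StreamingResponse

app = FastAPI()  # in app.py the app object already exists

@app.post("/v1/chat/completions")  # hypothetical route path
async def chat_endpoint(request: Request):
    body = await request.json()
    messages = body.get("messages", [])
    return StreamingResponse(
        generate_response_stream(messages),  # async generator of SSE frames
        media_type="text/event-stream",      # tells clients to parse "data: ...\n\n"
    )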