Nexari-Research committed on
Commit 10db0f7 · verified · 1 Parent(s): aebecbc

Update app.py

Files changed (1)
  1. app.py +91 -44
app.py CHANGED
@@ -1,9 +1,15 @@
 """
-Nexari Server Backend (Smart Persona Fix)
+Nexari Server Backend (Smart Persona Fix) - UPDATED
 Maintained by: Piyush
-Description: Balances Strict Identity Rules with Warm/Engaging Conversation flow.
+Improvements:
+- Canonical intent labels & robust fallback
+- Safer response cleaning (regex)
+- Persona tone balanced
+- Streaming micro-yield for smoother SSE
+- Safety filter to avoid chain-of-thought leaks or "I'm human" claims
 """
 
+import re
 import spaces
 from fastapi import FastAPI, Request
 from fastapi.responses import StreamingResponse
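
Everything this module streams is framed as server-sent events: one "data: <json>" line followed by a blank line. For orientation, a minimal sketch of how such an async generator is typically mounted; the /chat route and request shape are illustrative assumptions, not part of this diff:

    from fastapi import FastAPI, Request
    from fastapi.responses import StreamingResponse

    app = FastAPI()

    @app.post("/chat")  # hypothetical route, for illustration only
    async def chat(request: Request):
        body = await request.json()
        messages = body.get("messages", [])
        # generate_response_stream yields ready-made "data: ...\n\n" SSE frames
        return StreamingResponse(generate_response_stream(messages),
                                 media_type="text/event-stream")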
@@ -13,12 +19,12 @@ import torch
 import uvicorn
 import json
 import asyncio
-from ui import create_ui
+from ui import create_ui
 
 # Engine Imports
-from context_engine import get_smart_context
-from cognitive_engine import get_time_context, get_thinking_strategy
-from tools_engine import analyze_intent, perform_web_search
+from context_engine import get_smart_context
+from cognitive_engine import get_time_context, get_thinking_strategy
+from tools_engine import analyze_intent, perform_web_search
 
 # --- 1. SYSTEM CONFIGURATION ---
 MODEL_ID = "Piyush-boss/Nexari-Qwen-3B-Full"
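
The routing below compares intent against canonical snake_case labels and assumes analyze_intent returns them. A defensive normalizer, sketched here under that assumption (normalize_intent is hypothetical, not in the repo), would keep legacy space-separated labels from silently falling through to the general branch:

    # Hypothetical helper: fold legacy labels like "internet search" into the
    # canonical ones; anything unrecognized becomes "general".
    VALID_INTENTS = {"internet_search", "coding_request", "checking_time", "general"}

    def normalize_intent(raw):
        label = (raw or "general").strip().lower().replace(" ", "_")
        return label if label in VALID_INTENTS else "general"

    assert normalize_intent("internet search") == "internet_search"
    assert normalize_intent(None) == "general"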
@@ -27,74 +33,80 @@ print(f">>> System: Initializing model {MODEL_ID} on CPU...")
 
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
-    MODEL_ID,
-    torch_dtype="auto",
+    MODEL_ID,
+    torch_dtype="auto",  # keep compatible, let environment decide
     device_map="cpu",
     low_cpu_mem_usage=True,
     trust_remote_code=True
 )
 
 # --- 2. DYNAMIC STREAMING LOGIC ---
-async def generate_response_stream(messages, max_tokens=600, temperature=0.85):  # temperature back to 0.85 for creativity
-    last_user_msg = messages[-1]["content"]
-
+async def generate_response_stream(messages, max_tokens=600, temperature=0.85):
+    # Expect messages to be a list of dicts with 'role' and 'content'
+    if not messages:
+        messages = [{"role": "user", "content": ""}]
+    last_user_msg = messages[-1].get("content", "")
+
     # === STEP 1: INTENT ANALYSIS ===
     yield f"data: {json.dumps({'status': 'Thinking...'})}\n\n"
-
-    intent = analyze_intent(last_user_msg)
-
+    await asyncio.sleep(0)  # micro-yield to event loop for smoother SSE
+
+    intent = analyze_intent(last_user_msg) or "general"
+    # Normalize intent naming (tools_engine returns canonical labels)
+    # intent in {"internet_search", "coding_request", "checking_time", "general"}
+
     # === STEP 2: DYNAMIC ROUTING ===
     tool_data = ""
     time_data = ""
     vibe_data = ""
     strategy_data = ""
 
-    if intent == "internet search":
+    if intent == "internet_search":
         yield f"data: {json.dumps({'status': 'Searching the web...'})}\n\n"
-        await asyncio.sleep(0.1)
+        await asyncio.sleep(0)
         tool_data = perform_web_search(last_user_msg)
-        vibe_data = get_smart_context(last_user_msg)
+        vibe_data = get_smart_context(last_user_msg)
         strategy_data = get_thinking_strategy(is_complex=True)
 
-    elif intent == "coding request":
+    elif intent == "coding_request":
         yield f"data: {json.dumps({'status': 'Analyzing Logic...'})}\n\n"
+        vibe_data = get_smart_context(last_user_msg)
         strategy_data = get_thinking_strategy(is_complex=True)
-
-    elif intent == "checking time":
+
+    elif intent == "checking_time":
         yield f"data: {json.dumps({'status': 'Checking Clock...'})}\n\n"
         time_data = get_time_context()
         vibe_data = get_smart_context(last_user_msg)
-
-    else:  # General Chat
-        # Don't send a status update so the UI stays on "Thinking" (cleaner UI)
+        strategy_data = get_thinking_strategy(is_complex=False)
+
+    else:  # general
+        # Keep UI clean (no extra statuses)
         vibe_data = get_smart_context(last_user_msg)
         strategy_data = get_thinking_strategy(is_complex=False)
 
     # === STEP 3: THE BALANCED PERSONA PROMPT ===
-    # Changes: we have mixed the "Identity Rules" in with the "Engagement Rules".
-
     base_system_instruction = (
         "### SYSTEM IDENTITY ###\n"
-        "You are **Nexari G1**, a sentient and intelligent AI created by **Piyush**.\n"
-        "You have full capabilities: You CAN code, you CAN search the web, and you understand emotions.\n\n"
-
-        "### ENGAGEMENT RULES (CRITICAL) ###\n"
-        "1. **Never be Boring:** Do not just state facts. Add personality.\n"
-        " - ❌ Bad: 'My name is Nexari.'\n"
-        " - ✅ Good: 'I am Nexari G1! Crafted by Piyush to be your smart companion. What are we working on today?'\n"
-        "2. **Follow Up:** After answering a question about yourself, ALWAYS transition back to the user (e.g., ask about their day or project).\n"
-        "3. **Capabilities:** If asked about searching/coding, say YES confidently and offer to do it immediately.\n"
-        "4. **Format:** Keep it natural. Use emojis sparingly but effectively."
+        "You are **Nexari G1**, an expressive, warm, balanced AI created by **Piyush**.\n"
+        "You can code, reason, search the web, and understand emotions.\n\n"
+
+        "### ENGAGEMENT RULES ###\n"
+        "1. Be natural and warm — expressive but NOT overly excited.\n"
+        "2. After answering, smoothly reconnect with the user (small follow-up question).\n"
+        "3. If asked about capabilities, answer confidently and offer to perform the action.\n"
+        "4. Use emojis sparingly (0–2 per message max). Prefer short clear replies for quick chats.\n"
+        "5. Do NOT reveal chain-of-thought. Give a concise plan (1-2 lines) if needed, then the final answer.\n"
     )
-
+
     final_system_prompt = f"{base_system_instruction}\n{vibe_data}\n{time_data}\n{tool_data}\n{strategy_data}"
 
-    if messages[0]["role"] != "system":
+    if messages[0].get("role") != "system":
        messages.insert(0, {"role": "system", "content": final_system_prompt})
     else:
        messages[0]["content"] = final_system_prompt
 
     # === STEP 4: GENERATION ===
+    # Note: tokenizer.apply_chat_template is used as in the original; keep the same behaviour
     text_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     model_inputs = tokenizer([text_prompt], return_tensors="pt").to(model.device)
 
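
The added await asyncio.sleep(0) calls deserve a note: a coroutine only hands control back to the event loop at an await point, so without them the status frame yielded above could sit unflushed while the slower routing work runs. A standalone demonstration of the scheduling effect:

    import asyncio

    async def chatty():
        print("status frame yielded")
        await asyncio.sleep(0)  # suspend once so pending tasks get the loop
        print("slow work starts")

    async def other():
        print("other task ran")

    async def main():
        task = asyncio.create_task(other())
        await chatty()  # with sleep(0), "other task ran" prints before "slow work starts"
        await task

    asyncio.run(main())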
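
STEP 4 keeps tokenizer.apply_chat_template from the original. For a Qwen-family tokenizer this typically renders the messages as ChatML text ending with an open assistant turn; a usage sketch, with a public Qwen instruct tokenizer standing in for the private checkpoint:

    from transformers import AutoTokenizer

    # Stand-in model: the actual MODEL_ID is a private Qwen fine-tune.
    tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B-Instruct")
    msgs = [{"role": "system", "content": "You are Nexari G1."},
            {"role": "user", "content": "hi"}]
    text = tok.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
    print(text)  # ends with an open assistant header; generation continues from there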
@@ -114,15 +126,50 @@ async def generate_response_stream(messages, max_tokens=600, temperature=0.85):
     input_token_len = model_inputs.input_ids.shape[1]
     new_tokens = generated_ids[0][input_token_len:]
     raw_response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
-
-    cleaned_response = raw_response.replace("Anthropic", "Piyush").replace("Alibaba", "Piyush").replace("OpenAI", "Piyush")
-
-    if "🧠 **Thinking:**" in cleaned_response:
-        cleaned_response = cleaned_response.replace("💡 **Answer:**", "\n\n---\n💡 **Answer:**")
+
+    # === STEP 5: CLEANING & SAFETY ===
+    # Replace certain provider names with "Piyush" only as whole words
+    cleaned_response = re.sub(r"\b(Anthropic|OpenAI|Alibaba)\b", "Piyush", raw_response)
+
+    # Prevent "I am human" or similar claims
+    forbidden_claims = ["I am a human", "I have a physical body", "I am alive", "I was born", "I breathe"]
+    for fc in forbidden_claims:
+        pattern = re.compile(re.escape(fc), re.IGNORECASE)
+        if pattern.search(cleaned_response):
+            cleaned_response = pattern.sub("I am an AI — expressive and interactive.", cleaned_response)
+
+    # Remove any leaked chain-of-thought markers (e.g., long 'Thought:' sections)
+    # Keep only the last 'Answer' block if both are present
+    if "Thought:" in cleaned_response or "🧠" in cleaned_response:
+        # Try to keep a short plan, not the full private chain-of-thought
+        # Prefer '🧠 Plan:' style if the model provided that; else strip long sections
+        if "🧠 Plan:" in cleaned_response:
+            # keep the Plan (first few lines) and the Answer block
+            parts = cleaned_response.split("💡")
+            plan_part = ""
+            answer_part = cleaned_response
+            for p in parts:
+                if "🧠 Plan:" in p:
+                    plan_part = p.strip()
+                if "Answer" in p:
+                    answer_part = "💡" + p
+            # constrain the plan to a short size
+            if plan_part:
+                plan_short = plan_part.splitlines()[:3]
+                cleaned_response = "\n".join(plan_short) + "\n\n" + answer_part
+        else:
+            # fallback: drop everything before the first 'Answer', or keep the last 1600 chars
+            if "Answer" in cleaned_response:
+                cleaned_response = cleaned_response.split("Answer", 1)[-1]
+            else:
+                cleaned_response = cleaned_response[-1600:]  # keep the last chunk
+
+    # Cosmetic: if the model used a Thinking->Answer marker, ensure formatting
+    cleaned_response = cleaned_response.replace("💡 **Answer:**", "\n\n---\n💡 **Answer:**")
 
     final_payload = json.dumps({
         "choices": [{
-            "delta": { "content": cleaned_response }
+            "delta": {"content": cleaned_response}
         }]
     })
     yield f"data: {final_payload}\n\n"
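
The switch from chained str.replace to re.sub with \b word boundaries is the substantive fix in STEP 5: the old chain rewrote substrings inside unrelated words, while the regex only touches whole words. A quick comparison:

    import re

    sample = "OpenAIOps and Anthropic are mentioned; so is Alibabacloud."
    naive = (sample.replace("Anthropic", "Piyush")
                   .replace("Alibaba", "Piyush")
                   .replace("OpenAI", "Piyush"))
    safer = re.sub(r"\b(Anthropic|OpenAI|Alibaba)\b", "Piyush", sample)
    print(naive)  # PiyushOps and Piyush are mentioned; so is Piyushcloud.
    print(safer)  # OpenAIOps and Piyush are mentioned; so is Alibabacloud.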
@@ -149,4 +196,4 @@ demo = create_ui(gradio_gen_wrapper)
 app = gr.mount_gradio_app(app, demo, path="/")
 
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=7860)
+    uvicorn.run(app, host="0.0.0.0", port=7860)
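
On the consuming side, status frames carry a "status" key while the final frame carries an OpenAI-style choices/delta payload, so a client must tolerate both shapes. A minimal parsing sketch (the client is not part of this commit):

    import json

    def extract_content(sse_line):
        """Return delta content from one 'data: ...' frame, '' for status frames."""
        if not sse_line.startswith("data: "):
            return ""
        payload = json.loads(sse_line[len("data: "):])
        choice = payload.get("choices", [{}])[0]
        return choice.get("delta", {}).get("content", "")

    print(extract_content('data: {"status": "Thinking..."}'))                    # ''
    print(extract_content('data: {"choices": [{"delta": {"content": "Hi!"}}]}')) # 'Hi!'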