PIYUSH BOSS committed on
Commit 5128a9d · verified · 1 Parent(s): 9e1f930

Create app.py

Files changed (1)
  1. app.py +76 -0
app.py ADDED
@@ -0,0 +1,76 @@
+ from fastapi import FastAPI, Request
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+ import time
+
+ app = FastAPI()
+
+ # --- MODEL SETUP ---
+ MODEL_ID = "Piyush-boss/Nexari-Qwen-3B-Full"
+
+ print("🔄 Loading Nexari Model... (This takes time on CPU)")
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL_ID,
+     torch_dtype=torch.float32,  # float32 is safe for CPU
+     device_map="cpu",           # force CPU
+     low_cpu_mem_usage=True,
+ )
+ print("✅ Nexari Loaded Successfully!")
+
+ @app.get("/")
+ def home():
+     return {"status": "Nexari Server is Running!"}
+
+ @app.post("/v1/chat/completions")
+ async def chat_completions(request: Request):
+     data = await request.json()
+     messages = data.get("messages", [])
+
+     # 1. Build the prompt in ChatML format
+     prompt = ""
+     for msg in messages:
+         role = msg["role"]
+         content = msg["content"]
+         if role == "system":
+             prompt += f"<|im_start|>system\n{content}<|im_end|>\n"
+         elif role == "user":
+             prompt += f"<|im_start|>user\n{content}<|im_end|>\n"
+         elif role == "assistant":
+             prompt += f"<|im_start|>assistant\n{content}<|im_end|>\n"
+
+     prompt += "<|im_start|>assistant\n"
+
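+     # For illustration, a two-message request renders to a ChatML prompt like:
+     #   <|im_start|>system
+     #   You are a helpful assistant.<|im_end|>
+     #   <|im_start|>user
+     #   Hi!<|im_end|>
+     #   <|im_start|>assistant
+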
+     # 2. Tokenize & generate
+     inputs = tokenizer(prompt, return_tensors="pt")
+
+     # CPU generation (a bit slow, but it works)
+     outputs = model.generate(
+         **inputs,
+         max_new_tokens=512,
+         temperature=0.7,
+         do_sample=True,
+     )
+
+     # 3. Decode only the newly generated tokens. Slicing off the prompt tokens
+     # is more reliable than a string replace of the prompt: skip_special_tokens
+     # strips the <|im_start|> markers, so the decoded text never contains the
+     # raw prompt string.
+     new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
+     response_text = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
+
+     # 4. Return an OpenAI-compatible JSON response
+     return {
+         "id": "chatcmpl-nexari",
+         "object": "chat.completion",
+         "created": int(time.time()),
+         "choices": [{
+             "index": 0,
+             "message": {
+                 "role": "assistant",
+                 "content": response_text,
+             },
+             "finish_reason": "stop",
+         }],
+     }
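
For reference, a minimal client sketch for exercising the new endpoint once app.py is being served (e.g. with uvicorn). The host, port, serving command, and message contents below are assumptions for illustration, not part of the commit:

import requests

# Assumes the app is served locally, e.g. `uvicorn app:app --port 8000`
# (the serving command and URL are assumptions).
resp = requests.post(
    "http://localhost:8000/v1/chat/completions",
    json={"messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Hello, Nexari!"},
    ]},
    timeout=600,  # CPU generation is slow; allow a generous timeout
)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])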