Local AI Assistant committed
Commit a016784 · 1 Parent(s): a33b5b3

Implement lazy loading for AI models to save memory
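The pattern this commit applies: instead of constructing every engine at import time, each engine gets an accessor that builds it on first call and caches it in a module-level global, so a process that never serves an image request never pays for loading the image model. A minimal, self-contained sketch of the idea, with Engine as a hypothetical stand-in for the real ChatEngine / ImageEngine / RAGEngine classes:

class Engine:
    """Stand-in for an expensive model wrapper (weights load in __init__)."""
    def __init__(self):
        print("Loading model weights...")  # the cost we want to defer

_engine = None  # module-level cache; stays None until first use

def get_engine():
    """Build the engine on the first call, return the cached instance after."""
    global _engine
    if _engine is None:
        _engine = Engine()
    return _engine

get_engine()  # first call: prints "Loading model weights..."
get_engine()  # later calls: return the cached instance, no reload

The trade-off: startup is fast and idle memory stays low, but the first request that touches each engine absorbs its load time.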

Files changed (1)
  1. api.py +36 -11
api.py CHANGED
@@ -57,12 +57,31 @@ if firebase_admin._apps:
 else:
     db = None
 
-# Initialize engines
-print("Initializing AI Engines...")
-chat_engine = ChatEngine()
-image_engine = ImageEngine()
-rag_engine = RAGEngine()
-print("AI Engines Ready!")
+# Global engine instances (Lazy loaded)
+chat_engine = None
+image_engine = None
+rag_engine = None
+
+def get_chat_engine():
+    global chat_engine
+    if chat_engine is None:
+        print("Lazy loading Chat Engine...")
+        chat_engine = ChatEngine()
+    return chat_engine
+
+def get_image_engine():
+    global image_engine
+    if image_engine is None:
+        print("Lazy loading Image Engine...")
+        image_engine = ImageEngine()
+    return image_engine
+
+def get_rag_engine():
+    global rag_engine
+    if rag_engine is None:
+        print("Lazy loading RAG Engine...")
+        rag_engine = RAGEngine()
+    return rag_engine
 
 # Auth Dependency
 oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")
@@ -222,8 +241,10 @@ async def chat(request: ChatRequest, current_user: dict = Depends(get_current_user)):
     # ... (Keep existing /chat for backward compatibility if needed, or redirect logic)
     # For now, let's keep /chat as blocking and add /chat/stream
     try:
+        # Get engine (lazy load)
+        engine = get_chat_engine()
         # Generate Response
-        response = chat_engine.generate_response(request.message, request.history)
+        response = engine.generate_response(request.message, request.history)
 
         # Save to Firestore if conversation_id is present
         if request.conversation_id:
@@ -261,7 +282,8 @@ async def upload_file(file: UploadFile = File(...), current_user: dict = Depends(get_current_user)):
             shutil.copyfileobj(file.file, buffer)
 
         # Ingest into RAG
-        rag_engine.ingest_file(file_path)
+        rag = get_rag_engine()
+        rag.ingest_file(file_path)
 
         return {"filename": file.filename, "status": "ingested"}
     except Exception as e:
@@ -272,7 +294,8 @@ async def chat_stream(request: ChatRequest, current_user: dict = Depends(get_current_user)):
     try:
         # Check for RAG context
         context = ""
-        rag_docs = rag_engine.search(request.message)
+        rag = get_rag_engine()
+        rag_docs = rag.search(request.message)
         if rag_docs:
             context = "\n\nRelevant Context:\n" + "\n".join(rag_docs) + "\n\n"
             print(f"Found {len(rag_docs)} relevant documents.")
@@ -292,7 +315,8 @@ async def chat_stream(request: ChatRequest, current_user: dict = Depends(get_current_user)):
        # Prepend context to the message sent to AI (but not saved in DB as user message)
         augmented_message = context + request.message if context else request.message
 
-        for token in chat_engine.generate_stream(augmented_message, request.history, request.language):
+        engine = get_chat_engine()
+        for token in engine.generate_stream(augmented_message, request.history, request.language):
             full_response += token
             yield token
 
@@ -319,7 +343,8 @@ async def generate_image(request: ImageRequest, current_user: dict = Depends(get_current_user)):
     try:
         # Generate image to a temporary file
         filename = "temp_generated.png"
-        image_engine.generate_image(request.prompt, output_path=filename)
+        engine = get_image_engine()
+        engine.generate_image(request.prompt, output_path=filename)
 
         # Read and encode to base64 to send to frontend
         with open(filename, "rb") as image_file:
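One caveat worth noting about module-global lazy initialization. As written, the endpoints are async def, and there is no await between the None check and the assignment, so the check-then-construct runs without interleaving on the event loop. But if an accessor were ever invoked from worker threads (for example from a sync endpoint, which FastAPI runs on a threadpool), two first callers could both observe None and each construct a model, briefly doubling memory before one instance is discarded. A hedged sketch of a lock-guarded variant, assuming the same globals as the diff above; the ChatEngine stub stands in for the real class imported in api.py:

import threading

class ChatEngine:  # stub standing in for the real engine class in api.py
    pass

_chat_lock = threading.Lock()
chat_engine = None

def get_chat_engine():
    """Thread-safe accessor: only one thread ever constructs the engine."""
    global chat_engine
    if chat_engine is None:              # fast path once initialized
        with _chat_lock:
            if chat_engine is None:      # double-checked under the lock
                chat_engine = ChatEngine()
    return chat_engine

The double check keeps the common case lock-free while guaranteeing a single construction if two first requests ever race.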