Commit c6cf010 · Parent(s): 7959964

fix: Final definitive fix for initialization race condition

api/main.py  +52 -49
api/main.py CHANGED

@@ -30,7 +30,7 @@ from core.anomaly_detector import find_anomalies
 from core.matcher import load_embedding_model, rank_documents_by_similarity
 from core.utils import get_supabase_client, extract_colors_from_url
 from core.document_parser import parse_pdf_from_url
-from core.creative_chat import CreativeDirector
+from core.creative_chat import CreativeDirector
 
 try:
     from core.rag.store import VectorStore
@@ -41,18 +41,13 @@ except ImportError:
 
 def cached_response(func): return func
 
+
 ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 MODELS_DIR = os.path.join(ROOT_DIR, 'models')
-
-# === FIX #2: Dynamic Model Downloading Logic ===
-# This replaces your old static LLAMA_MODEL_PATH
 MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
 MODEL_FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
-
-MODEL_SAVE_DIRECTORY = os.path.join(os.environ.get("WRITABLE_DIR", "/tmp"), "llm_model")
-# This will be the final path to our model file once it's downloaded
+MODEL_SAVE_DIRECTORY = os.path.join(os.environ.get("WRITABLE_DIR", "/data"), "llm_model")
 LLAMA_MODEL_PATH = os.path.join(MODEL_SAVE_DIRECTORY, MODEL_FILENAME)
-# ===============================================
 
 EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
 EMBEDDING_MODEL_PATH = os.path.join(ROOT_DIR, 'embedding_model')
@@ -63,7 +58,7 @@ FINAL_EMBEDDING_PATH = EMBEDDING_MODEL_PATH if os.path.exists(EMBEDDING_MODEL_PA
 _llm_instance: Optional[Llama] = None
 _vector_store: Optional[Any] = None
 _ai_strategist: Optional[AIStrategist] = None
-_creative_director:
+_creative_director: CreativeDirector | None = None
 _support_agent: Optional[SupportAgent] = None
 _budget_predictor = None
 _influencer_matcher = None
@@ -513,58 +508,60 @@ def startup_event():
         _earnings_optimizer, _earnings_encoder, _likes_predictor, _comments_predictor, \
         _revenue_forecaster, _performance_scorer
 
-    #
+    # --- STEP 1: DOWNLOAD AND LOAD THE LLM MODEL ---
     print("--- 🚀 AI Service Starting Up... ---")
     try:
+        # Create the directory where the model will be saved if it doesn't exist
         os.makedirs(MODEL_SAVE_DIRECTORY, exist_ok=True)
+
+        # Check if the model file already exists before trying to download it
         if not os.path.exists(LLAMA_MODEL_PATH):
             print(f" - LLM model not found locally. Downloading '{MODEL_FILENAME}'...")
+            # This function downloads the file from the Hub to the specified directory
             hf_hub_download(
                 repo_id=MODEL_REPO,
                 filename=MODEL_FILENAME,
                 local_dir=MODEL_SAVE_DIRECTORY,
-                local_dir_use_symlinks=False
+                local_dir_use_symlinks=False  # Important for container environments
             )
             print(" - ✅ Model downloaded successfully.")
         else:
-            print(f" - LLM model found at {LLAMA_MODEL_PATH}. Skipping download.")
+            print(f" - LLM model found locally at {LLAMA_MODEL_PATH}. Skipping download.")
 
+        # Now that the file is guaranteed to be there, load it into memory
         print(" - Loading Llama LLM into memory...")
         _llm_instance = Llama(model_path=LLAMA_MODEL_PATH, n_gpu_layers=0, n_ctx=2048, verbose=False, use_mmap=False)
-        print(" - ✅ LLM Loaded.")
+        print(" - ✅ LLM Loaded successfully.")
 
     except Exception as e:
-
-
-        # If LLM fails to load, we can't continue.
-        # Set instance to None and the rest of the app will know.
-        _llm_instance = None
-        return  # Stop the startup process here.
-
-    # === INITIALIZE AI COMPONENTS (NOW THAT LLM IS LOADED) ===
-    # This logic now runs ONLY IF the LLM loaded successfully.
-    try:
-        print(" - Initializing Creative Director...")
-        _creative_director = CreativeDirector(llm_instance=_llm_instance)
-        print(" - ✅ Creative Director is online.")
-
-        if VectorStore:
-            print(" - Initializing Vector Store...")
-            _vector_store = VectorStore()
-            print(" - ✅ RAG Engine Ready.")
-
-        print(" - Initializing AI Strategist...")
-        _ai_strategist = AIStrategist(llm_instance=_llm_instance, store=_vector_store)
-        print(" - ✅ AI Strategist ready.")
-
-        print(" - Initializing Support Agent...")
-        _support_agent = SupportAgent(llm_instance=_llm_instance, embedding_path=EMBEDDING_MODEL_PATH, db_path=DB_PATH)
-        print(" - ✅ Support Agent ready.")
-    except Exception as e:
-        print(f" - ❌ FAILED to initialize core AI components: {e}")
+        # If anything in this block fails, the LLM is not usable.
+        print(f" - ❌ FATAL ERROR: Could not download or load the LLM model. LLM-dependent features will be disabled.")
         traceback.print_exc()
+        _llm_instance = None  # Ensure the global variable is None
+
+    # --- STEP 2: INITIALIZE ALL AI COMPONENTS THAT NEED THE LLM ---
+    # This part only runs if the LLM was loaded successfully (_llm_instance is not None)
+    if _llm_instance:
+        try:
+            print(" - Initializing AI components...")
+            _creative_director = CreativeDirector(llm_instance=_llm_instance)
+
+            if VectorStore:
+                _vector_store = VectorStore()
+                print(" - RAG Engine Ready.")
+
+            _ai_strategist = AIStrategist(llm_instance=_llm_instance, store=_vector_store)
+            _support_agent = SupportAgent(llm_instance=_llm_instance, embedding_path=EMBEDDING_MODEL_PATH, db_path=DB_PATH)
+
+            print(" - ✅ Core AI components (Director, Strategist, Agent) are online.")
+
+        except Exception as e:
+            print(f" - ❌ FAILED to initialize core AI components: {e}")
+            traceback.print_exc()
+    else:
+        print(" - ⚠️ SKIPPING initialization of LLM-dependent components because LLM failed to load.")
 
-    #
+    # --- STEP 3: LOAD ALL OTHER MODELS (These don't depend on the LLM) ---
     print(" - Loading ML models from joblib files...")
     model_paths = {
         'budget': ('_budget_predictor', 'budget_predictor_v1.joblib'),
@@ -585,16 +582,20 @@ def startup_event():
                 print(f" - Loaded {name} model.")
             except FileNotFoundError:
                 globals()[var] = None
-                print(f" - ⚠️ WARNING: Model '{name}' not found at {path}. Endpoint disabled.")
+                print(f" - ⚠️ WARNING: Model '{name}' not found at {path}. Endpoint will be disabled.")
 
     print(" - Initializing Text Embedding Model...")
     load_embedding_model(EMBEDDING_MODEL_PATH)
 
-    print("\n--- ✅ AI Service
-
+    print("\n--- ✅ AI Service startup sequence finished! ---")
+
 @app.get("/", summary="Health Check")
 def read_root():
-
+    # We add a check here to see if the LLM loaded successfully during startup.
+    # This helps with debugging on the live server.
+    if _llm_instance is None:
+        return {"status": "AI Service is running, but the Core LLM FAILED to load. Check logs."}
+    return {"status": "AI Service is running and all models are loaded."}
 
 def _cleanup_llm_response(data: dict) -> dict:
     """A robust helper to clean common messy JSON outputs from smaller LLMs."""
@@ -1941,7 +1942,7 @@ def generate_weekly_plan_route(request: WeeklyPlanRequest): # <--- async hata d
 @app.post("/chat/creative", response_model=Dict[str, str], summary="Brainstorming chat with AI Creative Director")
 def creative_chat_endpoint(request: CreativeChatRequest):
     if not _creative_director:
-        raise HTTPException(status_code=503, detail="AI Creative Director is not available due to a startup error.")
+        raise HTTPException(status_code=503, detail="The AI Creative Director is not available due to a startup error.")
     try:
         history_list = [m.model_dump() for m in request.history]
         response_text = _creative_director.chat(
@@ -1952,13 +1953,14 @@ def creative_chat_endpoint(request: CreativeChatRequest):
         return {"reply": response_text}
     except Exception as e:
         print(f"🚨 Creative Chat Error: {e}")
+        traceback.print_exc()
         raise HTTPException(status_code=500, detail="An error occurred with the AI Director.")
 
 
 @app.post("/generate/final-from-chat", response_model=FinalScriptResponse, summary="Generates final structured script from chat history")
 def finalize_script_endpoint(request: FinalizeScriptRequest):
     if not _creative_director:
-        raise HTTPException(status_code=503, detail="AI Creative Director is not available due to a startup error.")
+        raise HTTPException(status_code=503, detail="The AI Creative Director is not available due to a startup error.")
     try:
         history_list = [m.model_dump() for m in request.history]
         return _creative_director.generate_final_plan(
@@ -1967,4 +1969,5 @@ def finalize_script_endpoint(request: FinalizeScriptRequest):
         )
     except Exception as e:
         print(f"🚨 Finalize Script Error: {e}")
-
+        traceback.print_exc()
+        raise HTTPException(status_code=500, detail="Failed to generate the final plan.")