Commit c6cf010 · Parent(s): 7959964

fix: Final definitive fix for initialization race condition

api/main.py  +52 -49
api/main.py CHANGED

@@ -30,7 +30,7 @@ from core.anomaly_detector import find_anomalies
 from core.matcher import load_embedding_model, rank_documents_by_similarity
 from core.utils import get_supabase_client, extract_colors_from_url
 from core.document_parser import parse_pdf_from_url
-from core.creative_chat import CreativeDirector
+from core.creative_chat import CreativeDirector
 
 try:
     from core.rag.store import VectorStore
@@ -41,18 +41,13 @@ except ImportError:
 
 def cached_response(func): return func
 
+
 ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 MODELS_DIR = os.path.join(ROOT_DIR, 'models')
-
-# === FIX #2: Dynamic Model Downloading Logic ===
-# This replaces your old static LLAMA_MODEL_PATH
 MODEL_REPO = "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF"
 MODEL_FILENAME = "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf"
-
-MODEL_SAVE_DIRECTORY = os.path.join(os.environ.get("WRITABLE_DIR", "/tmp"), "llm_model")
-# This will be the final path to our model file once it's downloaded
+MODEL_SAVE_DIRECTORY = os.path.join(os.environ.get("WRITABLE_DIR", "/data"), "llm_model")
 LLAMA_MODEL_PATH = os.path.join(MODEL_SAVE_DIRECTORY, MODEL_FILENAME)
-# ===============================================
 
 EMBEDDING_MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2'
 EMBEDDING_MODEL_PATH = os.path.join(ROOT_DIR, 'embedding_model')
@@ -63,7 +58,7 @@ FINAL_EMBEDDING_PATH = EMBEDDING_MODEL_PATH if os.path.exists(EMBEDDING_MODEL_PA
 _llm_instance: Optional[Llama] = None
 _vector_store: Optional[Any] = None
 _ai_strategist: Optional[AIStrategist] = None
-_creative_director:
+_creative_director: CreativeDirector | None = None
 _support_agent: Optional[SupportAgent] = None
 _budget_predictor = None
 _influencer_matcher = None
@@ -513,58 +508,60 @@ def startup_event():
         _earnings_optimizer, _earnings_encoder, _likes_predictor, _comments_predictor, \
         _revenue_forecaster, _performance_scorer
 
-    #
+    # --- STEP 1: DOWNLOAD AND LOAD THE LLM MODEL ---
     print("--- 🚀 AI Service Starting Up... ---")
     try:
+        # Create the directory where the model will be saved if it doesn't exist
         os.makedirs(MODEL_SAVE_DIRECTORY, exist_ok=True)
+
+        # Check if the model file already exists before trying to download it
         if not os.path.exists(LLAMA_MODEL_PATH):
             print(f" - LLM model not found locally. Downloading '{MODEL_FILENAME}'...")
+            # This function downloads the file from the Hub to the specified directory
             hf_hub_download(
                 repo_id=MODEL_REPO,
                 filename=MODEL_FILENAME,
                 local_dir=MODEL_SAVE_DIRECTORY,
-                local_dir_use_symlinks=False
+                local_dir_use_symlinks=False  # Important for container environments
             )
             print(" - ✅ Model downloaded successfully.")
         else:
-            print(f" - LLM model found at {LLAMA_MODEL_PATH}. Skipping download.")
+            print(f" - LLM model found locally at {LLAMA_MODEL_PATH}. Skipping download.")
 
+        # Now that the file is guaranteed to be there, load it into memory
         print(" - Loading Llama LLM into memory...")
         _llm_instance = Llama(model_path=LLAMA_MODEL_PATH, n_gpu_layers=0, n_ctx=2048, verbose=False, use_mmap=False)
-        print(" - ✅ LLM Loaded.")
+        print(" - ✅ LLM Loaded successfully.")
 
     except Exception as e:
-
-
-        # If LLM fails to load, we can't continue.
-        # Set instance to None and the rest of the app will know.
-        _llm_instance = None
-        return  # Stop the startup process here.
-
-    # === INITIALIZE AI COMPONENTS (NOW THAT LLM IS LOADED) ===
-    # This logic now runs ONLY IF the LLM loaded successfully.
-    try:
-        print(" - Initializing Creative Director...")
-        _creative_director = CreativeDirector(llm_instance=_llm_instance)
-        print(" - ✅ Creative Director is online.")
-
-        if VectorStore:
-            print(" - Initializing Vector Store...")
-            _vector_store = VectorStore()
-            print(" - ✅ RAG Engine Ready.")
-
-        print(" - Initializing AI Strategist...")
-        _ai_strategist = AIStrategist(llm_instance=_llm_instance, store=_vector_store)
-        print(" - ✅ AI Strategist ready.")
-
-        print(" - Initializing Support Agent...")
-        _support_agent = SupportAgent(llm_instance=_llm_instance, embedding_path=EMBEDDING_MODEL_PATH, db_path=DB_PATH)
-        print(" - ✅ Support Agent ready.")
-    except Exception as e:
-        print(f" - ❌ FAILED to initialize core AI components: {e}")
+        # If anything in this block fails, the LLM is not usable.
+        print(f" - ❌ FATAL ERROR: Could not download or load the LLM model. LLM-dependent features will be disabled.")
         traceback.print_exc()
+        _llm_instance = None  # Ensure the global variable is None
+
+    # --- STEP 2: INITIALIZE ALL AI COMPONENTS THAT NEED THE LLM ---
+    # This part only runs if the LLM was loaded successfully (_llm_instance is not None)
+    if _llm_instance:
+        try:
+            print(" - Initializing AI components...")
+            _creative_director = CreativeDirector(llm_instance=_llm_instance)
+
+            if VectorStore:
+                _vector_store = VectorStore()
+                print(" - RAG Engine Ready.")
+
+            _ai_strategist = AIStrategist(llm_instance=_llm_instance, store=_vector_store)
+            _support_agent = SupportAgent(llm_instance=_llm_instance, embedding_path=EMBEDDING_MODEL_PATH, db_path=DB_PATH)
+
+            print(" - ✅ Core AI components (Director, Strategist, Agent) are online.")
+
+        except Exception as e:
+            print(f" - ❌ FAILED to initialize core AI components: {e}")
+            traceback.print_exc()
+    else:
+        print(" - ⚠️ SKIPPING initialization of LLM-dependent components because LLM failed to load.")
 
-    #
+    # --- STEP 3: LOAD ALL OTHER MODELS (These don't depend on the LLM) ---
     print(" - Loading ML models from joblib files...")
     model_paths = {
         'budget': ('_budget_predictor', 'budget_predictor_v1.joblib'),
@@ -585,16 +582,20 @@ def startup_event():
                 print(f" - Loaded {name} model.")
             except FileNotFoundError:
                 globals()[var] = None
-                print(f" - ⚠️ WARNING: Model '{name}' not found at {path}. Endpoint disabled.")
+                print(f" - ⚠️ WARNING: Model '{name}' not found at {path}. Endpoint will be disabled.")
 
     print(" - Initializing Text Embedding Model...")
     load_embedding_model(EMBEDDING_MODEL_PATH)
 
-    print("\n--- ✅ AI Service
-
+    print("\n--- ✅ AI Service startup sequence finished! ---")
+
 @app.get("/", summary="Health Check")
 def read_root():
-
+    # We add a check here to see if the LLM loaded successfully during startup.
+    # This helps with debugging on the live server.
+    if _llm_instance is None:
+        return {"status": "AI Service is running, but the Core LLM FAILED to load. Check logs."}
+    return {"status": "AI Service is running and all models are loaded."}
 
 def _cleanup_llm_response(data: dict) -> dict:
     """A robust helper to clean common messy JSON outputs from smaller LLMs."""
@@ -1941,7 +1942,7 @@ def generate_weekly_plan_route(request: WeeklyPlanRequest): # <--- async hata d
 @app.post("/chat/creative", response_model=Dict[str, str], summary="Brainstorming chat with AI Creative Director")
 def creative_chat_endpoint(request: CreativeChatRequest):
     if not _creative_director:
-        raise HTTPException(status_code=503, detail="AI Creative Director is not available due to a startup error.")
+        raise HTTPException(status_code=503, detail="The AI Creative Director is not available due to a startup error.")
     try:
         history_list = [m.model_dump() for m in request.history]
         response_text = _creative_director.chat(
@@ -1952,13 +1953,14 @@ def creative_chat_endpoint(request: CreativeChatRequest):
         return {"reply": response_text}
     except Exception as e:
         print(f"🚨 Creative Chat Error: {e}")
+        traceback.print_exc()
         raise HTTPException(status_code=500, detail="An error occurred with the AI Director.")
 
 
 @app.post("/generate/final-from-chat", response_model=FinalScriptResponse, summary="Generates final structured script from chat history")
 def finalize_script_endpoint(request: FinalizeScriptRequest):
     if not _creative_director:
-        raise HTTPException(status_code=503, detail="AI Creative Director is not available due to a startup error.")
+        raise HTTPException(status_code=503, detail="The AI Creative Director is not available due to a startup error.")
     try:
         history_list = [m.model_dump() for m in request.history]
         return _creative_director.generate_final_plan(
@@ -1967,4 +1969,5 @@ def finalize_script_endpoint(request: FinalizeScriptRequest):
         )
     except Exception as e:
         print(f"🚨 Finalize Script Error: {e}")
-
+        traceback.print_exc()
+        raise HTTPException(status_code=500, detail="Failed to generate the final plan.")