Spaces:

amitbhatt6075
/

reachify-ai-service

Running

App Files Files Community

amitbhatt6075 committed 10 days ago

Commit

14b1c48

1 Parent(s): 853482f

Remove ALL mock data from Thunderbird engine

Browse files

Files changed (1) hide show

core/thunderbird_engine.py +83 -88

core/thunderbird_engine.py CHANGED Viewed

@@ -1,117 +1,121 @@
 import os
 import pandas as pd
 import joblib
-import random
 import json
 from datetime import datetime
 from newsapi import NewsApiClient
-from typing import Dict, Any, Optional # <-- THIS IMPORT WAS MISSING
 # --- CONFIGURATION ---
 MODEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'models', 'thunderbird_market_predictor_v1.joblib')
 NEWS_API_KEY = os.getenv("NEWS_API_KEY")
-# --- CORE FUNCTIONS ---
 def get_external_trends() -> dict:
-    """Fetches real-time 'live' data from external news APIs and RSS feeds."""
-    print("🚀 [Thunderbird Engine] Fetching live external trends...")
-    results = {
-        "news_headlines": [],
-        "breakout_keyword": None,
-        "trending_audio": None
-    }
     if NEWS_API_KEY:
         try:
             newsapi = NewsApiClient(api_key=NEWS_API_KEY)
-            top_headlines = newsapi.get_everything(
-                q='("influencer marketing" OR "social media marketing" OR "creator economy")',
-                language='en', sort_by='relevancy', page_size=5
-            )
-            results["news_headlines"] = [{"title": article['title'], "url": article['url']} for article in top_headlines.get('articles', [])]
-            print(f"   - ✅ Found {len(results['news_headlines'])} news articles.")
         except Exception as e:
-            print(f"   - ⚠️ NewsAPI Error: {e}")
-            results["news_headlines"] = [{"title": "News service currently unavailable.", "url": "#"}]
-    else:
-        # Fallback simulation if no API Key
-        results["news_headlines"] = [
-            {"title": "TikTok vs YouTube Shorts: The 2025 Battle for Dominance", "url": "#"},
-            {"title": "AI in Influencer Marketing: What Agencies Need to Know", "url": "#"},
-            {"title": "The Rise of Micro-Influencers in Niche Markets", "url": "#"}
-        ]
-    # Simulate other trends for now to allow frontend development
-    results["breakout_keyword"] = "AI in Marketing"
-    trending_audios = [{"name": "Espresso - Sabrina Carpenter", "cover_art_url": "https://i.scdn.co/image/ab67616d0000b2736599b5003b077a93553250df"}]
-    results["trending_audio"] = random.choice(trending_audios)
-    print("   - ✅ (Simulated) Found trending keyword and audio.")
     return results
 def predict_niche_trends() -> dict:
-    """Loads our trained ML model to predict future interest in market niches."""
-    print("\n🚀 [Thunderbird Engine] Loading model to predict niche trends...")
     try:
         if not os.path.exists(MODEL_PATH):
-             # Graceful fallback if model is missing during build/deploy
-             raise FileNotFoundError("Model file not found")
         model_pack = joblib.load(MODEL_PATH)
         encoder = model_pack['encoder']
-        print(f"   - ✅ Model '{os.path.basename(MODEL_PATH)}' loaded successfully.")
-    except Exception as e:
-        print(f"   - ⚠️ Model load skipped (Using Simulation): {e}")
-        # Return simulated structure directly
-        dates = pd.date_range(end=datetime.now(), periods=12, freq='M').strftime('%Y-%m').tolist()
-        return {"trend_predictions": {
-            "general": [{"date": d, "value": random.randint(40, 80)} for d in dates],
-            "fitness": [{"date": d, "value": random.randint(50, 90)} for d in dates]
-        }}
-    print("   - ⚠️ NOTE: Generating SIMULATED trend data as training set is small.")
-    niches = encoder.get_feature_names_out(['niche'])
-    dates = pd.date_range(end=datetime.now(), periods=12, freq='M').strftime('%Y-%m').tolist()
-    predictions = {}
-    for niche_col_name in niches:
-        niche_name = niche_col_name.split('_')[-1]
-        points = [random.randint(40, 60)]
-        for _ in range(11):
-            points.append(max(20, min(100, points[-1] + random.randint(-10, 10))))
-        predictions[niche_name] = [{"date": date, "value": value} for date, value in zip(dates, points)]
-    return {"trend_predictions": predictions}
 def decode_market_trend(topic: str, llm_instance) -> Dict[str, str]:
-    """
-    Decodes a keyword into a strategy. USES HYBRID AI + TEMPLATE LOGIC.
-    """
-    print(f"🧠 [Thunderbird] Decoding Trend with Upgraded Prompt: {topic}")
-    # --- 1. Smart Fallback (Same as before) ---
-    base_template = FALLBACK_STRATEGIES["Default"]
-    topic_lower = topic.lower()
-    if "ai" in topic_lower: base_template = FALLBACK_STRATEGIES["AI"]
-    elif "tiktok" in topic_lower: base_template = FALLBACK_STRATEGIES["TikTok"]
-    elif "micro" in topic_lower: base_template = FALLBACK_STRATEGIES["Micro"]
     if not llm_instance:
-        return base_template
-    # --- 2. THE UPGRADED PROMPT (This is the fix) ---
-    # We give it today's date and a better persona.
     today_date = datetime.now().strftime("%Y-%m-%d")
     prompt = f"""[INST]
-    You are "PulseAI", a Senior Digital Strategy Director providing a competitive edge.
-    Today's Date is {today_date}. All your analysis must be modern and relevant to today.
     Analyze this trend: "{topic}".
     Provide a concise, expert briefing in a valid JSON format with 3 keys:
-    1. "summary": A single, sharp sentence explaining what is happening now.
-    2. "impact": A single sentence explaining why this matters for a brand's revenue or reach.
-    3. "strategy": One creative, specific content idea an agency can execute this week. Example: "A 30s reel comparing old vs. new AI tools."
     JSON Response:
     [/INST]"""
@@ -119,21 +123,12 @@ def decode_market_trend(topic: str, llm_instance) -> Dict[str, str]:
         response = llm_instance(prompt, max_tokens=256, temperature=0.6, stop=["[INST]"], echo=False)
         text = response['choices'][0]['text'].strip()
-        # Parse the JSON from the response text
         start = text.find('{')
         end = text.rfind('}') + 1
         if start != -1 and end != 0:
-            data = json.loads(text[start:end])
-            # Ensure no key is empty, fall back to template if needed
-            return {
-                "summary": data.get("summary") or base_template["summary"],
-                "impact": data.get("impact") or base_template["impact"],
-                "strategy": data.get("strategy") or base_template["strategy"]
-            }
         else:
-            # If JSON is broken, use the fallback
             raise ValueError("LLM did not return valid JSON.")
     except Exception as e:
-        print(f"   - ⚠️ LLM Error or Bad JSON (Using Fallback): {e}")
-        return base_template

 import os
 import pandas as pd
 import joblib
 import json
 from datetime import datetime
 from newsapi import NewsApiClient
+from pytrends.request import TrendReq
+from typing import Dict, Any, Optional
 # --- CONFIGURATION ---
 MODEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'models', 'thunderbird_market_predictor_v1.joblib')
 NEWS_API_KEY = os.getenv("NEWS_API_KEY")
+# --- NO MOCK DATA. REAL ATTEMPTS ONLY ---
 def get_external_trends() -> dict:
+    """
+    Attempts to fetch REAL data from NewsAPI and Google Trends.
+    If it fails, it returns None, not dummy data.
+    """
+    print("🚀 [Thunderbird Engine] Fetching REAL external trends...")
+    results = { "news_headlines": [], "breakout_keyword": None, "trending_audio": None }
+    # 1. Attempt to fetch real news
     if NEWS_API_KEY:
         try:
             newsapi = NewsApiClient(api_key=NEWS_API_KEY)
+            top_headlines = newsapi.get_everything(q='("influencer marketing" OR "social media trends")', language='en', sort_by='relevancy', page_size=5)
+            articles = top_headlines.get('articles', [])
+            if articles:
+                results["news_headlines"] = [{"title": a['title'], "url": a['url']} for a in articles]
         except Exception as e:
+            print(f"   - ⚠️ NewsAPI connection failed: {e}")
+    # 2. Attempt to fetch real breakout keyword from Google Trends
+    try:
+        pytrends = TrendReq(hl='en-US', tz=360)
+        trending_searches_df = pytrends.trending_searches(pn='united_states')
+        if not trending_searches_df.empty:
+            results["breakout_keyword"] = trending_searches_df.iloc[0,0]
+    except Exception as e:
+        print(f"   - ⚠️ Google Trends connection failed: {e}")
+    # 3. Trending Audio - This requires a dedicated API (e.g., TikTok/Spotify)
+    # Since we don't have one, it will correctly remain None.
     return results
 def predict_niche_trends() -> dict:
+    """
+    Uses the REAL trained ML model to predict future interest.
+    If the model is not trained or fails, it returns an empty dictionary.
+    """
+    print("\n🚀 [Thunderbird Engine] Using REAL ML model for predictions...")
     try:
         if not os.path.exists(MODEL_PATH):
+             raise FileNotFoundError(f"Model not found at {MODEL_PATH}")
         model_pack = joblib.load(MODEL_PATH)
+        model = model_pack['model']
         encoder = model_pack['encoder']
+        # Prepare future dates for prediction
+        future_dates = pd.to_datetime(pd.date_range(start=datetime.now(), periods=12, freq='M'))
+        predictions = {}
+        # Get all possible niche names from the encoder
+        all_niches = encoder.get_feature_names_out(['niche'])
+        for niche_col in all_niches:
+            niche_name = niche_col.replace('niche_', '')
+            # Create a dataframe for prediction for this niche for all future months
+            future_df = pd.DataFrame({
+                'month': future_dates.to_period('M'),
+                'niche': [niche_name] * 12,
+                'trend_score': 50  # Assume an average trend score for future prediction
+            })
+            # Predict using the actual model
+            predicted_values = model.predict(future_df)
+            # Format for the frontend chart
+            predictions[niche_name] = [
+                {"date": dt.strftime('%Y-%m'), "value": max(0, int(val))}
+                for dt, val in zip(future_dates, predicted_values)
+            ]
+        print(f"   - ✅ Successfully generated REAL predictions for niches: {list(predictions.keys())}")
+        return {"trend_predictions": predictions}
+    except Exception as e:
+        print(f"   - ❌ REAL Prediction Failed: {e}. Chart will be empty.")
+        # Return empty data, which the frontend will show as "Unavailable"
+        return {"trend_predictions": {}}
 def decode_market_trend(topic: str, llm_instance) -> Dict[str, str]:
+    """Decodes a keyword into a strategy with a clear failure message."""
+    print(f"🧠 [Thunderbird] Decoding Trend with REAL AI: {topic}")
+    offline_response = {
+        "summary": "AI Analyst is currently offline.",
+        "impact": "The system could not get a real-time analysis.",
+        "strategy": "Please try again in a few moments. If the issue persists, check AI service logs."
+    }
     if not llm_instance:
+        return offline_response
     today_date = datetime.now().strftime("%Y-%m-%d")
     prompt = f"""[INST]
+    You are a Senior Digital Strategy Director. Today's Date is {today_date}.
     Analyze this trend: "{topic}".
     Provide a concise, expert briefing in a valid JSON format with 3 keys:
+    1. "summary": A sharp sentence explaining what's happening now.
+    2. "impact": A sentence explaining why this matters for a brand's revenue or reach.
+    3. "strategy": One creative, specific content idea an agency can execute this week.
     JSON Response:
     [/INST]"""
         response = llm_instance(prompt, max_tokens=256, temperature=0.6, stop=["[INST]"], echo=False)
         text = response['choices'][0]['text'].strip()
         start = text.find('{')
         end = text.rfind('}') + 1
         if start != -1 and end != 0:
+            return json.loads(text[start:end])
         else:
             raise ValueError("LLM did not return valid JSON.")
     except Exception as e:
+        print(f"   - ❌ LLM Error (Returning Offline Message): {e}")
+        return offline_response