amitbhatt6075 committed on
Commit
14b1c48
·
1 Parent(s): 853482f

Remove ALL mock data from Thunderbird engine

Browse files
Files changed (1) hide show
  1. core/thunderbird_engine.py +83 -88
core/thunderbird_engine.py CHANGED
@@ -1,117 +1,121 @@
1
  import os
2
  import pandas as pd
3
  import joblib
4
- import random
5
  import json
6
  from datetime import datetime
7
  from newsapi import NewsApiClient
8
- from typing import Dict, Any, Optional # <-- THIS IMPORT WAS MISSING
 
9
 
10
  # --- CONFIGURATION ---
11
  MODEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'models', 'thunderbird_market_predictor_v1.joblib')
12
  NEWS_API_KEY = os.getenv("NEWS_API_KEY")
13
 
14
- # --- CORE FUNCTIONS ---
 
15
  def get_external_trends() -> dict:
16
- """Fetches real-time 'live' data from external news APIs and RSS feeds."""
17
- print("🚀 [Thunderbird Engine] Fetching live external trends...")
18
- results = {
19
- "news_headlines": [],
20
- "breakout_keyword": None,
21
- "trending_audio": None
22
- }
23
 
 
24
  if NEWS_API_KEY:
25
  try:
26
  newsapi = NewsApiClient(api_key=NEWS_API_KEY)
27
- top_headlines = newsapi.get_everything(
28
- q='("influencer marketing" OR "social media marketing" OR "creator economy")',
29
- language='en', sort_by='relevancy', page_size=5
30
- )
31
- results["news_headlines"] = [{"title": article['title'], "url": article['url']} for article in top_headlines.get('articles', [])]
32
- print(f" - ✅ Found {len(results['news_headlines'])} news articles.")
33
  except Exception as e:
34
- print(f" - ⚠️ NewsAPI Error: {e}")
35
- results["news_headlines"] = [{"title": "News service currently unavailable.", "url": "#"}]
36
- else:
37
- # Fallback simulation if no API Key
38
- results["news_headlines"] = [
39
- {"title": "TikTok vs YouTube Shorts: The 2025 Battle for Dominance", "url": "#"},
40
- {"title": "AI in Influencer Marketing: What Agencies Need to Know", "url": "#"},
41
- {"title": "The Rise of Micro-Influencers in Niche Markets", "url": "#"}
42
- ]
 
 
 
 
43
 
44
- # Simulate other trends for now to allow frontend development
45
- results["breakout_keyword"] = "AI in Marketing"
46
- trending_audios = [{"name": "Espresso - Sabrina Carpenter", "cover_art_url": "https://i.scdn.co/image/ab67616d0000b2736599b5003b077a93553250df"}]
47
- results["trending_audio"] = random.choice(trending_audios)
48
- print(" - ✅ (Simulated) Found trending keyword and audio.")
49
  return results
50
 
51
  def predict_niche_trends() -> dict:
52
- """Loads our trained ML model to predict future interest in market niches."""
53
- print("\n🚀 [Thunderbird Engine] Loading model to predict niche trends...")
 
 
 
 
54
  try:
55
  if not os.path.exists(MODEL_PATH):
56
- # Graceful fallback if model is missing during build/deploy
57
- raise FileNotFoundError("Model file not found")
58
 
59
  model_pack = joblib.load(MODEL_PATH)
 
60
  encoder = model_pack['encoder']
61
- print(f" - ✅ Model '{os.path.basename(MODEL_PATH)}' loaded successfully.")
62
- except Exception as e:
63
- print(f" - ⚠️ Model load skipped (Using Simulation): {e}")
64
- # Return simulated structure directly
65
- dates = pd.date_range(end=datetime.now(), periods=12, freq='M').strftime('%Y-%m').tolist()
66
- return {"trend_predictions": {
67
- "general": [{"date": d, "value": random.randint(40, 80)} for d in dates],
68
- "fitness": [{"date": d, "value": random.randint(50, 90)} for d in dates]
69
- }}
70
 
71
- print(" - ⚠️ NOTE: Generating SIMULATED trend data as training set is small.")
72
- niches = encoder.get_feature_names_out(['niche'])
73
- dates = pd.date_range(end=datetime.now(), periods=12, freq='M').strftime('%Y-%m').tolist()
74
- predictions = {}
75
- for niche_col_name in niches:
76
- niche_name = niche_col_name.split('_')[-1]
77
- points = [random.randint(40, 60)]
78
- for _ in range(11):
79
- points.append(max(20, min(100, points[-1] + random.randint(-10, 10))))
80
- predictions[niche_name] = [{"date": date, "value": value} for date, value in zip(dates, points)]
81
-
82
- return {"trend_predictions": predictions}
 
 
 
 
 
 
 
 
 
83
 
 
 
 
 
84
 
85
  def decode_market_trend(topic: str, llm_instance) -> Dict[str, str]:
86
- """
87
- Decodes a keyword into a strategy. USES HYBRID AI + TEMPLATE LOGIC.
88
- """
89
- print(f"🧠 [Thunderbird] Decoding Trend with Upgraded Prompt: {topic}")
90
 
91
- # --- 1. Smart Fallback (Same as before) ---
92
- base_template = FALLBACK_STRATEGIES["Default"]
93
- topic_lower = topic.lower()
94
- if "ai" in topic_lower: base_template = FALLBACK_STRATEGIES["AI"]
95
- elif "tiktok" in topic_lower: base_template = FALLBACK_STRATEGIES["TikTok"]
96
- elif "micro" in topic_lower: base_template = FALLBACK_STRATEGIES["Micro"]
97
 
98
  if not llm_instance:
99
- return base_template
100
 
101
- # --- 2. THE UPGRADED PROMPT (This is the fix) ---
102
- # We give it today's date and a better persona.
103
  today_date = datetime.now().strftime("%Y-%m-%d")
104
  prompt = f"""[INST]
105
- You are "PulseAI", a Senior Digital Strategy Director providing a competitive edge.
106
- Today's Date is {today_date}. All your analysis must be modern and relevant to today.
107
-
108
  Analyze this trend: "{topic}".
109
-
110
  Provide a concise, expert briefing in a valid JSON format with 3 keys:
111
- 1. "summary": A single, sharp sentence explaining what is happening now.
112
- 2. "impact": A single sentence explaining why this matters for a brand's revenue or reach.
113
- 3. "strategy": One creative, specific content idea an agency can execute this week. Example: "A 30s reel comparing old vs. new AI tools."
114
-
115
  JSON Response:
116
  [/INST]"""
117
 
@@ -119,21 +123,12 @@ def decode_market_trend(topic: str, llm_instance) -> Dict[str, str]:
119
  response = llm_instance(prompt, max_tokens=256, temperature=0.6, stop=["[INST]"], echo=False)
120
  text = response['choices'][0]['text'].strip()
121
 
122
- # Parse the JSON from the response text
123
  start = text.find('{')
124
  end = text.rfind('}') + 1
125
  if start != -1 and end != 0:
126
- data = json.loads(text[start:end])
127
- # Ensure no key is empty, fall back to template if needed
128
- return {
129
- "summary": data.get("summary") or base_template["summary"],
130
- "impact": data.get("impact") or base_template["impact"],
131
- "strategy": data.get("strategy") or base_template["strategy"]
132
- }
133
  else:
134
- # If JSON is broken, use the fallback
135
  raise ValueError("LLM did not return valid JSON.")
136
-
137
  except Exception as e:
138
- print(f" - ⚠️ LLM Error or Bad JSON (Using Fallback): {e}")
139
- return base_template
 
1
  import os
2
  import pandas as pd
3
  import joblib
 
4
  import json
5
  from datetime import datetime
6
  from newsapi import NewsApiClient
7
+ from pytrends.request import TrendReq
8
+ from typing import Dict, Any, Optional
9
 
10
  # --- CONFIGURATION ---
11
  MODEL_PATH = os.path.join(os.path.dirname(__file__), '..', 'models', 'thunderbird_market_predictor_v1.joblib')
12
  NEWS_API_KEY = os.getenv("NEWS_API_KEY")
13
 
14
+ # --- NO MOCK DATA. REAL ATTEMPTS ONLY ---
15
+
16
  def get_external_trends() -> dict:
17
+ """
18
+ Attempts to fetch REAL data from NewsAPI and Google Trends.
19
+ If it fails, it returns None, not dummy data.
20
+ """
21
+ print("🚀 [Thunderbird Engine] Fetching REAL external trends...")
22
+ results = { "news_headlines": [], "breakout_keyword": None, "trending_audio": None }
 
23
 
24
+ # 1. Attempt to fetch real news
25
  if NEWS_API_KEY:
26
  try:
27
  newsapi = NewsApiClient(api_key=NEWS_API_KEY)
28
+ top_headlines = newsapi.get_everything(q='("influencer marketing" OR "social media trends")', language='en', sort_by='relevancy', page_size=5)
29
+ articles = top_headlines.get('articles', [])
30
+ if articles:
31
+ results["news_headlines"] = [{"title": a['title'], "url": a['url']} for a in articles]
 
 
32
  except Exception as e:
33
+ print(f" - ⚠️ NewsAPI connection failed: {e}")
34
+
35
+ # 2. Attempt to fetch real breakout keyword from Google Trends
36
+ try:
37
+ pytrends = TrendReq(hl='en-US', tz=360)
38
+ trending_searches_df = pytrends.trending_searches(pn='united_states')
39
+ if not trending_searches_df.empty:
40
+ results["breakout_keyword"] = trending_searches_df.iloc[0,0]
41
+ except Exception as e:
42
+ print(f" - ⚠️ Google Trends connection failed: {e}")
43
+
44
+ # 3. Trending Audio - This requires a dedicated API (e.g., TikTok/Spotify)
45
+ # Since we don't have one, it will correctly remain None.
46
 
 
 
 
 
 
47
  return results
48
 
49
  def predict_niche_trends() -> dict:
50
+ """
51
+ Uses the REAL trained ML model to predict future interest.
52
+ If the model is not trained or fails, it returns an empty dictionary.
53
+ """
54
+ print("\n🚀 [Thunderbird Engine] Using REAL ML model for predictions...")
55
+
56
  try:
57
  if not os.path.exists(MODEL_PATH):
58
+ raise FileNotFoundError(f"Model not found at {MODEL_PATH}")
 
59
 
60
  model_pack = joblib.load(MODEL_PATH)
61
+ model = model_pack['model']
62
  encoder = model_pack['encoder']
63
+
64
+ # Prepare future dates for prediction
65
+ future_dates = pd.to_datetime(pd.date_range(start=datetime.now(), periods=12, freq='M'))
66
+
67
+ predictions = {}
68
+ # Get all possible niche names from the encoder
69
+ all_niches = encoder.get_feature_names_out(['niche'])
 
 
70
 
71
+ for niche_col in all_niches:
72
+ niche_name = niche_col.replace('niche_', '')
73
+
74
+ # Create a dataframe for prediction for this niche for all future months
75
+ future_df = pd.DataFrame({
76
+ 'month': future_dates.to_period('M'),
77
+ 'niche': [niche_name] * 12,
78
+ 'trend_score': 50 # Assume an average trend score for future prediction
79
+ })
80
+
81
+ # Predict using the actual model
82
+ predicted_values = model.predict(future_df)
83
+
84
+ # Format for the frontend chart
85
+ predictions[niche_name] = [
86
+ {"date": dt.strftime('%Y-%m'), "value": max(0, int(val))}
87
+ for dt, val in zip(future_dates, predicted_values)
88
+ ]
89
+
90
+ print(f" - ✅ Successfully generated REAL predictions for niches: {list(predictions.keys())}")
91
+ return {"trend_predictions": predictions}
92
 
93
+ except Exception as e:
94
+ print(f" - ❌ REAL Prediction Failed: {e}. Chart will be empty.")
95
+ # Return empty data, which the frontend will show as "Unavailable"
96
+ return {"trend_predictions": {}}
97
 
98
  def decode_market_trend(topic: str, llm_instance) -> Dict[str, str]:
99
+ """Decodes a keyword into a strategy with a clear failure message."""
100
+ print(f"🧠 [Thunderbird] Decoding Trend with REAL AI: {topic}")
 
 
101
 
102
+ offline_response = {
103
+ "summary": "AI Analyst is currently offline.",
104
+ "impact": "The system could not get a real-time analysis.",
105
+ "strategy": "Please try again in a few moments. If the issue persists, check AI service logs."
106
+ }
 
107
 
108
  if not llm_instance:
109
+ return offline_response
110
 
 
 
111
  today_date = datetime.now().strftime("%Y-%m-%d")
112
  prompt = f"""[INST]
113
+ You are a Senior Digital Strategy Director. Today's Date is {today_date}.
 
 
114
  Analyze this trend: "{topic}".
 
115
  Provide a concise, expert briefing in a valid JSON format with 3 keys:
116
+ 1. "summary": A sharp sentence explaining what's happening now.
117
+ 2. "impact": A sentence explaining why this matters for a brand's revenue or reach.
118
+ 3. "strategy": One creative, specific content idea an agency can execute this week.
 
119
  JSON Response:
120
  [/INST]"""
121
 
 
123
  response = llm_instance(prompt, max_tokens=256, temperature=0.6, stop=["[INST]"], echo=False)
124
  text = response['choices'][0]['text'].strip()
125
 
 
126
  start = text.find('{')
127
  end = text.rfind('}') + 1
128
  if start != -1 and end != 0:
129
+ return json.loads(text[start:end])
 
 
 
 
 
 
130
  else:
 
131
  raise ValueError("LLM did not return valid JSON.")
 
132
  except Exception as e:
133
+ print(f" - ❌ LLM Error (Returning Offline Message): {e}")
134
+ return offline_response