reachify-ai-service / training /train_revenue_forecaster.py
amitbhatt6075's picture
Complete fresh start - FINAL UPLOAD
0914e96
# FILE: ai-service/training/train_revenue_forecaster.py (NEW FILE)
import pandas as pd
import joblib
import os
import sys
# Holt's Exponential Smoothing is a powerful forecasting model
from statsmodels.tsa.api import Holt
from pathlib import Path
def train_revenue_forecaster(data_path=None, models_dir=None) -> None:
    """Train a Holt exponential-smoothing model on monthly revenue and save it.

    Reads a CSV that has ``month`` and ``total_revenue`` columns, builds a
    date-sorted revenue series, fits ``Holt`` with estimated initial values
    and optimized smoothing parameters, and writes the fitted result to
    ``revenue_forecaster_v1.joblib`` using ``joblib.dump``.

    Progress and failures are reported with ``print``; exceptions raised
    while loading, training or saving are caught and reported, not
    propagated to the caller.

    Args:
        data_path: Optional path to the training CSV. Defaults to
            ``<root>/data/revenue_training_data.csv``, where ``<root>`` is
            the parent of the directory containing this file.
        models_dir: Optional directory for the saved model. Defaults to
            ``<root>/models``. It is created (including parents) if missing.

    Returns:
        None, both on success and when training is aborted.
    """
    print("--- Starting AI Revenue Forecaster Model Training ---")

    # Normalise caller-supplied locations before the try block so the error
    # messages below can always reference `data_path`.
    if data_path is not None:
        data_path = Path(data_path)
    if models_dir is not None:
        models_dir = Path(models_dir)

    try:
        # --- Setup to find files from the root directory ---
        root_dir = Path(__file__).resolve().parents[1]
        root_entry = str(root_dir)
        # Guarded so that repeated calls do not keep growing sys.path.
        if root_entry not in sys.path:
            sys.path.append(root_entry)
        if data_path is None:
            data_path = root_dir / 'data' / 'revenue_training_data.csv'
        if models_dir is None:
            models_dir = root_dir / 'models'

        # --- Load and prepare the data ---
        df = pd.read_csv(data_path)
        print(f"Loaded {len(df)} monthly records from {data_path}")

        # The model needs a clean, chronologically ordered time-series index.
        df['month'] = pd.to_datetime(df['month'])
        df = df.set_index('month').sort_index()

        # We are training on the 'total_revenue' column. Rows without a
        # revenue value carry no information for the model, so they are
        # dropped before the minimum-length check instead of being counted
        # as observations and handed to Holt as NaN.
        revenue = df['total_revenue']
        time_series = revenue.dropna()
        dropped = len(revenue) - len(time_series)
        if dropped:
            print(f"Skipping {dropped} record(s) with missing total_revenue.")

        if len(time_series) < 4:
            print("🔴 ERROR: Not enough historical data (at least 4 months required). Aborting training.")
            return
    except (FileNotFoundError, pd.errors.EmptyDataError):
        print(f"🔴 ERROR: Data file not found or is empty at {data_path}")
        return
    except Exception as e:
        # Best-effort script boundary: report anything else (missing column,
        # unparseable dates, ...) and stop without raising.
        print(f"🔴 ERROR during data loading/preparation: {e}")
        return

    # --- Train the Holt's Forecasting Model ---
    try:
        print("Training the Holt's Exponential Smoothing model...")
        # initialization_method="estimated" lets the fit estimate the initial
        # level/trend; fit(optimized=True) optimizes the smoothing parameters.
        model = Holt(time_series, initialization_method="estimated").fit(optimized=True)
        print("--- Model training complete! ---")

        # --- Save the trained model ---
        # parents=True so a caller-supplied nested output directory also works.
        models_dir.mkdir(parents=True, exist_ok=True)
        model_path = models_dir / 'revenue_forecaster_v1.joblib'
        joblib.dump(model, model_path)
        print(f"🎉 Success! Revenue Forecaster model saved to: {model_path}")
    except Exception as e:
        print(f"🔴 ERROR during model training or saving: {e}")
# Script entry point: run the training routine only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    train_revenue_forecaster()