SOPIYAN committed on
Commit
ddcdc28
·
verified ·
1 Parent(s): 6b90cbd

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +106 -0
  2. best_rf_model.joblib +3 -0
  3. requirements.txt +4 -0
  4. scaler.joblib +3 -0
app.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import gradio as gr
3
+ import pandas as pd
4
+ import joblib
5
+ from sklearn.preprocessing import StandardScaler
6
+
7
+ # 1. Load the trained model and scaler
8
# Deserialize the fitted estimator and the fitted scaler from the files that
# ship alongside this script.
best_rf_model = joblib.load('best_rf_model.joblib')
scaler = joblib.load('scaler.joblib')

# Per the training notes, the scaler was fitted on the columns
# ['Sales', 'Quantity', 'Discount', 'Profit'] in that order, so position 1
# carries the Quantity statistics and position 2 the Discount statistics.
quantity_mean, discount_mean = scaler.mean_[1], scaler.mean_[2]
quantity_std, discount_std = scaler.scale_[1], scaler.scale_[2]
18
+
19
+ # 3. Define all expected one-hot encoded column names
20
# Every one-hot column the app knows about, grouped by source feature.
all_expected_ohe_cols = [
    # Ship Mode
    'Ship Mode_First Class',
    'Ship Mode_Same Day',
    'Ship Mode_Second Class',
    'Ship Mode_Standard Class',
    # Segment
    'Segment_Consumer',
    'Segment_Corporate',
    'Segment_Home Office',
    # Region
    'Region_Central',
    'Region_East',
    'Region_South',
    'Region_West',
    # Sub-Category
    'Sub-Category_Bookcases',
    'Sub-Category_Chairs',
    'Sub-Category_Furnishings',
    'Sub-Category_Tables',
]

# 4. Exact feature order expected by the model: the two numeric features
# first, then the one-hot columns in the order listed above.
# This order must match X_train_sales.columns.tolist()
model_features_order = ['Quantity', 'Discount'] + all_expected_ohe_cols
36
+
37
+ # Define the prediction function
38
def predict_sales(quantity, discount, ship_mode, segment, region, sub_category):
    """Predict sales for a single order described by the form inputs.

    The two numeric inputs are standardized with the stored scaler statistics,
    each categorical input is one-hot encoded against
    ``all_expected_ohe_cols``, and the resulting single-row frame (columns in
    ``model_features_order``) is passed to ``best_rf_model``.

    Args:
        quantity: Number of units entered in the form.
        discount: Discount entered in the form.
        ship_mode: Selected ship mode label (e.g. ``'Same Day'``).
        segment: Selected customer segment label.
        region: Selected region label.
        sub_category: Selected product sub-category label.

    Returns:
        The model's prediction for this row as a plain ``float``. The UI
        labels this value as standardized sales.

    Raises:
        gr.Error: If a numeric input is missing, or a categorical input is
            missing or does not correspond to a known one-hot column.
    """
    # An empty Number field arrives as None; fail with a readable message
    # instead of letting NaN reach the model.
    for label, value in (('Quantity', quantity), ('Discount', discount)):
        if value is None:
            raise gr.Error(f"Please enter a value for {label}.")

    # Start with every one-hot column switched off, then switch on exactly one
    # column per categorical feature.
    features = dict.fromkeys(all_expected_ohe_cols, 0)
    categorical_inputs = (
        ('Ship Mode', ship_mode),
        ('Segment', segment),
        ('Region', region),
        ('Sub-Category', sub_category),
    )
    for label, value in categorical_inputs:
        column = f"{label}_{value}"
        # A missing or unknown category would otherwise leave its whole group
        # at zero and silently produce a meaningless prediction.
        if column not in features:
            raise gr.Error(f"Please choose a valid {label} (got {value!r}).")
        features[column] = 1

    # Standardize numerical features using the stored mean and std.
    features['Quantity'] = (float(quantity) - quantity_mean) / quantity_std
    features['Discount'] = (float(discount) - discount_mean) / discount_std

    # `columns=` both selects and orders the features for the model.
    input_final = pd.DataFrame([features], columns=model_features_order)

    return float(best_rf_model.predict(input_final)[0])
76
+
77
+ # Define Gradio Interface
78
# Build the form: two numeric fields followed by one dropdown per categorical
# feature, in the same positional order as predict_sales' parameters.
iface = gr.Interface(
    fn=predict_sales,
    inputs=[
        gr.Number(label="Quantity (Units)", minimum=1, maximum=100, step=1),
        gr.Number(label="Discount (0.0 - 0.7)", minimum=0.0, maximum=0.7, step=0.01),
        *(
            gr.Dropdown(label=feature_label, choices=feature_choices)
            for feature_label, feature_choices in (
                ("Ship Mode", ['Standard Class', 'Second Class', 'First Class', 'Same Day']),
                ("Segment", ['Consumer', 'Corporate', 'Home Office']),
                ("Region", ['West', 'East', 'Central', 'South']),
                ("Sub-Category", ['Furnishings', 'Chairs', 'Tables', 'Bookcases']),
            )
        ),
    ],
    outputs=gr.Number(label="Predicted Sales (Standardized Value)"),
    title="Tuned Random Forest Regressor for Sales Prediction",
    description="Enter the features to predict standardized sales using the best-tuned model.",
)

# Start the web UI with a public share link and debug output enabled.
iface.launch(debug=True, share=True)
best_rf_model.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c10ae5f658b5e74ef3680cf5f87606dd4a2765ec15aa2a1db941de72d8952401
3
+ size 7075905
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pandas
2
+ scikit-learn
3
+ gradio
4
+ joblib
scaler.joblib ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:709b1598f941f8f6d0d3f497563a651a1880b8d61f182fe66eb67252eccf5b8c
3
+ size 999