Spaces:

SOPIYAN
/

2024121011

Sleeping

App Files Community

SOPIYAN committed on Nov 6, 2025

Commit

ddcdc28

verified ·

1 Parent(s): 6b90cbd

Upload 4 files

Browse files

Files changed (4) hide show

app.py +106 -0
best_rf_model.joblib +3 -0
requirements.txt +4 -0
scaler.joblib +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,106 @@

+import gradio as gr
+import pandas as pd
+import joblib
+from sklearn.preprocessing import StandardScaler
+# 1. Load the trained model and scaler
+best_rf_model = joblib.load('best_rf_model.joblib')
+scaler = joblib.load('scaler.joblib')
+# 2. Extract mean and std for 'Quantity' and 'Discount'
+# numerical_cols_to_standardize = ['Sales', 'Quantity', 'Discount', 'Profit']
+# Based on this order, Quantity is index 1, Discount is index 2
+quantity_mean = scaler.mean_[1]
+quantity_std = scaler.scale_[1]
+discount_mean = scaler.mean_[2]
+discount_std = scaler.scale_[2]
+# 3. Define all expected one-hot encoded column names
+all_expected_ohe_cols = [
+    'Ship Mode_First Class', 'Ship Mode_Same Day', 'Ship Mode_Second Class', 'Ship Mode_Standard Class',
+    'Segment_Consumer', 'Segment_Corporate', 'Segment_Home Office',
+    'Region_Central', 'Region_East', 'Region_South', 'Region_West',
+    'Sub-Category_Bookcases', 'Sub-Category_Chairs', 'Sub-Category_Furnishings', 'Sub-Category_Tables'
+]
+# 4. Define the exact order of features expected by the model
+# This order must match X_train_sales.columns.tolist()
+model_features_order = [
+    'Quantity', 'Discount',
+    'Ship Mode_First Class', 'Ship Mode_Same Day', 'Ship Mode_Second Class', 'Ship Mode_Standard Class',
+    'Segment_Consumer', 'Segment_Corporate', 'Segment_Home Office',
+    'Region_Central', 'Region_East', 'Region_South', 'Region_West',
+    'Sub-Category_Bookcases', 'Sub-Category_Chairs', 'Sub-Category_Furnishings', 'Sub-Category_Tables'
+]
+# Define the prediction function
+def predict_sales(quantity, discount, ship_mode, segment, region, sub_category):
+    # Create a DataFrame for the input
+    input_data = pd.DataFrame({
+        'Quantity': [quantity],
+        'Discount': [discount],
+        'Ship Mode': [ship_mode],
+        'Segment': [segment],
+        'Region': [region],
+        'Sub-Category': [sub_category]
+    })
+    # Apply One-Hot Encoding
+    categorical_cols_to_encode_for_inference = ['Ship Mode', 'Segment', 'Region', 'Sub-Category']
+    input_encoded = pd.get_dummies(input_data, columns=categorical_cols_to_encode_for_inference, drop_first=False)
+    # Reindex to ensure all expected OHE columns are present, filling missing with 0
+    # First, ensure only the one-hot encoded columns from the original categorical features are kept
+    # and then add the numerical columns back in the correct order for reindexing.
+    processed_input = pd.DataFrame(0, index=[0], columns=all_expected_ohe_cols)
+    for col in input_encoded.columns:
+        if col in processed_input.columns:
+            processed_input[col] = input_encoded[col].values
+    # Add numerical features back to processed_input before final ordering
+    processed_input['Quantity'] = input_data['Quantity']
+    processed_input['Discount'] = input_data['Discount']
+    # Standardize numerical features using the stored mean and std
+    processed_input['Quantity'] = (processed_input['Quantity'] - quantity_mean) / quantity_std
+    processed_input['Discount'] = (processed_input['Discount'] - discount_mean) / discount_std
+    # Ensure column order matches model_features_order
+    input_final = processed_input[model_features_order]
+    # Make prediction
+    prediction = best_rf_model.predict(input_final)[0]
+    return prediction
+# Define Gradio Interface
+iface = gr.Interface(
+    fn=predict_sales,
+    inputs=[
+        gr.Number(label="Quantity (Units)", minimum=1, maximum=100, step=1),
+        gr.Number(label="Discount (0.0 - 0.7)", minimum=0.0, maximum=0.7, step=0.01),
+        gr.Dropdown(
+            label="Ship Mode",
+            choices=['Standard Class', 'Second Class', 'First Class', 'Same Day']
+        ),
+        gr.Dropdown(
+            label="Segment",
+            choices=['Consumer', 'Corporate', 'Home Office']
+        ),
+        gr.Dropdown(
+            label="Region",
+            choices=['West', 'East', 'Central', 'South']
+        ),
+        gr.Dropdown(
+            label="Sub-Category",
+            choices=['Furnishings', 'Chairs', 'Tables', 'Bookcases']
+        )
+    ],
+    outputs=gr.Number(label="Predicted Sales (Standardized Value)"),
+    title="Tuned Random Forest Regressor for Sales Prediction",
+    description="Enter the features to predict standardized sales using the best-tuned model."
+)
+# Launch the interface
+iface.launch(share=True, debug=True)

best_rf_model.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c10ae5f658b5e74ef3680cf5f87606dd4a2765ec15aa2a1db941de72d8952401
+size 7075905

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+pandas
+scikit-learn
+gradio
+joblib

scaler.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:709b1598f941f8f6d0d3f497563a651a1880b8d61f182fe66eb67252eccf5b8c
+size 999