# -*- coding: utf-8 -*-
"""ITI105_SETY_Demo_Final.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1239rYpEr1h2-cpIWEzcYdiaG6QSZCNzu
"""

import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_curve, precision_recall_curve, auc,
    confusion_matrix, ConfusionMatrixDisplay
)

# Load the pre-trained models (adjust paths as needed):
# base_model is the logistic-regression baseline, best_model the gradient-boosting model.
base_model = joblib.load("base_model.pkl")
best_model = joblib.load("best_gb.pkl")
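
# The .pkl files are assumed to come from an offline training step; a minimal
# sketch of how they could have been produced (hypothetical X_train / y_train):
#
#   from sklearn.linear_model import LogisticRegression
#   from sklearn.ensemble import GradientBoostingClassifier
#   joblib.dump(LogisticRegression(max_iter=1000).fit(X_train, y_train), "base_model.pkl")
#   joblib.dump(GradientBoostingClassifier().fit(X_train, y_train), "best_gb.pkl")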

def get_metrics(model, X, y):
    """Compute headline metrics plus ROC and PR curve points for one model."""
    y_pred = model.predict(X)
    y_prob = model.predict_proba(X)[:, 1]  # probability of the positive class

    acc = accuracy_score(y, y_pred)
    prec = precision_score(y, y_pred)
    rec = recall_score(y, y_pred)
    f1 = f1_score(y, y_pred)

    fpr, tpr, _ = roc_curve(y, y_prob)
    precision_vals, recall_vals, _ = precision_recall_curve(y, y_prob)

    return {
        'y_pred': y_pred,
        'y_prob': y_prob,
        'fpr': fpr,
        'tpr': tpr,
        'precision_vals': precision_vals,
        'recall_vals': recall_vals,
        'metrics': f"""
        πŸ“Œ Accuracy: {acc:.4f}
        πŸ“Œ Precision: {prec:.4f}
        πŸ“Œ Recall: {rec:.4f}
        πŸ“Œ F1 Score: {f1:.4f}
        πŸ“Œ ROC AUC: {auc(fpr, tpr):.4f}
        πŸ“Œ PR AUC: {auc(recall_vals, precision_vals):.4f}
        """
    }
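
# Side note (not used above): sklearn's average_precision_score is another
# common summary of the PR curve; unlike the trapezoidal auc(recall, precision)
# it avoids linear interpolation between operating points:
#
#   from sklearn.metrics import average_precision_score
#   ap = average_precision_score(y, y_prob)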

def plot_confusion_matrices(model1, model2, X, y,
                            labels=("Logistic Regression", "Gradient Boosting"),
                            class_names=None, value_fontsize=28, value_color='red'):
    fig, axes = plt.subplots(1, 2, figsize=(16, 8))

    for i, model in enumerate([model1, model2]):
        y_pred = model.predict(X)
        cm = confusion_matrix(y, y_pred)

        # Plot with default text
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
        disp.plot(ax=axes[i], cmap='Blues', colorbar=False)

        # Remove default text artists
        for artist in axes[i].texts:
            artist.set_visible(False)

        # Manually annotate with custom font and color
        for (j, k), val in np.ndenumerate(cm):
            axes[i].text(k, j, f"{val}", ha='center', va='center', fontsize=value_fontsize, color=value_color)

        axes[i].set_title(f"{labels[i]}", fontsize=30)
        axes[i].tick_params(axis='both', labelsize=24)
        axes[i].set_xlabel("Predicted Default", fontsize=24)
        axes[i].set_ylabel("Actual Default", fontsize=24)

    plt.tight_layout()
    return plt.gcf()
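
# Example standalone use of the plot helper (hypothetical X_test / y_test):
#
#   fig = plot_confusion_matrices(base_model, best_model, X_test, y_test,
#                                 class_names=["Negative", "Positive"])
#   fig.savefig("confusion_matrices.png", dpi=150)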

def evaluate(file):
    df = pd.read_csv(file.name)
    if 'Status' not in df.columns:
        # Return the message plus empty plots so the arity matches the four outputs.
        return "Error: 'Status' column missing.", None, None, None

    X = df.drop(columns='Status')
    y = df['Status']

    base = get_metrics(base_model, X, y)
    best = get_metrics(best_model, X, y)

    # Combined ROC Curve
    plt.figure()
    plt.plot(base['fpr'], base['tpr'], label=f"Logistic Regression (AUC={auc(base['fpr'], base['tpr']):.2f})", linestyle='--')
    plt.plot(best['fpr'], best['tpr'], label=f"Gradient Boosting (AUC={auc(best['fpr'], best['tpr']):.2f})", linestyle='-')
    plt.plot([0, 1], [0, 1], 'k--', alpha=0.5)
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("Combined ROC Curve")
    plt.legend()
    roc_fig = plt.gcf()

    # Combined PR Curve
    plt.figure()
    plt.plot(base['recall_vals'], base['precision_vals'], label=f"Logistic Regression (AUC={auc(base['recall_vals'], base['precision_vals']):.2f})", linestyle='--')
    plt.plot(best['recall_vals'], best['precision_vals'], label=f"Gradient Boosting (AUC={auc(best['recall_vals'], best['precision_vals']):.2f})", linestyle='-')
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title("Combined Precision-Recall Curve")
    plt.legend()
    pr_fig = plt.gcf()

    # Confusion Matrices
    cm_fig = plot_confusion_matrices(base_model, best_model, X, y, class_names=["Negative", "Positive"], value_fontsize=32, value_color='red')

    combined_metrics = (
        f"πŸ“Š Base Model (Logistic Regression):{base['metrics']}\n"
        f"πŸ“Š Best Model (Gradient Boosting):{best['metrics']}"
    )

    return combined_metrics, roc_fig, pr_fig, cm_fig
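
# Optional local smoke test, bypassing the UI (assumes a "sample.csv" with a
# 'Status' column sits next to this script):
#
#   metrics_text, *_ = evaluate(open("sample.csv"))
#   print(metrics_text)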

demo = gr.Interface(
    fn=evaluate,
    inputs=gr.File(label="Upload CSV with 'Status' column"),
    outputs=[
        gr.Textbox(label="πŸ“Š Performance Comparison"),
        gr.Plot(label="Combined ROC Curve"),
        gr.Plot(label="Combined Precision-Recall Curve"),
        gr.Plot(label="Confusion Matrices")
    ],
    title="πŸ” Model Comparison Dashboard",
    description="Upload a CSV file to compare base and best model performance side by side."
)

demo.launch(debug=True)
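
# Note: in Colab, debug=True keeps the cell running and surfaces server errors
# inline; adding share=True would also expose a temporary public URL.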