# -*- coding: utf-8 -*-
"""ITI105_SETY_Demo_Final.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1239rYpEr1h2-cpIWEzcYdiaG6QSZCNzu
"""

import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_curve, precision_recall_curve, auc,
    confusion_matrix, ConfusionMatrixDisplay
)

# Load the pre-trained models (adjust paths as needed):
# base_model is the logistic-regression baseline, best_model the gradient-boosting model.
base_model = joblib.load("base_model.pkl")
best_model = joblib.load("best_gb.pkl")
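
# The .pkl files are assumed to come from an offline training step; a minimal
# sketch of how they could have been produced (hypothetical X_train / y_train):
#
#   from sklearn.linear_model import LogisticRegression
#   from sklearn.ensemble import GradientBoostingClassifier
#   joblib.dump(LogisticRegression(max_iter=1000).fit(X_train, y_train), "base_model.pkl")
#   joblib.dump(GradientBoostingClassifier().fit(X_train, y_train), "best_gb.pkl")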

def get_metrics(model, X, y):
    """Compute headline metrics plus ROC and PR curve points for one model."""
    y_pred = model.predict(X)
    y_prob = model.predict_proba(X)[:, 1]  # probability of the positive class

    acc = accuracy_score(y, y_pred)
    prec = precision_score(y, y_pred)
    rec = recall_score(y, y_pred)
    f1 = f1_score(y, y_pred)

    fpr, tpr, _ = roc_curve(y, y_prob)
    precision_vals, recall_vals, _ = precision_recall_curve(y, y_prob)

    return {
        'y_pred': y_pred,
        'y_prob': y_prob,
        'fpr': fpr,
        'tpr': tpr,
        'precision_vals': precision_vals,
        'recall_vals': recall_vals,
        'metrics': f"""
        πŸ“Œ Accuracy: {acc:.4f}
        πŸ“Œ Precision: {prec:.4f}
        πŸ“Œ Recall: {rec:.4f}
        πŸ“Œ F1 Score: {f1:.4f}
        πŸ“Œ ROC AUC: {auc(fpr, tpr):.4f}
        πŸ“Œ PR AUC: {auc(recall_vals, precision_vals):.4f}
        """
    }
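
# Side note (not used above): sklearn's average_precision_score is another
# common summary of the PR curve; unlike the trapezoidal auc(recall, precision)
# it avoids linear interpolation between operating points:
#
#   from sklearn.metrics import average_precision_score
#   ap = average_precision_score(y, y_prob)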

def plot_confusion_matrices(model1, model2, X, y,
                            labels=("Logistic Regression", "Gradient Boosting"),
                            class_names=None, value_fontsize=28, value_color='red'):
    fig, axes = plt.subplots(1, 2, figsize=(16, 8))

    for i, model in enumerate([model1, model2]):
        y_pred = model.predict(X)
        cm = confusion_matrix(y, y_pred)

        # Plot with default text
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
        disp.plot(ax=axes[i], cmap='Blues', colorbar=False)

        # Remove default text artists
        for artist in axes[i].texts:
            artist.set_visible(False)

        # Manually annotate with custom font and color
        for (j, k), val in np.ndenumerate(cm):
            axes[i].text(k, j, f"{val}", ha='center', va='center', fontsize=value_fontsize, color=value_color)

        axes[i].set_title(f"{labels[i]}", fontsize=30)
        axes[i].tick_params(axis='both', labelsize=24)
        axes[i].set_xlabel("Predicted Default", fontsize=24)
        axes[i].set_ylabel("Actual Default", fontsize=24)

    plt.tight_layout()
    return plt.gcf()
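
# Example standalone use of the plot helper (hypothetical X_test / y_test):
#
#   fig = plot_confusion_matrices(base_model, best_model, X_test, y_test,
#                                 class_names=["Negative", "Positive"])
#   fig.savefig("confusion_matrices.png", dpi=150)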

def evaluate(file):
    df = pd.read_csv(file.name)
    if 'Status' not in df.columns:
        # Return the message plus empty plots so the arity matches the four outputs.
        return "Error: 'Status' column missing.", None, None, None

    X = df.drop(columns='Status')
    y = df['Status']

    base = get_metrics(base_model, X, y)
    best = get_metrics(best_model, X, y)

    # Combined ROC Curve
    plt.figure()
    plt.plot(base['fpr'], base['tpr'], label=f"Logistic Regression (AUC={auc(base['fpr'], base['tpr']):.2f})", linestyle='--')
    plt.plot(best['fpr'], best['tpr'], label=f"Gradient Boosting (AUC={auc(best['fpr'], best['tpr']):.2f})", linestyle='-')
    plt.plot([0, 1], [0, 1], 'k--', alpha=0.5)
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("Combined ROC Curve")
    plt.legend()
    roc_fig = plt.gcf()

    # Combined PR Curve
    plt.figure()
    plt.plot(base['recall_vals'], base['precision_vals'], label=f"Logistic Regression (AUC={auc(base['recall_vals'], base['precision_vals']):.2f})", linestyle='--')
    plt.plot(best['recall_vals'], best['precision_vals'], label=f"Gradient Boosting (AUC={auc(best['recall_vals'], best['precision_vals']):.2f})", linestyle='-')
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title("Combined Precision-Recall Curve")
    plt.legend()
    pr_fig = plt.gcf()

    # Confusion Matrices
    cm_fig = plot_confusion_matrices(base_model, best_model, X, y, class_names=["Negative", "Positive"], value_fontsize=32, value_color='red')

    combined_metrics = (
        f"πŸ“Š Base Model (Logistic Regression):{base['metrics']}\n"
        f"πŸ“Š Best Model (Gradient Boosting):{best['metrics']}"
    )

    return combined_metrics, roc_fig, pr_fig, cm_fig
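
# Optional local smoke test, bypassing the UI (assumes a "sample.csv" with a
# 'Status' column sits next to this script):
#
#   metrics_text, *_ = evaluate(open("sample.csv"))
#   print(metrics_text)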

demo = gr.Interface(
    fn=evaluate,
    inputs=gr.File(label="Upload CSV with 'Status' column"),
    outputs=[
        gr.Textbox(label="πŸ“Š Performance Comparison"),
        gr.Plot(label="Combined ROC Curve"),
        gr.Plot(label="Combined Precision-Recall Curve"),
        gr.Plot(label="Confusion Matrices")
    ],
    title="πŸ” Model Comparison Dashboard",
    description="Upload a CSV file to compare base and best model performance side by side."
)

demo.launch(debug=True)
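
# Note: in Colab, debug=True keeps the cell running and surfaces server errors
# inline; adding share=True would also expose a temporary public URL.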