# -*- coding: utf-8 -*-
"""ITI105_SETY_Demo_Final.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1239rYpEr1h2-cpIWEzcYdiaG6QSZCNzu
"""
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    roc_curve, precision_recall_curve, auc,
    confusion_matrix, ConfusionMatrixDisplay
)
# Load models (adjust paths as needed)
base_model = joblib.load("base_model.pkl")
best_model = joblib.load("best_gb.pkl")
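# The two pickles are assumed to be fitted scikit-learn estimators (or
# pipelines) saved with joblib. A hypothetical sketch of how they might have
# been produced at training time (X_train/y_train are placeholders, not part
# of this script):
#
#   from sklearn.linear_model import LogisticRegression
#   from sklearn.ensemble import GradientBoostingClassifier
#   joblib.dump(LogisticRegression(max_iter=1000).fit(X_train, y_train), "base_model.pkl")
#   joblib.dump(GradientBoostingClassifier().fit(X_train, y_train), "best_gb.pkl")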
def get_metrics(model, X, y):
    """Compute point metrics and ROC/PR curve data for a fitted classifier."""
    y_pred = model.predict(X)
    y_prob = model.predict_proba(X)[:, 1]  # probability of the positive class

    acc = accuracy_score(y, y_pred)
    prec = precision_score(y, y_pred)
    rec = recall_score(y, y_pred)
    f1 = f1_score(y, y_pred)

    fpr, tpr, _ = roc_curve(y, y_prob)
    precision_vals, recall_vals, _ = precision_recall_curve(y, y_prob)

    return {
        'y_pred': y_pred,
        'y_prob': y_prob,
        'fpr': fpr,
        'tpr': tpr,
        'precision_vals': precision_vals,
        'recall_vals': recall_vals,
        'metrics': f"""
Accuracy:  {acc:.4f}
Precision: {prec:.4f}
Recall:    {rec:.4f}
F1 Score:  {f1:.4f}
ROC AUC:   {auc(fpr, tpr):.4f}
PR AUC:    {auc(recall_vals, precision_vals):.4f}
"""
    }
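# Note: the PR AUC above is the trapezoidal area under the precision-recall
# curve. A common alternative summary is scikit-learn's
# average_precision_score, which summarizes the same curve without
# interpolation, e.g.:
#
#   from sklearn.metrics import average_precision_score
#   ap = average_precision_score(y, y_prob)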
def plot_confusion_matrices(model1, model2, X, y,
                            labels=("Logistic Regression", "Gradient Boosting"),
                            class_names=None, value_fontsize=28, value_color='red'):
    """Plot the two models' confusion matrices side by side with large annotations."""
    fig, axes = plt.subplots(1, 2, figsize=(16, 8))
    for i, model in enumerate([model1, model2]):
        y_pred = model.predict(X)
        cm = confusion_matrix(y, y_pred)

        # Plot with the default cell text
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
        disp.plot(ax=axes[i], cmap='Blues', colorbar=False)

        # Hide the default text artists...
        for artist in axes[i].texts:
            artist.set_visible(False)

        # ...and re-annotate each cell with a custom font size and color
        for (j, k), val in np.ndenumerate(cm):
            axes[i].text(k, j, f"{val}", ha='center', va='center',
                         fontsize=value_fontsize, color=value_color)

        axes[i].set_title(labels[i], fontsize=30)
        axes[i].tick_params(axis='both', labelsize=24)
        axes[i].set_xlabel("Predicted Default", fontsize=24)
        axes[i].set_ylabel("Actual Default", fontsize=24)

    plt.tight_layout()
    return fig
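# Note: the hide-and-reannotate loop above works on any scikit-learn version.
# If the installed version supports it (1.2+, an assumption worth checking),
# the same effect can likely be achieved directly via the text_kw argument of
# ConfusionMatrixDisplay.plot:
#
#   disp.plot(ax=axes[i], cmap='Blues', colorbar=False,
#             text_kw={'fontsize': value_fontsize, 'color': value_color})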
def evaluate(file):
    """Gradio callback: read the uploaded CSV and compare both models on it."""
    # gr.File may hand back a tempfile-like object or a plain path string,
    # depending on the Gradio version
    path = file.name if hasattr(file, 'name') else file
    df = pd.read_csv(path)
    if 'Status' not in df.columns:
        # Return a value for every declared output so Gradio doesn't error
        return "Error: 'Status' column missing.", None, None, None

    X = df.drop(columns='Status')
    y = df['Status']

    base = get_metrics(base_model, X, y)
    best = get_metrics(best_model, X, y)

    # Combined ROC curve
    plt.figure()
    plt.plot(base['fpr'], base['tpr'],
             label=f"Logistic Regression (AUC={auc(base['fpr'], base['tpr']):.2f})",
             linestyle='--')
    plt.plot(best['fpr'], best['tpr'],
             label=f"Gradient Boosting (AUC={auc(best['fpr'], best['tpr']):.2f})",
             linestyle='-')
    plt.plot([0, 1], [0, 1], 'k--', alpha=0.5)  # chance diagonal
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.title("Combined ROC Curve")
    plt.legend()
    roc_fig = plt.gcf()

    # Combined precision-recall curve
    plt.figure()
    plt.plot(base['recall_vals'], base['precision_vals'],
             label=f"Logistic Regression (AUC={auc(base['recall_vals'], base['precision_vals']):.2f})",
             linestyle='--')
    plt.plot(best['recall_vals'], best['precision_vals'],
             label=f"Gradient Boosting (AUC={auc(best['recall_vals'], best['precision_vals']):.2f})",
             linestyle='-')
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.title("Combined Precision-Recall Curve")
    plt.legend()
    pr_fig = plt.gcf()

    # Side-by-side confusion matrices
    cm_fig = plot_confusion_matrices(base_model, best_model, X, y,
                                     class_names=["Negative", "Positive"],
                                     value_fontsize=32, value_color='red')

    combined_metrics = (
        f"Base Model (Logistic Regression):{base['metrics']}\n"
        f"Best Model (Gradient Boosting):{best['metrics']}"
    )
    return combined_metrics, roc_fig, pr_fig, cm_fig
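# The uploaded CSV must carry exactly the feature columns the models were
# trained on plus the binary 'Status' target. A hypothetical layout (column
# names are illustrative, not the actual schema):
#
#   loan_amount,income,credit_score,...,Status
#   250000,85000,690,...,0
#   180000,42000,575,...,1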
demo = gr.Interface(
    fn=evaluate,
    inputs=gr.File(label="Upload CSV with 'Status' column"),
    outputs=[
        gr.Textbox(label="Performance Comparison"),
        gr.Plot(label="Combined ROC Curve"),
        gr.Plot(label="Combined Precision-Recall Curve"),
        gr.Plot(label="Confusion Matrices")
    ],
    title="Model Comparison Dashboard",
    description="Upload a CSV file to compare the base and best models side by side."
)

demo.launch(debug=True)
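# debug=True keeps the process attached and prints tracebacks to the console.
# On Hugging Face Spaces this launch call is sufficient as-is; when running
# in Colab, passing share=True to launch() creates a temporary public URL.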