Spaces:
Sleeping
Sleeping
| # -*- coding: utf-8 -*- | |
| """ITI105_SETY_Demo_Final.ipynb | |
| Automatically generated by Colab. | |
| Original file is located at | |
| https://colab.research.google.com/drive/1239rYpEr1h2-cpIWEzcYdiaG6QSZCNzu | |
| """ | |
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import joblib | |
| from sklearn.metrics import ( | |
| accuracy_score, precision_score, recall_score, f1_score, | |
| roc_curve, precision_recall_curve, auc, | |
| confusion_matrix, ConfusionMatrixDisplay | |
| ) | |
# Load both serialized models once at import time. The paths are relative to
# the working directory; adjust them as needed.
base_model, best_model = (
    joblib.load(model_path) for model_path in ("base_model.pkl", "best_gb.pkl")
)
def get_metrics(model, X, y):
    """Score ``model`` on ``(X, y)`` and collect curve data plus a text report.

    Args:
        model: Object exposing ``predict`` and ``predict_proba``.
        X: Feature data passed unchanged to the model.
        y: True labels the predictions are compared against.

    Returns:
        dict with the keys ``y_pred``, ``y_prob``, ``fpr``, ``tpr``,
        ``precision_vals``, ``recall_vals`` and ``metrics``. ``metrics`` is a
        multi-line string listing accuracy, precision, recall, F1, ROC AUC and
        PR AUC, each formatted to four decimal places.
    """
    predicted = model.predict(X)
    # Column 1 of predict_proba is used as the score for both curves
    # (presumably the positive class of a binary classifier - confirm).
    scores = model.predict_proba(X)[:, 1]

    accuracy = accuracy_score(y, predicted)
    precision = precision_score(y, predicted)
    recall = recall_score(y, predicted)
    f1 = f1_score(y, predicted)

    false_pos_rate, true_pos_rate, _ = roc_curve(y, scores)
    pr_precision, pr_recall, _ = precision_recall_curve(y, scores)

    # Both areas use the trapezoidal ``auc`` helper; the PR area is integrated
    # over recall (x) against precision (y).
    roc_area = auc(false_pos_rate, true_pos_rate)
    pr_area = auc(pr_recall, pr_precision)

    report = (
        "\n"
        f"π Accuracy: {accuracy:.4f}\n"
        f"π Precision: {precision:.4f}\n"
        f"π Recall: {recall:.4f}\n"
        f"π F1 Score: {f1:.4f}\n"
        f"π ROC AUC: {roc_area:.4f}\n"
        f"π PR AUC: {pr_area:.4f}\n"
    )

    return dict(
        y_pred=predicted,
        y_prob=scores,
        fpr=false_pos_rate,
        tpr=true_pos_rate,
        precision_vals=pr_precision,
        recall_vals=pr_recall,
        metrics=report,
    )
def plot_confusion_matrices(model1, model2, X, y, labels=("Logistic Regression", "Gradient Boosting"), class_names=None, value_fontsize=28, value_color='red'):
    """Draw the confusion matrices of two models side by side.

    Args:
        model1: First model; must expose ``predict``.
        model2: Second model; must expose ``predict``.
        X: Feature data passed unchanged to both models.
        y: True labels.
        labels: Two subplot titles, in the order ``(model1, model2)``.
        class_names: Optional tick labels forwarded to
            ``ConfusionMatrixDisplay`` as ``display_labels``.
        value_fontsize: Font size of the per-cell counts.
        value_color: Colour of the per-cell counts.

    Returns:
        The matplotlib figure holding both subplots.
    """
    fig, axes = plt.subplots(1, 2, figsize=(16, 8))
    for i, (ax, model) in enumerate(zip(axes, (model1, model2))):
        cm = confusion_matrix(y, model.predict(X))

        # Skip the built-in annotations; the counts are drawn below with a
        # custom font size and colour instead.
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
        disp.plot(ax=ax, cmap='Blues', colorbar=False, include_values=False)

        # ndenumerate yields (row, col); matplotlib text wants (x=col, y=row).
        for (row, col), val in np.ndenumerate(cm):
            ax.text(col, row, f"{val}", ha='center', va='center',
                    fontsize=value_fontsize, color=value_color)

        ax.set_title(f"{labels[i]}", fontsize=30)
        ax.tick_params(axis='both', labelsize=24)
        ax.set_xlabel("Predicted Default", fontsize=24)
        ax.set_ylabel("Actual Default", fontsize=24)

    # Work on the figure created here rather than pyplot's implicit "current
    # figure", which may be a different figure if anything else has plotted
    # in the meantime.
    fig.tight_layout()
    return fig
def evaluate(file):
    """Compare the base and best models on an uploaded, labelled CSV.

    Args:
        file: Upload handed over by the ``gr.File`` input. Either an object
            whose ``name`` attribute is the path of the uploaded file, or the
            path itself; ``None`` when nothing was uploaded.

    Returns:
        A 4-tuple ``(text, roc_fig, pr_fig, cm_fig)`` matching the four
        output components of the interface. On invalid input ``text`` holds
        an error message and the three figures are ``None``.
    """
    # Always return four values: the interface declares four outputs, so a
    # bare string on the error path would itself fail in Gradio.
    if file is None:
        return "Error: no file uploaded.", None, None, None

    # Accept both a tempfile-like object (``.name``) and a plain path string.
    csv_path = getattr(file, "name", file)
    df = pd.read_csv(csv_path)
    if 'Status' not in df.columns:
        return "Error: 'Status' column missing.", None, None, None

    X = df.drop(columns='Status')
    y = df['Status']
    base = get_metrics(base_model, X, y)
    best = get_metrics(best_model, X, y)

    # Explicit figure/axes objects are used throughout so that drawing never
    # depends on pyplot's global "current figure".

    # Combined ROC curve
    roc_fig, roc_ax = plt.subplots()
    roc_ax.plot(
        base['fpr'], base['tpr'],
        label=f"Logistic Regression (AUC={auc(base['fpr'], base['tpr']):.2f})",
        linestyle='--',
    )
    roc_ax.plot(
        best['fpr'], best['tpr'],
        label=f"Gradient Boosting (AUC={auc(best['fpr'], best['tpr']):.2f})",
        linestyle='-',
    )
    # Diagonal reference line.
    roc_ax.plot([0, 1], [0, 1], 'k--', alpha=0.5)
    roc_ax.set_xlabel("False Positive Rate")
    roc_ax.set_ylabel("True Positive Rate")
    roc_ax.set_title("Combined ROC Curve")
    roc_ax.legend()

    # Combined precision-recall curve
    pr_fig, pr_ax = plt.subplots()
    pr_ax.plot(
        base['recall_vals'], base['precision_vals'],
        label=f"Logistic Regression (AUC={auc(base['recall_vals'], base['precision_vals']):.2f})",
        linestyle='--',
    )
    pr_ax.plot(
        best['recall_vals'], best['precision_vals'],
        label=f"Gradient Boosting (AUC={auc(best['recall_vals'], best['precision_vals']):.2f})",
        linestyle='-',
    )
    pr_ax.set_xlabel("Recall")
    pr_ax.set_ylabel("Precision")
    pr_ax.set_title("Combined Precision-Recall Curve")
    pr_ax.legend()

    # Confusion matrices
    cm_fig = plot_confusion_matrices(base_model, best_model, X, y, class_names=["Negative", "Positive"], value_fontsize=32, value_color='red')

    # Unregister the figures from pyplot so repeated requests do not pile up
    # open figures; the Figure objects themselves can still be rendered.
    for fig in (roc_fig, pr_fig, cm_fig):
        plt.close(fig)

    combined_metrics = f"π Base Model:\n(Logistic Regression){base['metrics']}\n\nπ Best Model:\n(Gradient Boosting){best['metrics']}"
    return combined_metrics, roc_fig, pr_fig, cm_fig
# Single-page UI: one CSV upload in; a text report and three plots out.
demo = gr.Interface(
    title="π Model Comparison Dashboard",
    description="Upload a CSV file to compare base and best model performance side by side.",
    fn=evaluate,
    inputs=gr.File(label="Upload CSV with 'Status' column"),
    outputs=[
        gr.Textbox(label="π Performance Comparison"),
        # The three plot panels, in the order evaluate() returns its figures.
        *(
            gr.Plot(label=panel_title)
            for panel_title in (
                "Combined ROC Curve",
                "Combined Precision-Recall Curve",
                "Confusion Matrices",
            )
        ),
    ],
)

demo.launch(debug=True)