Spaces:

xen2003
/

LoanDefaultPrediction

Sleeping

App Files Files Community

LoanDefaultPrediction / app.py

xen2003

Upload app.py

4964c4e verified 4 months ago

raw

history blame contribute delete

4.7 kB

	# -*- coding: utf-8 -*-
	"""ITI105_SETY_Demo_Final.ipynb

	Automatically generated by Colab.

	Original file is located at
	https://colab.research.google.com/drive/1239rYpEr1h2-cpIWEzcYdiaG6QSZCNzu
	"""

	import gradio as gr
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import joblib
	from sklearn.metrics import (
	accuracy_score, precision_score, recall_score, f1_score,
	roc_curve, precision_recall_curve, auc,
	confusion_matrix, ConfusionMatrixDisplay
	)

	# Deserialize both persisted estimators once, at import time (adjust paths as needed).
	# The dashboard labels base_model "Logistic Regression" and best_model "Gradient Boosting".
	base_model, best_model = (
	    joblib.load(model_file)
	    for model_file in ("base_model.pkl", "best_gb.pkl")
	)

	def get_metrics(model, X, y):
	    """Score ``model`` on ``(X, y)`` and bundle predictions, curve points and a summary.

	    Args:
	        model: estimator exposing ``predict`` and ``predict_proba``.
	        X: features handed to the estimator.
	        y: true labels the predictions are compared against.

	    Returns:
	        dict with keys ``y_pred``, ``y_prob``, ``fpr``, ``tpr``,
	        ``precision_vals``, ``recall_vals`` and ``metrics`` (a formatted
	        multi-line text report of the scalar scores).
	    """
	    predicted_labels = model.predict(X)
	    # Second column of predict_proba, used as the positive-class score.
	    positive_scores = model.predict_proba(X)[:, 1]

	    accuracy = accuracy_score(y, predicted_labels)
	    precision = precision_score(y, predicted_labels)
	    recall = recall_score(y, predicted_labels)
	    f1 = f1_score(y, predicted_labels)

	    false_pos_rate, true_pos_rate, _ = roc_curve(y, positive_scores)
	    pr_precision, pr_recall, _ = precision_recall_curve(y, positive_scores)

	    # Areas under the ROC and precision-recall curves.
	    roc_area = auc(false_pos_rate, true_pos_rate)
	    pr_area = auc(pr_recall, pr_precision)

	    report = f"""
	📌 Accuracy: {accuracy:.4f}
	📌 Precision: {precision:.4f}
	📌 Recall: {recall:.4f}
	📌 F1 Score: {f1:.4f}
	📌 ROC AUC: {roc_area:.4f}
	📌 PR AUC: {pr_area:.4f}
	"""

	    result = {
	        'y_pred': predicted_labels,
	        'y_prob': positive_scores,
	        'fpr': false_pos_rate,
	        'tpr': true_pos_rate,
	        'precision_vals': pr_precision,
	        'recall_vals': pr_recall,
	    }
	    result['metrics'] = report
	    return result

	def plot_confusion_matrices(model1, model2, X, y, labels=("Logistic Regression", "Gradient Boosting"), class_names=None, value_fontsize=28, value_color='red'):
	    """Plot the confusion matrices of two models side by side.

	    Args:
	        model1: estimator exposing ``predict``; drawn in the left panel.
	        model2: estimator exposing ``predict``; drawn in the right panel.
	        X: features handed to each model's ``predict``.
	        y: true labels the predictions are compared against.
	        labels: panel titles, indexed by position (left, right). The default
	            is an immutable tuple so it cannot be altered between calls.
	        class_names: tick labels forwarded to ``ConfusionMatrixDisplay`` as
	            ``display_labels``.
	        value_fontsize: font size of the count written in every cell.
	        value_color: colour of the count written in every cell.

	    Returns:
	        The matplotlib figure that holds both panels.
	    """
	    fig, axes = plt.subplots(1, 2, figsize=(16, 8))

	    for i, (ax, model) in enumerate(zip(axes, (model1, model2))):
	        cm = confusion_matrix(y, model.predict(X))

	        # Draw only the coloured grid; the counts are written below with the
	        # requested font size and colour instead of the default annotations.
	        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
	        disp.plot(ax=ax, cmap='Blues', colorbar=False, include_values=False)

	        # Matrix rows map to the y axis and columns to the x axis.
	        for (row, col), count in np.ndenumerate(cm):
	            ax.text(col, row, f"{count}", ha='center', va='center', fontsize=value_fontsize, color=value_color)

	        ax.set_title(f"{labels[i]}", fontsize=30)
	        ax.tick_params(axis='both', labelsize=24)
	        ax.set_xlabel("Predicted Default", fontsize=24)
	        ax.set_ylabel("Actual Default", fontsize=24)

	    fig.tight_layout()
	    # Return the figure created above rather than whatever pyplot currently
	    # considers the "current" figure.
	    return fig

	def _plot_model_curves(title, xlabel, ylabel, base_xy, best_xy, chance_line=False):
	    """Draw the base and best model curves on one new figure and return it."""
	    fig, ax = plt.subplots()
	    for (xs, ys), model_name, style in (
	        (base_xy, "Logistic Regression", '--'),
	        (best_xy, "Gradient Boosting", '-'),
	    ):
	        ax.plot(xs, ys, label=f"{model_name} (AUC={auc(xs, ys):.2f})", linestyle=style)
	    if chance_line:
	        # Diagonal reference line from (0, 0) to (1, 1).
	        ax.plot([0, 1], [0, 1], 'k--', alpha=0.5)
	    ax.set_xlabel(xlabel)
	    ax.set_ylabel(ylabel)
	    ax.set_title(title)
	    ax.legend()
	    return fig

	def evaluate(file):
	    """Compare the base and best model on an uploaded CSV.

	    Args:
	        file: the upload handed over by the ``gr.File`` input. Either a path
	            string or an object exposing the path as ``.name`` (which form
	            arrives depends on the Gradio version/configuration), or ``None``
	            when nothing was uploaded.

	    Returns:
	        A 4-tuple ``(text, roc_figure, pr_figure, confusion_figure)`` matching
	        the four outputs of the interface. On invalid input the text carries
	        an error message and the three figures are ``None``.
	    """
	    if file is None:
	        return "Error: no file uploaded.", None, None, None

	    # Accept both a plain path and a temp-file wrapper with a ``name`` attribute.
	    if isinstance(file, str) or hasattr(file, "__fspath__"):
	        csv_path = file
	    else:
	        csv_path = file.name

	    df = pd.read_csv(csv_path)
	    if 'Status' not in df.columns:
	        # Always return one value per declared output so the interface can
	        # show the message instead of failing on a short return.
	        return "Error: 'Status' column missing.", None, None, None

	    X = df.drop(columns='Status')
	    y = df['Status']

	    base = get_metrics(base_model, X, y)
	    best = get_metrics(best_model, X, y)

	    # NOTE(review): figures are created through pyplot and never closed here;
	    # confirm whether the host process needs plt.close() after rendering.
	    roc_fig = _plot_model_curves(
	        "Combined ROC Curve", "False Positive Rate", "True Positive Rate",
	        (base['fpr'], base['tpr']),
	        (best['fpr'], best['tpr']),
	        chance_line=True,
	    )
	    pr_fig = _plot_model_curves(
	        "Combined Precision-Recall Curve", "Recall", "Precision",
	        (base['recall_vals'], base['precision_vals']),
	        (best['recall_vals'], best['precision_vals']),
	    )

	    # Confusion Matrices
	    cm_fig = plot_confusion_matrices(base_model, best_model, X, y, class_names=["Negative", "Positive"], value_fontsize=32, value_color='red')

	    combined_metrics = f"📊 Base Model:\n(Logistic Regression){base['metrics']}\n\n📊 Best Model:\n(Gradient Boosting){best['metrics']}"

	    return combined_metrics, roc_fig, pr_fig, cm_fig

	# Dashboard wiring: one CSV upload in; one text panel and three plots out,
	# in the order evaluate() returns them (text, ROC, precision-recall, confusion).
	demo = gr.Interface(
	    fn=evaluate,
	    inputs=gr.File(label="Upload CSV with 'Status' column"),
	    outputs=[gr.Textbox(label="📊 Performance Comparison")]
	    + [
	        gr.Plot(label=plot_caption)
	        for plot_caption in (
	            "Combined ROC Curve",
	            "Combined Precision-Recall Curve",
	            "Confusion Matrices",
	        )
	    ],
	    title="🔍 Model Comparison Dashboard",
	    description="Upload a CSV file to compare base and best model performance side by side.",
	)

	# Start the web server with debug output enabled.
	demo.launch(debug=True)