Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| import os | |
| import joblib | |
| from sklearn.metrics import root_mean_squared_error | |
| from scripts.make_dataset import create_features | |
| from scripts.naive_approach import get_column_specs, prepare_data, zeroshot_eval, simple_diagonal_averaging | |
| from scripts.ml_approach import format_dataset | |
# Window sizes handed to the dataset formatter and the diagonal-averaging
# step used by the model tabs further down.
CONTEXT_LENGTH = 52
PREDICTION_LENGTH = 6

# Page-level settings (browser title, icon, layout), collected in one mapping
# and applied in a single call.
_PAGE_SETTINGS = {
    "page_title": "Glucose Level Prediction App",
    "page_icon": "📊",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)
# Stylesheet injected into the page: green (#58A618) accent on widgets,
# headings, links and buttons, a white app background, and custom tab styling.
_CUSTOM_CSS = """
<style>
/* Primary accent color */
.stButton button, .stSelectbox, .stMultiselect, .stSlider, .stNumberInput {
border-color: #58A618 !important;
}
.stProgress .st-bo {
background-color: #58A618 !important;
}
.st-bq {
color: #58A618 !important;
}
/* Header styling */
h1, h2, h3 {
color: #58A618 !important;
}
.stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p {
color: #58A618 !important;
}
/* Success messages */
.element-container .stAlert.st-ae.st-af {
border-color: #58A618 !important;
color: #58A618 !important;
}
/* Link color */
a {
color: #58A618 !important;
}
/* Button color */
.stButton>button {
background-color: #58A618 !important;
color: white !important;
}
/* Make background white */
.stApp {
background-color: white !important;
}
/* Tab styling */
.stTabs [data-baseweb="tab-list"] {
gap: 10px;
}
.stTabs [data-baseweb="tab"] {
background-color: #f0f0f0;
border-radius: 4px 4px 0 0;
padding: 10px 16px;
border: 1px solid #ccc;
}
.stTabs [data-baseweb="tab"][aria-selected="true"] {
background-color: white;
border-bottom: 3px solid #58A618;
}
</style>
"""

# unsafe_allow_html is required so the <style> element is emitted as HTML
# instead of being escaped as text.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# Seed the per-session slots only when they are missing, so values stored by
# an earlier run of the script are left untouched.
_SESSION_DEFAULTS = {
    'combined_data': None,
    'files_uploaded': False,
    'data_processed': False,
}
for state_key, initial_value in _SESSION_DEFAULTS.items():
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
# Page heading followed by a short description of what the app does.
st.title("Glucose Level Prediction App")
st.markdown("""
This app allows you to upload glucose measurements, food logs, and accelerometer data
to analyze patterns and predict glucose levels.
""")

# Let the user pick between uploading their own files and a bundled sample.
st.subheader("Choose Data Source")
_SOURCE_CHOICES = ("Upload files", "Sample A", "Sample B")
data_option = st.selectbox(
    "Select how you'd like to provide input data:",
    _SOURCE_CHOICES,
)

# Placeholders that the source-specific branches fill in; `show_tabs` stays
# False until a dataset is actually available.
glucose_data = food_data = accel_data = combined_data = None
show_tabs = False
# Bundled sample datasets, keyed by the label shown in the data-source picker.
_SAMPLE_PATHS = {
    "Sample A": 'data/processed/samples/sample_A.csv',
    "Sample B": 'data/processed/samples/sample_B.csv',
}


def _read_uploaded_csv(uploaded_file, label):
    """Parse one uploaded CSV and report the outcome in the UI.

    Args:
        uploaded_file: Value returned by ``st.file_uploader``; ``None`` when
            the user has not supplied a file.
        label: Lower-case noun phrase used in the status messages,
            e.g. ``"glucose data"``.

    Returns:
        The parsed ``DataFrame``, or ``None`` when no file was supplied or
        parsing failed (an error banner is shown in the latter case).
    """
    if uploaded_file is None:
        return None
    try:
        frame = pd.read_csv(uploaded_file)
    except Exception as e:  # UI boundary: surface any parse failure to the user
        st.error(f"Error loading {label}: {e}")
        return None
    st.success(f"{label.capitalize()} loaded successfully!")
    return frame


def _remember_dataset(frame, source):
    """Store ``frame`` in session state and record which source produced it.

    The ``data_source`` marker lets the upload branch tell its own processed
    data apart from a sample that was loaded earlier in the same session.
    """
    st.session_state.combined_data = frame
    st.session_state.data_processed = True
    st.session_state.data_source = source


if data_option == "Upload files":
    st.subheader("Upload Your Data Files")
    glucose_file = st.file_uploader("Upload Glucose Levels CSV", type=["csv"], key="glucose")
    food_file = st.file_uploader("Upload Food Logs CSV", type=["csv"], key="food")
    accel_file = st.file_uploader("Upload Accelerometer Data CSV", type=["csv"], key="accel")

    st.subheader("Patient Demographics")
    # Gender selection
    gender = st.selectbox("Select Patient Gender", options=["Female", "Male", "Other"], index=0)
    # HbA1c input
    hba1c = st.number_input("Enter HbA1c (%)", min_value=3.0, max_value=15.0, step=0.1)

    all_files_uploaded = (
        glucose_file is not None
        and food_file is not None
        and accel_file is not None
    )

    # Attempt to load whichever files are present.
    glucose_data = _read_uploaded_csv(glucose_file, "glucose data")
    food_data = _read_uploaded_csv(food_file, "food logs")
    accel_data = _read_uploaded_csv(accel_file, "accelerometer data")

    # Update the upload status in session state.
    st.session_state.files_uploaded = all_files_uploaded

    if not all_files_uploaded:
        st.warning("Please upload all three data files to enable data processing.")

    col1, col2, col3 = st.columns([1, 1, 1])
    with col2:
        # The button stays disabled until all three files have been uploaded.
        process_clicked = st.button(
            'Process Data',
            key='process_data_button',
            disabled=not all_files_uploaded,
        )
        if process_clicked and all_files_uploaded:
            try:
                combined_data = create_features(
                    bg_df=glucose_data,
                    food_df=food_data,
                    acc_df=accel_data,
                    gender=gender,
                    hba1c=hba1c,
                    add_patient_id=True,
                )
                _remember_dataset(combined_data, data_option)
                st.success("Data processed successfully!")
                show_tabs = True
            except Exception as e:  # UI boundary: report instead of crashing the app
                st.error(f"Error processing data: {e}")
                st.session_state.data_processed = False
                show_tabs = False

    st.subheader("Expected File Formats:")
    col1, col2, col3 = st.columns(3)
    with col1:
        st.markdown("""
**Glucose Levels CSV:**
- Timestamp column
- Glucose measurement values
""")
    with col2:
        st.markdown("""
**Food Logs CSV:**
- Timestamp column
- Carbohydrates
- Sugar
- Calories
""")
    with col3:
        st.markdown("""
**Accelerometer Data CSV:**
- Timestamp column
- Activity measurements
""")

    # Restore previously processed data on reruns (e.g. after a prediction
    # button click), but only if it came from an upload. Without the source
    # check, a sample loaded earlier in the session would be shown here as if
    # it were the user's own data.
    previously_processed = (
        st.session_state.data_processed
        and st.session_state.combined_data is not None
        and st.session_state.get("data_source") == data_option
    )
    if previously_processed:
        combined_data = st.session_state.combined_data
        show_tabs = True

elif data_option in _SAMPLE_PATHS:
    try:
        combined_data = pd.read_csv(_SAMPLE_PATHS[data_option])
    except (OSError, ValueError) as e:
        # Missing/unreadable file (OSError) or malformed CSV (pandas parser
        # errors derive from ValueError): show a message, keep tabs hidden.
        st.error(f"Error loading {data_option}: {e}")
    else:
        _remember_dataset(combined_data, data_option)
        st.success(f"{data_option} loaded successfully!")
        show_tabs = True
def _run_zeroshot_forecast(dataset, model_path=None):
    """Run ``zeroshot_eval`` on ``dataset`` and average its step predictions.

    Args:
        dataset: Combined feature frame selected or produced earlier in the script.
        model_path: Optional model identifier forwarded to ``zeroshot_eval``.
            When ``None`` the argument is omitted so the callee's own default
            is used.

    Returns:
        The frame returned by ``simple_diagonal_averaging``; the rest of this
        section reads its ``Timestamp``, ``Glucose`` and
        ``averaged_prediction`` columns.
    """
    column_specs = get_column_specs()
    timestamp_column = column_specs["timestamp_column"]
    prepared_data = prepare_data(dataset, timestamp_column)
    train_data = prepare_data(
        pd.read_csv('data/processed/train_dataset.csv'),
        timestamp_column,
    )

    eval_kwargs = {"train_df": train_data, "test_df": prepared_data, "batch_size": 8}
    if model_path is not None:
        eval_kwargs["model_path"] = model_path
    predictions_df = zeroshot_eval(**eval_kwargs)["predictions_df"]

    # Only the per-step forecast columns take part in the averaging.
    step_columns = [col for col in predictions_df.columns if col.startswith("Glucose_step_")]
    return simple_diagonal_averaging(predictions_df, prepared_data, CONTEXT_LENGTH, step_columns)


def _run_xgb_forecast(dataset):
    """Predict with the pickled XGBoost model and average its step predictions.

    Returns the frame produced by ``simple_diagonal_averaging``.
    """
    X_test, _ = format_dataset(dataset, CONTEXT_LENGTH, PREDICTION_LENGTH)
    # NOTE: joblib.load unpickles the file; the path is a fixed location in
    # the repository, not user input.
    xgb_model = joblib.load("models/xgb_model.pkl")
    step_predictions = pd.DataFrame(xgb_model.predict(X_test))
    return simple_diagonal_averaging(
        step_predictions,
        dataset,
        CONTEXT_LENGTH,
        step_predictions.columns,
    )


def _plot_forecast(final_results):
    """Draw ground truth and predictions over time and render the figure.

    Rows whose averaged prediction is 0 are treated as "no prediction" and
    replaced with NaN, which matplotlib skips, so gaps are not bridged by a
    straight line. The figure is closed after rendering so figures do not
    pile up in pyplot's registry across reruns.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.plot(final_results['Timestamp'], final_results['Glucose'],
                label='Ground truth', alpha=0.7, color='#1f77b4')
        plot_predictions = final_results['averaged_prediction'].replace(0, float('nan'))
        ax.plot(final_results['Timestamp'], plot_predictions,
                label='Predicted', alpha=0.7, color='#58A618')
        ax.set_title('Glucose Predictions vs Actual Values')
        ax.set_xlabel('Time')
        ax.set_ylabel('Glucose Level')
        ax.legend()
        st.pyplot(fig)
    finally:
        plt.close(fig)


def _show_rmse(final_results):
    """Display the RMSE between actual and predicted glucose.

    The first ``CONTEXT_LENGTH`` rows and every row whose averaged prediction
    is 0 are excluded. Shows "N/A" when nothing is left to score.
    """
    y_true = final_results['Glucose'][CONTEXT_LENGTH:].reset_index(drop=True)
    y_pred = final_results['averaged_prediction'][CONTEXT_LENGTH:].reset_index(drop=True)
    non_zero_mask = y_pred != 0
    y_true_filtered = y_true[non_zero_mask]
    y_pred_filtered = y_pred[non_zero_mask]

    st.subheader("Performance Metrics")
    if len(y_pred_filtered) > 0:
        # root_mean_squared_error already returns the RMSE; applying another
        # square root (as the previous code did) reports the wrong number.
        rmse = root_mean_squared_error(y_true_filtered, y_pred_filtered)
        st.metric("AVERAGE RMSE", f"{rmse:.4f}")
    else:
        st.metric("AVERAGE RMSE", "N/A")


def _render_model_tab(title, button_key, make_forecast, dataset):
    """Render one model tab: heading, button, and on click the plot and metric.

    Args:
        title: Tab heading; its lower-cased form is used in the error message.
        button_key: Unique Streamlit key for the tab's button.
        make_forecast: Callable taking the dataset and returning the averaged
            results frame.
        dataset: Combined feature frame, or ``None`` when unavailable.
    """
    st.subheader(title)
    if not st.button('Make prediction', key=button_key):
        return
    if dataset is None:
        st.error("Data not available. Please try again.")
        return
    try:
        final_results = make_forecast(dataset)
        _plot_forecast(final_results)
        _show_rmse(final_results)
    except Exception as e:  # UI boundary: every tab reports failures the same way
        st.error(f"Error in {title.lower()} prediction: {e}")


# Add some spacing
st.write("")
st.write("")

# Only show tabs if sample data is loaded or user data has been successfully processed
if show_tabs:
    tab1, tab2, tab3 = st.tabs(["Naive Model", "Machine Learning Model", "Deep Learning Model"])
    with tab1:
        _render_model_tab("Naive Model", 'naive_button', _run_zeroshot_forecast, combined_data)
    with tab2:
        _render_model_tab("Machine Learning Model", 'ml_button', _run_xgb_forecast, combined_data)
    with tab3:
        _render_model_tab(
            "Deep Learning Model",
            'dl_button',
            lambda data: _run_zeroshot_forecast(data, model_path="iaravagni/ttm-finetuned-model"),
            combined_data,
        )
else:
    st.info("Upload and process data or select a sample dataset to view prediction models.")
# Two empty writes act as vertical spacing before the closing sections.
for _ in range(2):
    st.write("")

# "About" section with the medical-use disclaimer.
_ABOUT_MARKDOWN = """
---
### About this App
This application is designed to help analyze and predict glucose levels based on glucose measurements,
food logs, and physical activity data. The app merges these datasets based on timestamps to identify
patterns and make predictions.
Please note that this is a demonstration tool and should not be used for medical decisions without
consultation with healthcare professionals.
"""
st.markdown(_ABOUT_MARKDOWN)

# Footer pinned to the bottom of the viewport, styled with the accent color.
_FOOTER_HTML = """
<style>
.footer {
position: fixed;
left: 0;
bottom: 0;
width: 100%;
background-color: white;
color: #58A618;
text-align: center;
padding: 10px;
border-top: 2px solid #58A618;
}
</style>
<div class="footer">
<p>Glucose Prediction Application © 2025</p>
</div>
"""
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)