import gradio as gr
import pandas as pd

from dataset_previews import mmlupro_dataset_preview, format_preview_for_display

# Labels shared between the component definitions and the callbacks below.
# Keeping them in one place prevents the UI text and the comparisons made in
# the callbacks from drifting apart.
_NO_DATASET = "(Select Dataset)"
_MMLU_PRO = "MMLU-Pro"
_SHOW_PREVIEW_LABEL = "Show Dataset Preview"
_HIDE_PREVIEW_LABEL = "Hide Dataset Preview"

_MODE_ALL_SUBJECTS = "Evaluate All Subjects"
_MODE_NUM_SUBJECTS = "Choose Number of Subjects"
_MODE_SPECIFIC_SUBJECTS = "Specify which Subjects to Evaluate"

# Subject selection mode -> parameter string. Any mode that is not listed
# falls back to "specific" (see get_subject_mode_param).
_SUBJECT_MODE_PARAMS = {
    _MODE_ALL_SUBJECTS: "all",
    _MODE_NUM_SUBJECTS: "number",
}

# Start of the count suffix that get_subject_choices_from_preview appends to
# every subject name and that get_subject_names strips again.
_COUNT_SUFFIX_MARKER = " (n="


def create_dataset_section():
    """
    Creates the dataset selection section (Section A) of the UI.

    Builds the section header, the dataset dropdown, the (initially disabled)
    preview toggle button, the initially hidden preview container with its
    table, and a state object that holds the loaded preview data.

    Returns:
        dict: Dictionary containing UI components and containers, keyed by
            'header', 'dropdown', 'preview_toggle', 'preview_container',
            'preview_output' and 'preview_data_state'.
    """
    # Dataset Selection Section
    header = gr.Markdown("## (A) Select Dataset for Evaluation")

    with gr.Row():
        dataset_dropdown = gr.Dropdown(
            choices=[_NO_DATASET, _MMLU_PRO],
            value=_NO_DATASET,
            label="Dataset",
            info=(
                "Select a dataset to perform the Head-to-Head Evaluation on. "
                "Available Datasets: [MMLU-Pro](https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro)"
            ),
        )
        # Disabled until a real dataset is selected
        # (see update_interface_based_on_dataset).
        preview_toggle = gr.Button(
            _SHOW_PREVIEW_LABEL,
            interactive=False,
            variant="secondary",
        )

    # Dataset Preview Container - Initially hidden
    with gr.Column(visible=False) as dataset_preview_container:
        gr.Markdown("## Dataset Preview", elem_id="preview_header")
        preview_output = gr.DataFrame(
            interactive=False,
            wrap=True,
            elem_id="preview_table",
        )
        # Add a divider
        # NOTE(review): the literal here is a bare line break in the source;
        # confirm whether a horizontal rule (e.g. "---") was intended.
        gr.Markdown("\n")

    # Preview data state to store the loaded preview data
    preview_data_state = gr.State(None)

    # Return components dictionary
    return {
        'header': header,
        'dropdown': dataset_dropdown,
        'preview_toggle': preview_toggle,
        'preview_container': dataset_preview_container,
        'preview_output': preview_output,
        'preview_data_state': preview_data_state,
    }


def create_mmlu_config_section():
    """
    Creates the dataset configuration section (Section B) of the UI.

    The whole section lives in an initially hidden column. Its left half
    holds the subject selection controls, its right half the per-subject
    question controls.

    Returns:
        dict: Dictionary containing UI components and containers, keyed by
            'container', 'subject_selection_mode', 'num_subjects_container',
            'num_subjects_slider', 'specific_subjects_container',
            'specific_subjects', 'all_questions_checkbox',
            'questions_info_text' and 'num_questions_slider'.
    """
    with gr.Column(visible=False) as mmlu_config_container:
        gr.Markdown("## (B) Select Dataset Configuration Options")

        with gr.Row():
            # Left column for subject selection
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["config-box"]):
                    gr.Markdown("### Choose Subjects")

                    subject_selection_mode = gr.Radio(
                        choices=[
                            _MODE_ALL_SUBJECTS,
                            _MODE_NUM_SUBJECTS,
                            _MODE_SPECIFIC_SUBJECTS,
                        ],
                        value=_MODE_ALL_SUBJECTS,
                        label="Subject Selection Mode",
                    )

                    # Subject number slider - initially hidden
                    with gr.Column(visible=False) as num_subjects_container:
                        num_subjects_slider = gr.Slider(
                            minimum=1,
                            maximum=14,  # Will be updated dynamically based on preview data
                            value=1,
                            step=1,
                            label="Number of Subjects",
                            info="Number of subjects to evaluate. They will be loaded in alphabetical order.",
                        )

                    # Subject checkboxes - initially hidden
                    with gr.Column(visible=False) as specific_subjects_container:
                        # Will be populated dynamically from the preview data
                        specific_subjects = gr.CheckboxGroup(
                            choices=[],  # Will be populated from preview data
                            label="Select Specific Subjects",
                            info="Select which specific subjects to evaluate",
                        )

            # Right column for sample configuration
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["config-box"]):
                    gr.Markdown("### Sample Configuration")

                    all_questions_checkbox = gr.Checkbox(
                        label="Evaluate All Questions",
                        value=False,
                        info="When checked, evaluates all available questions for each subject",
                    )

                    questions_info_text = gr.Markdown(
                        visible=False,
                        value="**All questions across the selected subjects will be evaluated**",
                    )

                    # Questions per subject slider
                    num_questions_slider = gr.Slider(
                        minimum=1,
                        maximum=100,
                        value=20,
                        step=1,
                        label="Questions per Subject",
                        info="Choose a subset of questions (1-100) per subject. They will be loaded in order of question_id.",
                        interactive=True,
                    )

    # Return components dictionary
    return {
        'container': mmlu_config_container,
        'subject_selection_mode': subject_selection_mode,
        'num_subjects_container': num_subjects_container,
        'num_subjects_slider': num_subjects_slider,
        'specific_subjects_container': specific_subjects_container,
        'specific_subjects': specific_subjects,
        'all_questions_checkbox': all_questions_checkbox,
        'questions_info_text': questions_info_text,
        'num_questions_slider': num_questions_slider,
    }


# Utility functions for dataset section

def get_subject_choices_from_preview(preview_data):
    """
    Extracts subject choices from preview data.

    Args:
        preview_data (dict): Preview data containing a 'subject_counts'
            mapping of subject name to question count.

    Returns:
        tuple: (formatted_subjects, subject_count) where formatted_subjects
            is the alphabetically sorted list of "Subject (n=count)" labels.
            Returns ([], 0) when preview_data is empty or has no
            'subject_counts' entry.
    """
    if not preview_data or 'subject_counts' not in preview_data:
        return [], 0

    # Get subject counts from preview data
    subject_counts = preview_data['subject_counts']

    # Sort subjects alphabetically
    subjects = sorted(subject_counts.keys())

    # Format as "Subject (n=count)"; get_subject_names reverses this.
    formatted_subjects = [
        f"{subject} (n={subject_counts[subject]})" for subject in subjects
    ]

    return formatted_subjects, len(subjects)


def load_dataset_preview(dataset):
    """
    Loads preview data for the selected dataset.

    Args:
        dataset (str): Selected dataset name.

    Returns:
        tuple: (preview_data, specific_subjects_update,
            num_subjects_slider_update). For any dataset other than
            "MMLU-Pro" this is (None, no-op update, no-op update).
    """
    if dataset != _MMLU_PRO:
        return None, gr.update(), gr.update()

    # Load the preview data
    preview_data = mmlupro_dataset_preview(regenerate_preview=True)

    # Extract subject choices and count
    subject_choices, subject_count = get_subject_choices_from_preview(preview_data)

    # The slider is created with minimum=1, so never push its maximum below
    # that when the preview contains no subjects.
    slider_maximum = max(subject_count, 1)

    # Update the UI components
    return (
        preview_data,                               # Store the preview data
        gr.update(choices=subject_choices),         # Update checkbox choices
        gr.update(maximum=slider_maximum, value=1),  # Update slider max
    )


def update_interface_based_on_dataset(dataset, current_visibility, mmlu_config_container,
                                      model_config_container, results_container,
                                      preview_toggle, dataset_preview_container):
    """
    Updates the interface based on dataset selection.

    Only `dataset` is inspected; the remaining parameters are accepted so the
    signature stays compatible with existing callers.

    Args:
        dataset (str): Selected dataset name.
        current_visibility (bool): Current preview visibility state.
        mmlu_config_container: MMLU config container component.
        model_config_container: Model config container component.
        results_container: Results container component.
        preview_toggle: Preview toggle button.
        dataset_preview_container: Dataset preview container.

    Returns:
        tuple: Updates for, in order: mmlu_config_container,
            model_config_container, results_container, preview_toggle,
            dataset_preview_container, the preview visibility state (always
            reset to False) and the preview button text.
    """
    dataset_selected = dataset == _MMLU_PRO

    return (
        gr.update(visible=dataset_selected),      # mmlu_config_container
        gr.update(visible=dataset_selected),      # model_config_container
        gr.update(visible=dataset_selected),      # results_container
        gr.update(interactive=dataset_selected),  # preview_toggle
        gr.update(visible=False),                 # dataset_preview_container - always hidden on change
        False,                                    # Reset preview_visibility to False
        gr.update(value=_SHOW_PREVIEW_LABEL),     # Reset button text
    )


def toggle_preview(dataset, preview_visibility, preview_data):
    """
    Toggles the dataset preview visibility.

    Args:
        dataset (str): Selected dataset name.
        preview_visibility (bool): Current preview visibility state.
        preview_data (dict): Preview data.

    Returns:
        tuple: (new_visibility, preview_container_update,
            preview_output_update, button_text_update)
    """
    # Toggle the visibility state
    is_visible = not preview_visibility

    # Update button text based on new state
    button_text = _HIDE_PREVIEW_LABEL if is_visible else _SHOW_PREVIEW_LABEL

    # The table is only filled when the preview becomes visible for MMLU-Pro.
    # When hiding, or for datasets that are not implemented yet, it is cleared.
    table_value = None
    if is_visible and dataset == _MMLU_PRO:
        table_value = format_preview_for_display(preview_data)

    return (
        is_visible,
        gr.update(visible=is_visible),
        table_value,
        gr.update(value=button_text),
    )


def update_subject_selection_ui(mode, num_subjects_container, specific_subjects_container):
    """
    Updates UI based on subject selection mode.

    Args:
        mode (str): Selected subject selection mode.
        num_subjects_container: Container for number of subjects slider.
        specific_subjects_container: Container for specific subjects checkboxes.

    Returns:
        tuple: (num_subjects_container_update, specific_subjects_container_update)
    """
    show_number = mode == _MODE_NUM_SUBJECTS
    # Every mode other than "all" and "number" is treated as
    # "Specify which Subjects to Evaluate".
    show_specific = mode not in (_MODE_ALL_SUBJECTS, _MODE_NUM_SUBJECTS)

    return gr.update(visible=show_number), gr.update(visible=show_specific)


def update_questions_interface(checked, num_questions_slider, questions_info_text):
    """
    Updates questions interface based on "Evaluate All Questions" checkbox.

    Args:
        checked (bool): Whether "Evaluate All Questions" is checked.
        num_questions_slider: Questions per subject slider component.
        questions_info_text: Questions info text component.

    Returns:
        tuple: (num_questions_slider_update, questions_info_text_update)
    """
    evaluate_all = bool(checked)

    # The slider and the info text are mutually exclusive.
    return gr.update(visible=not evaluate_all), gr.update(visible=evaluate_all)


def get_subject_mode_param(mode):
    """
    Converts subject selection mode to parameter string.

    Args:
        mode (str): Subject selection mode.

    Returns:
        str: "all", "number" or "specific". Any mode that is not one of the
            first two labels maps to "specific".
    """
    return _SUBJECT_MODE_PARAMS.get(mode, "specific")


def get_subject_names(selected_subjects):
    """
    Extracts subject names from checkbox values.

    Only the trailing " (n=<count>)" suffix is removed, so subject names that
    themselves contain parentheses are kept intact.

    Args:
        selected_subjects (list): Selected subjects with counts, formatted as
            "Subject (n=count)". None is treated as an empty selection.

    Returns:
        list: Clean subject names without count information.
    """
    # rsplit from the right so only the last " (n=" occurrence is cut off.
    return [
        subject.rsplit(_COUNT_SUFFIX_MARKER, 1)[0]
        for subject in (selected_subjects or [])
    ]