Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| from dataset_previews import mmlupro_dataset_preview, format_preview_for_display | |
def create_dataset_section() -> dict:
    """
    Creates the dataset selection section (Section A) of the UI.

    Builds, in order: a section header, a row holding the dataset dropdown
    and the "Show Dataset Preview" button (created non-interactive), a
    hidden column containing the preview table, and a state holder for the
    loaded preview data (initialised to None).

    NOTE(review): this block was recovered from a paste that lost its
    indentation. The nesting below (button inside the Row, divider inside
    the preview column) is reconstructed - confirm against the original.

    Returns:
        dict: Dictionary containing UI components and containers, keyed by
            'header', 'dropdown', 'preview_toggle', 'preview_container',
            'preview_output' and 'preview_data_state'.
    """
    # Dataset Selection Section
    header = gr.Markdown("## (A) Select Dataset for Evaluation")

    with gr.Row():
        # "(Select Dataset)" is a placeholder entry and the initial value.
        dataset_dropdown = gr.Dropdown(
            choices=["(Select Dataset)", "MMLU-Pro"],
            value="(Select Dataset)",
            label="Dataset",
            info="Select a dataset to perform the Head-to-Head Evaluation on. Available Datasets: [MMLU-Pro](https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro)"
        )
        # Created disabled; update_interface_based_on_dataset returns an
        # update that enables it for "MMLU-Pro".
        preview_toggle = gr.Button("Show Dataset Preview", interactive=False, variant="secondary")

    # Dataset Preview Container - Initially hidden
    with gr.Column(visible=False) as dataset_preview_container:
        gr.Markdown("## Dataset Preview", elem_id="preview_header")
        preview_output = gr.DataFrame(
            interactive=False,
            wrap=True,
            elem_id="preview_table"
        )
        # Add a divider
        gr.Markdown("<div class='section-divider'></div>")

    # Preview data state to store the loaded preview data
    preview_data_state = gr.State(None)

    # Return components dictionary
    return {
        'header': header,
        'dropdown': dataset_dropdown,
        'preview_toggle': preview_toggle,
        'preview_container': dataset_preview_container,
        'preview_output': preview_output,
        'preview_data_state': preview_data_state
    }
def create_mmlu_config_section() -> dict:
    """
    Creates the dataset configuration section (Section B) of the UI.

    The whole section lives in a column that is created hidden. Inside it,
    one row holds two equally scaled columns: subject selection (a mode
    radio plus a hidden slider and a hidden checkbox group) and sample
    configuration (an "Evaluate All Questions" checkbox, a hidden info
    text, and a questions-per-subject slider).

    NOTE(review): this block was recovered from a paste that lost its
    indentation. The nesting below is reconstructed from the comments and
    the order of the ``with`` statements - confirm against the original.

    Returns:
        dict: Dictionary containing UI components and containers, keyed by
            'container', 'subject_selection_mode', 'num_subjects_container',
            'num_subjects_slider', 'specific_subjects_container',
            'specific_subjects', 'all_questions_checkbox',
            'questions_info_text' and 'num_questions_slider'.
    """
    with gr.Column(visible=False) as mmlu_config_container:
        gr.Markdown("## (B) Select Dataset Configuration Options")

        with gr.Row():
            # Left column for subject selection
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["config-box"]):
                    gr.Markdown("### Choose Subjects")

                    # These labels are compared literally by
                    # update_subject_selection_ui and get_subject_mode_param.
                    subject_selection_mode = gr.Radio(
                        choices=["Evaluate All Subjects", "Choose Number of Subjects", "Specify which Subjects to Evaluate"],
                        value="Evaluate All Subjects",
                        label="Subject Selection Mode"
                    )

                    # Subject number slider - initially hidden
                    with gr.Column(visible=False) as num_subjects_container:
                        num_subjects_slider = gr.Slider(
                            minimum=1,
                            maximum=14, # Will be updated dynamically based on preview data
                            value=1,
                            step=1,
                            label="Number of Subjects",
                            info="Number of subjects to evaluate. They will be loaded in alphabetical order."
                        )

                    # Subject checkboxes - initially hidden
                    with gr.Column(visible=False) as specific_subjects_container:
                        # Will be populated dynamically from the preview data
                        specific_subjects = gr.CheckboxGroup(
                            choices=[], # Will be populated from preview data
                            label="Select Specific Subjects",
                            info="Select which specific subjects to evaluate"
                        )

            # Right column for sample configuration
            with gr.Column(scale=1):
                with gr.Group(elem_classes=["config-box"]):
                    gr.Markdown("### Sample Configuration")

                    all_questions_checkbox = gr.Checkbox(
                        label="Evaluate All Questions",
                        value=False,
                        info="When checked, evaluates all available questions for each subject"
                    )

                    # Created hidden; update_questions_interface returns the
                    # visibility updates for this text and the slider below.
                    questions_info_text = gr.Markdown(visible=False, value="**All questions across the selected subjects will be evaluated**")

                    # Questions per subject slider
                    num_questions_slider = gr.Slider(
                        minimum=1,
                        maximum=100,
                        value=20,
                        step=1,
                        label="Questions per Subject",
                        info="Choose a subset of questions (1-100) per subject. They will be loaded in order of question_id.",
                        interactive=True
                    )

    # Return components dictionary
    return {
        'container': mmlu_config_container,
        'subject_selection_mode': subject_selection_mode,
        'num_subjects_container': num_subjects_container,
        'num_subjects_slider': num_subjects_slider,
        'specific_subjects_container': specific_subjects_container,
        'specific_subjects': specific_subjects,
        'all_questions_checkbox': all_questions_checkbox,
        'questions_info_text': questions_info_text,
        'num_questions_slider': num_questions_slider
    }
| # Utility functions for dataset section | |
def get_subject_choices_from_preview(preview_data):
    """
    Build the checkbox labels for every subject listed in the preview data.

    Args:
        preview_data (dict): Preview data; its 'subject_counts' entry maps
            each subject name to a count.

    Returns:
        tuple: (labels, subject_count) where labels is a list of
            "Subject (n=count)" strings in alphabetical order of subject.
            Returns ([], 0) when preview_data is empty or has no
            'subject_counts' entry.
    """
    if preview_data and 'subject_counts' in preview_data:
        counts = preview_data['subject_counts']
        # Alphabetical order keeps the labels stable between loads.
        ordered_names = sorted(counts.keys())
        labels = [f"{name} (n={counts[name]})" for name in ordered_names]
        return labels, len(ordered_names)

    # Nothing usable to extract subjects from.
    return [], 0
def load_dataset_preview(dataset):
    """
    Loads preview data for the selected dataset.

    Args:
        dataset (str): Selected dataset name.

    Returns:
        tuple: (preview_data, specific_subjects_update, num_subjects_slider_update).
            For any dataset other than "MMLU-Pro" this is
            (None, gr.update(), gr.update()), i.e. no preview data and
            no-op component updates.
    """
    if dataset != "MMLU-Pro":
        return None, gr.update(), gr.update()

    # Load the preview data (regeneration is requested on every call).
    preview_data = mmlupro_dataset_preview(regenerate_preview=True)

    # Extract subject choices and count
    subject_choices, subject_count = get_subject_choices_from_preview(preview_data)

    # get_subject_choices_from_preview reports 0 subjects when the preview
    # is missing or has no 'subject_counts'. The slider's minimum is 1, so
    # clamp the maximum to keep its range valid.
    slider_maximum = max(subject_count, 1)

    return (
        preview_data,                                 # Store the preview data
        gr.update(choices=subject_choices),           # Update checkbox choices
        gr.update(maximum=slider_maximum, value=1),   # Update slider max
    )
def update_interface_based_on_dataset(dataset, current_visibility,
                                      mmlu_config_container, model_config_container,
                                      results_container, preview_toggle,
                                      dataset_preview_container):
    """
    Compute the UI updates that follow a change of the dataset selection.

    Only ``dataset`` influences the result; the remaining parameters are
    accepted but not read.

    Args:
        dataset (str): Selected dataset name.
        current_visibility (bool): Current preview visibility state.
        mmlu_config_container: MMLU config container component.
        model_config_container: Model config container component.
        results_container: Results container component.
        preview_toggle: Preview toggle button.
        dataset_preview_container: Dataset preview container.

    Returns:
        tuple: Seven values, in order: updates for the MMLU config, model
            config and results containers (visible only for "MMLU-Pro"),
            the preview toggle (interactive only for "MMLU-Pro"), the
            preview container (always hidden), the preview visibility
            flag (always False), and the button text (always reset to
            "Show Dataset Preview").
    """
    dataset_ready = dataset == "MMLU-Pro"

    config_update = gr.update(visible=dataset_ready)      # mmlu_config_container
    model_update = gr.update(visible=dataset_ready)       # model_config_container
    results_update = gr.update(visible=dataset_ready)     # results_container
    toggle_update = gr.update(interactive=dataset_ready)  # preview_toggle

    # Whatever was selected, the preview is collapsed and the button reset.
    preview_update = gr.update(visible=False)
    button_update = gr.update(value="Show Dataset Preview")

    return (
        config_update,
        model_update,
        results_update,
        toggle_update,
        preview_update,
        False,
        button_update,
    )
def toggle_preview(dataset, preview_visibility, preview_data):
    """
    Flip the dataset preview between shown and hidden.

    Args:
        dataset (str): Selected dataset name.
        preview_visibility (bool): Current preview visibility state.
        preview_data (dict): Preview data.

    Returns:
        tuple: (new_visibility, preview_container_update,
            preview_output_update, button_text_update). The preview output
            is the formatted preview only when the preview is being shown
            for "MMLU-Pro"; in every other case it is None.
    """
    now_visible = not preview_visibility

    # The button always names the action that the *next* click performs.
    if now_visible:
        button_label = "Hide Dataset Preview"
    else:
        button_label = "Show Dataset Preview"

    # Only MMLU-Pro has a preview formatter; other datasets (not implemented
    # yet) and the hiding case leave the table empty.
    table_value = None
    if now_visible and dataset == "MMLU-Pro":
        table_value = format_preview_for_display(preview_data)

    return (
        now_visible,
        gr.update(visible=now_visible),
        table_value,
        gr.update(value=button_label),
    )
def update_subject_selection_ui(mode, num_subjects_container, specific_subjects_container):
    """
    Decide which subject-selection sub-panel is visible for a given mode.

    Only ``mode`` influences the result; the container parameters are
    accepted but not read.

    Args:
        mode (str): Selected subject selection mode.
        num_subjects_container: Container for number of subjects slider.
        specific_subjects_container: Container for specific subjects checkboxes.

    Returns:
        tuple: (num_subjects_container_update, specific_subjects_container_update)
    """
    show_number_slider = mode == "Choose Number of Subjects"
    # Any mode other than the two named ones is treated as
    # "Specify which Subjects to Evaluate".
    show_subject_checkboxes = mode not in ("Evaluate All Subjects", "Choose Number of Subjects")

    return (
        gr.update(visible=show_number_slider),
        gr.update(visible=show_subject_checkboxes),
    )
def update_questions_interface(checked, num_questions_slider, questions_info_text):
    """
    Swap between the questions slider and the "all questions" info text.

    Only ``checked`` influences the result; the component parameters are
    accepted but not read.

    Args:
        checked (bool): Whether "Evaluate All Questions" is checked.
        num_questions_slider: Questions per subject slider component.
        questions_info_text: Questions info text component.

    Returns:
        tuple: (num_questions_slider_update, questions_info_text_update).
            Exactly one of the two is visible: the info text when checked,
            the slider otherwise.
    """
    evaluate_all = bool(checked)
    return gr.update(visible=not evaluate_all), gr.update(visible=evaluate_all)
def get_subject_mode_param(mode):
    """
    Map a subject selection mode label to its evaluation parameter string.

    Args:
        mode (str): Subject selection mode.

    Returns:
        str: "all" for "Evaluate All Subjects", "number" for
            "Choose Number of Subjects", and "specific" for anything else.
    """
    labelled_params = (
        ("Evaluate All Subjects", "all"),
        ("Choose Number of Subjects", "number"),
    )
    # Unlisted labels fall through to "Specify which Subjects to Evaluate".
    return next(
        (param for label, param in labelled_params if mode == label),
        "specific",
    )
def get_subject_names(selected_subjects):
    """
    Extracts subject names from checkbox values.

    Checkbox labels have the form "Subject (n=count)". Only that trailing
    count suffix is removed, so a subject whose own name contains a
    parenthesis (e.g. "law (civil) (n=5)") is returned intact. Labels
    without a count suffix are returned unchanged.

    Args:
        selected_subjects (list): Selected subjects with counts. None is
            treated as an empty selection.

    Returns:
        list: Clean subject names without count information.
    """
    names = []
    for label in selected_subjects or []:
        # rpartition splits on the LAST " (n=", i.e. the count suffix.
        name, separator, _count = label.rpartition(" (n=")
        names.append(name if separator else label)
    return names