Spaces:

Synthefy
/

MUSEval

Running

File size: 18,326 Bytes

"""
Synthefy MUSEval Leaderboard - Main Gradio Application
Following GIFT-Eval import structure with custom layout
"""

import gradio as gr
import pandas as pd

# Optional imports for production features
try:
    from apscheduler.schedulers.background import BackgroundScheduler
    SCHEDULER_AVAILABLE = True
except ImportError:
    SCHEDULER_AVAILABLE = False
    print("Warning: apscheduler not available, scheduler features disabled")

try:
    from huggingface_hub import snapshot_download
    HUB_AVAILABLE = True
except ImportError:
    HUB_AVAILABLE = False
    print("Warning: huggingface_hub not available, hub features disabled")

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    BENCHMARK_COLS,
    EVAL_COLS,
    EVAL_TYPES,
    ModelInfoColumn,
    ModelType,
    fields,
    WeightType,
    Precision
)
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
from src.populate import get_evaluation_queue_df, get_leaderboard_df, get_model_info_df, get_merged_df
from src.utils import norm_sNavie, pivot_df, get_grouped_dfs, pivot_existed_df, rename_metrics, format_df
from src.load_results import (
    load_results_with_metadata,
    create_overall_table,
    create_html_table,
    create_html_table_from_df,
    get_filter_options,
    get_model_metadata,
    create_model_metadata_display,
    get_overall_summary,
    sort_table_by_column,
    get_available_models
)

def create_model_buttons():
    """Create one small secondary button per available model.

    The model names come from ``get_available_models()``; each name becomes
    the button's label. The buttons are only constructed here, no click
    handlers are attached.

    NOTE(review): Gradio components are normally instantiated inside an
    active ``gr.Blocks`` context - confirm callers do so.

    Returns:
        list: the created ``gr.Button`` components, in the order the model
        names were returned.
    """
    # ``scale`` must be an integer in Gradio (a fractional value triggers a
    # warning). Every button gets the same scale, so 1 keeps the buttons
    # equally sized relative to each other, as the former 0.5 did.
    return [
        gr.Button(
            value=model,
            variant="secondary",
            size="sm",
            scale=1,
        )
        for model in get_available_models()
    ]

def restart_space():
    """Request a restart of the Space identified by ``REPO_ID``.

    Delegates to ``API.restart_space``; both ``API`` and ``REPO_ID`` are
    imported from ``src.envs``.
    NOTE(review): ``API`` is presumably a ``huggingface_hub.HfApi`` instance -
    confirm in ``src/envs.py``.
    """
    API.restart_space(repo_id=REPO_ID)

def create_leaderboard_interface():
    """Build the MUSEval leaderboard UI and wire up its event handlers.

    Layout, top to bottom: title, collapsible description, a filter row
    (model search, category / domain / dataset dropdowns, sort dropdown),
    the results table, a refresh button, the Model Inspector accordion and
    the About / Citation / Submit accordions.

    The category, domain and dataset filters are mutually exclusive:
    selecting a value other than "all" in one of them resets the other two
    to "all". The model search box and the sort dropdown never reset the
    other filters.

    Returns:
        gr.Blocks: the assembled Gradio app (not yet queued or launched).
    """
    demo = gr.Blocks(css=custom_css)
    with demo:
        gr.HTML(TITLE)

        # Minimizable description section
        with gr.Accordion("📖 Description", open=False, elem_id="description-accordion"):
            gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text", elem_id="description-text")

        # Choices for the dropdowns below; keys used here are "categories",
        # "domains", "datasets" and "models".
        filter_options = get_filter_options()

        # Compact filters in a single horizontal scrollable row
        with gr.Row(elem_id="filter-row"):
            model_search = gr.Textbox(
                label="🔍 Filter by Model",
                placeholder="Search...",
                value="",
                elem_id="model-search",
                scale=0
            )
            category_dropdown = gr.Dropdown(
                choices=filter_options["categories"],
                value="all",
                label="📂 Filter By Category",
                allow_custom_value=False,
                elem_id="category-filter",
                scale=0
            )
            domain_dropdown = gr.Dropdown(
                choices=filter_options["domains"],
                value="all",
                label="🌐 Filter By Domain",
                allow_custom_value=False,
                elem_id="domain-filter",
                scale=0
            )
            dataset_dropdown = gr.Dropdown(
                choices=filter_options["datasets"],
                value="all",
                label="📊 Filter by Dataset",
                allow_custom_value=False,
                elem_id="dataset-filter",
                scale=0
            )
            sort_dropdown = gr.Dropdown(
                choices=[
                    "Rank",
                    "Model A-Z",
                    "Organization A-Z",
                    "Top-Performer ↓",
                    "Multi-MAPE ↓",
                    "Uni-MAPE ↓",
                    "Uni-Multi-MAPE ↑",
                    "NMAE ↓",
                    "Date ↑"
                ],
                value="Rank",
                label="🔄 Sort",
                allow_custom_value=False,
                elem_id="sort-filter",
                scale=0
            )

        # Full width table
        gr.Markdown("### Models ranked by the number of datasets where they achieve the lowest MAPE (Top-Performer). Click on the model cell to details.")

        # Hidden input to handle model selection from table
        hidden_model_input = gr.Textbox(visible=False)

        # Hidden component to trigger scrolling only for model clicks
        scroll_trigger = gr.HTML(visible=False, elem_id="scroll-trigger")

        # Main results table with clickable rows
        df = create_overall_table()

        # Convert DataFrame to list of lists for Gradio
        df_values = df.values.tolist()
        df_headers = df.columns.tolist()

        results_table = gr.Dataframe(
            value=df_values,
            headers=df_headers,
            label="",
            interactive=False,  # Disable editing but keep select events
            elem_id="results-table"
        )

        refresh_btn = gr.Button("🔄 Refresh Table", variant="primary")

        # Model metadata section at bottom
        model_inspector_accordion = gr.Accordion("🔍 Model Inspector", open=False, elem_id="model-inspector")
        with model_inspector_accordion:
            with gr.Row():
                with gr.Column(scale=1):
                    model_selector = gr.Dropdown(
                        choices=filter_options["models"],
                        value=None,
                        label="Select Model",
                        info="Choose a model to view its metadata",
                        allow_custom_value=False
                    )

                with gr.Column(scale=3):
                    metadata_display = gr.Markdown(
                        value="Select a model to view its metadata.",
                        label="Model Metadata"
                    )

        # About section
        with gr.Accordion("📖 About MUSEval Leaderboard", open=False, elem_id="about-accordion"):
            gr.Markdown(BENCHMARKS_TEXT, elem_classes="markdown-text", elem_id="about-text")

        # Citation section
        with gr.Row():
            with gr.Accordion("📙 Citation", open=False, elem_id="citation-accordion"):
                citation_button = gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    lines=20,
                    elem_id="citation-button",
                    show_copy_button=True,
                )

        # Submit section
        with gr.Row():
            with gr.Accordion("🚀 Submit Your Model", open=False, elem_id="submit-accordion"):
                gr.HTML("""
                <div style="text-align: center; padding: 20px;">
                    <h3>Submit by creating a pull request with your model's performance here:</h3>
                    <a href='https://github.com/Synthefy/MUSEval' 
                       target='_blank' 
                       style='display: inline-block; 
                              background-color: #FF6B6B; 
                              color: white; 
                              padding: 15px 30px; 
                              text-decoration: none; 
                              border-radius: 8px; 
                              font-weight: bold; 
                              font-size: 18px;
                              transition: background-color 0.3s ease;'
                       onmouseover='this.style.backgroundColor="#FF5252"'
                       onmouseout='this.style.backgroundColor="#FF6B6B"'>
                        🚀 Submit Here
                    </a>
                </div>
                """)

        # ------------------------------------------------------------------
        # Event handlers
        # ------------------------------------------------------------------
        def reset_other_filters(selected_filter, filter_type):
            """Return updates for the two filters other than ``filter_type``.

            Both are reset to "all" when ``filter_type`` is one of
            "category" / "domain" / "dataset" and a concrete value (anything
            but "all") was selected; otherwise both updates are no-ops.
            """
            exclusive_filters = ("category", "domain", "dataset")
            if filter_type in exclusive_filters and selected_filter != "all":
                return gr.update(value="all"), gr.update(value="all")
            return gr.update(), gr.update()  # No change

        def sort_by_dropdown(sort_option, domain, category, dataset, model):
            """Filter the overall table, sort it, and return its rows.

            Args:
                sort_option: label chosen in the sort dropdown; unknown
                    labels fall back to the "Rank" column.
                domain, category, dataset, model: current filter values,
                    forwarded to ``create_overall_table``.

            Returns:
                list[list]: the sorted rows for the ``gr.Dataframe``.
            """
            # Map dropdown labels to DataFrame column names
            sort_mapping = {
                "Rank": "Rank",
                "Model A-Z": "Model",
                "Organization A-Z": "Organization",
                "Top-Performer ↓": "Top-Performer",
                "Multi-MAPE ↓": "Multi-MAPE",
                "Uni-MAPE ↓": "Uni-MAPE",
                "Uni-Multi-MAPE ↑": "Uni-Multi-MAPE",
                "NMAE ↓": "NMAE",
                "Date ↑": "Submission Date"
            }
            column_name = sort_mapping.get(sort_option, "Rank")

            # Apply the filters first, then sort what is left
            filtered_df = create_overall_table(
                domain_filter=domain,
                category_filter=category,
                dataset_filter=dataset,
                model_filter=model,
            )
            sorted_df = sort_table_by_column(filtered_df, column_name)

            # Convert sorted DataFrame back to list format for Gradio Dataframe
            return sorted_df.values.tolist()

        def update_table_with_sort(sort_option, domain, category, dataset, model):
            """Rebuild the table rows from the current filters and sort option."""
            return sort_by_dropdown(sort_option, domain, category, dataset, model)

        def update_table_with_model_search(model, sort_option, domain, category, dataset):
            """Rebuild the table for a new model search string.

            Returns the new rows plus three no-op updates so the domain,
            category and dataset dropdowns keep their current values.
            """
            table_result = update_table_with_sort(sort_option, domain, category, dataset, model)
            return (table_result, gr.update(), gr.update(), gr.update())

        def update_table_with_reset(selected_filter, filter_type, sort_option, domain, category, dataset, model):
            """Rebuild the table after one of the exclusive filters changed.

            The dropdown values received here were read before the reset
            updates reach the UI, so the two "other" filters are forced to
            "all" locally before the table is rebuilt.

            Returns:
                tuple: ``(rows, update_a, update_b)`` where the two updates
                target the other two filter dropdowns.
            """
            if filter_type == "category" and selected_filter != "all":
                domain = "all"
                dataset = "all"
            elif filter_type == "domain" and selected_filter != "all":
                category = "all"
                dataset = "all"
            elif filter_type == "dataset" and selected_filter != "all":
                category = "all"
                domain = "all"

            # Update the table with the corrected filter values
            table_result = update_table_with_sort(sort_option, domain, category, dataset, model)

            # Return the table and the reset updates
            reset_updates = reset_other_filters(selected_filter, filter_type)
            return (table_result, *reset_updates)

        # Connect filters to table updates with mutual exclusivity and sorting.
        # The three lambdas share one input order; only the "selected" value,
        # the filter type and the two dropdowns being reset differ.
        exclusive_filter_inputs = [domain_dropdown, category_dropdown, dataset_dropdown, model_search, sort_dropdown]

        domain_dropdown.change(
            fn=lambda domain, category, dataset, model, sort_option: update_table_with_reset(
                domain, "domain", sort_option, domain, category, dataset, model
            ),
            inputs=exclusive_filter_inputs,
            outputs=[results_table, category_dropdown, dataset_dropdown]
        )

        category_dropdown.change(
            fn=lambda domain, category, dataset, model, sort_option: update_table_with_reset(
                category, "category", sort_option, domain, category, dataset, model
            ),
            inputs=exclusive_filter_inputs,
            outputs=[results_table, domain_dropdown, dataset_dropdown]
        )

        dataset_dropdown.change(
            fn=lambda domain, category, dataset, model, sort_option: update_table_with_reset(
                dataset, "dataset", sort_option, domain, category, dataset, model
            ),
            inputs=exclusive_filter_inputs,
            outputs=[results_table, category_dropdown, domain_dropdown]
        )

        model_search.change(
            fn=update_table_with_model_search,
            inputs=[model_search, sort_dropdown, domain_dropdown, category_dropdown, dataset_dropdown],
            outputs=[results_table, domain_dropdown, category_dropdown, dataset_dropdown]
        )

        # Inputs are passed positionally, so they must follow the parameter
        # order of update_table_with_sort: (sort_option, domain, category,
        # dataset, model). Listing the sort dropdown last would shift every
        # argument by one and sort/filter on the wrong values.
        refresh_btn.click(
            fn=update_table_with_sort,
            inputs=[sort_dropdown, domain_dropdown, category_dropdown, dataset_dropdown, model_search],
            outputs=results_table
        )

        # Sort dropdown event handler - re-applies the current filters, then sorts
        sort_dropdown.change(
            fn=sort_by_dropdown,
            inputs=[sort_dropdown, domain_dropdown, category_dropdown, dataset_dropdown, model_search],
            outputs=results_table
        )

        # Model selector event handler
        model_selector.change(
            fn=create_model_metadata_display,
            inputs=[model_selector],
            outputs=[metadata_display]
        )

        # Model column cell selection handler (with controlled scrolling)
        def handle_model_column_clicks(evt: gr.SelectData):
            """Select a model in the inspector when a column-0 cell is clicked.

            Returns three updates for (model_selector, inspector accordion,
            scroll_trigger). Clicks in any other column return no-op updates.
            NOTE(review): assumes the model name is in table column 0 -
            confirm against the columns produced by ``create_overall_table``.
            """
            print(f"DEBUG: Click detected - Row: {evt.index[0]}, Column: {evt.index[1]}, Value: {evt.value}")

            col_idx = evt.index[1]

            # Only handle model column clicks (column 0)
            if col_idx == 0:  # Model column
                print("🎯 MODEL COLUMN CELL CLICK DETECTED!")
                # Prefer the full row when the event carries it; otherwise
                # fall back to the clicked cell's own value.
                if hasattr(evt, 'row_value') and evt.row_value is not None and len(evt.row_value) > 0:
                    model_name = evt.row_value[0]
                    print(f"🎯 Model selected: {model_name}")
                    # Return model selection, accordion expansion, and scroll trigger
                    return gr.update(value=model_name), gr.update(open=True), gr.update(value="scroll")
                elif evt.value is not None:
                    model_name = evt.value
                    print(f"🎯 Model selected: {model_name}")
                    # Return model selection, accordion expansion, and scroll trigger
                    return gr.update(value=model_name), gr.update(open=True), gr.update(value="scroll")
            else:
                print("🎯 OTHER COLUMN CELL CLICK - NO ACTION (NO SCROLLING)")

            # For non-model column clicks, return no changes (no scrolling)
            return gr.update(), gr.update(), gr.update()

        results_table.select(
            fn=handle_model_column_clicks,
            inputs=[],
            outputs=[model_selector, model_inspector_accordion, scroll_trigger]
        )

        # Handle scroll trigger - only scroll when model is selected
        def handle_scroll_trigger(trigger_value):
            """React to the hidden scroll trigger.

            When the trigger holds "scroll", set it to "scrolled" and keep the
            inspector accordion open; any other value yields no-op updates.
            """
            if trigger_value == "scroll":
                print("🎯 SCROLL TRIGGER ACTIVATED!")
                # Trigger scrolling by updating the scroll trigger and ensuring accordion is open
                return gr.update(value="scrolled"), gr.update(open=True)
            return gr.update(), gr.update()

        scroll_trigger.change(
            fn=handle_scroll_trigger,
            inputs=[scroll_trigger],
            outputs=[scroll_trigger, model_inspector_accordion],
            scroll_to_output=True
        )

        # Handle change events from interactive table
        def handle_table_changes(new_value):
            """Log table change events; debugging aid only.

            The listener is registered with no outputs, so nothing is
            returned.
            """
            print("=" * 50)
            print("DEBUG: Table Change Event Detected")
            print("=" * 50)
            print(f"New value: {new_value}")
            print(f"New value type: {type(new_value)}")
            print("=" * 50)

        results_table.change(
            fn=handle_table_changes,
            inputs=[results_table],
            outputs=[]
        )

        # Hidden input event handler - when model is selected from table
        def update_model_from_hidden(hidden_value):
            """Copy a non-empty hidden-textbox value into the model selector."""
            if hidden_value:
                return gr.update(value=hidden_value)
            return gr.update()

        hidden_model_input.change(
            fn=update_model_from_hidden,
            inputs=[hidden_model_input],
            outputs=[model_selector]
        )


    return demo

# Background scheduler: created and started at import time when apscheduler
# could be imported; otherwise ``scheduler`` is defined as None. No jobs are
# registered here.
scheduler = BackgroundScheduler() if SCHEDULER_AVAILABLE else None
if scheduler is not None:
    scheduler.start()

# Script entry point: build the UI, enable the request queue and serve it.
if __name__ == "__main__":
    demo = create_leaderboard_interface()
    demo.queue(default_concurrency_limit=40).launch()