File size: 18,326 Bytes
22df562
7939a4f
22df562
 
 
79986c4
 
 
22df562
 
 
 
 
 
 
79986c4
22df562
 
 
 
 
 
79986c4
22df562
 
 
 
 
4cebdf8
22df562
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37a713b
 
22df562
 
 
a5be063
37a713b
 
22df562
79986c4
37a713b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22df562
 
79986c4
22df562
 
 
 
 
79986c4
22df562
4c58c33
37a713b
79986c4
22df562
 
79986c4
75d6072
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c710d0b
 
dcd853c
37a713b
 
 
 
 
 
 
 
 
 
 
 
 
c710d0b
 
37a713b
 
4c58c33
37a713b
 
c710d0b
 
 
22df562
 
37a713b
 
22df562
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79986c4
22df562
7939a4f
37a713b
79986c4
22df562
 
4c58c33
22df562
 
 
 
 
 
 
79986c4
96220c9
 
 
 
 
56da6c7
7939a4f
96220c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22df562
 
37a713b
79986c4
22df562
 
79986c4
c710d0b
 
 
73c450a
c710d0b
73c450a
c710d0b
73c450a
c710d0b
 
 
a5be063
0d9efd3
a5be063
 
 
 
 
0d9efd3
4cebdf8
a5be063
 
 
 
 
 
 
0d9efd3
 
73c450a
0d9efd3
 
37a713b
 
 
 
a5be063
 
 
 
 
73c450a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a5be063
 
c710d0b
73c450a
a5be063
c710d0b
79986c4
 
c710d0b
73c450a
a5be063
c710d0b
79986c4
 
c710d0b
73c450a
a5be063
c710d0b
79986c4
 
22df562
73c450a
 
 
79986c4
 
22df562
a5be063
 
22df562
79986c4
 
a5be063
 
 
 
 
 
 
22df562
 
 
 
 
 
37a713b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
79986c4
22df562
 
 
 
 
 
 
 
79986c4
22df562
79986c4
22df562
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
"""
Synthefy MUSEval Leaderboard - Main Gradio Application
Following GIFT-Eval import structure with custom layout
"""

import gradio as gr
import pandas as pd

# Optional imports for production features
try:
    from apscheduler.schedulers.background import BackgroundScheduler
    SCHEDULER_AVAILABLE = True
except ImportError:
    SCHEDULER_AVAILABLE = False
    print("Warning: apscheduler not available, scheduler features disabled")

try:
    from huggingface_hub import snapshot_download
    HUB_AVAILABLE = True
except ImportError:
    HUB_AVAILABLE = False
    print("Warning: huggingface_hub not available, hub features disabled")

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
    BENCHMARK_COLS,
    EVAL_COLS,
    EVAL_TYPES,
    ModelInfoColumn,
    ModelType,
    fields,
    WeightType,
    Precision
)
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
from src.populate import get_evaluation_queue_df, get_leaderboard_df, get_model_info_df, get_merged_df
from src.utils import norm_sNavie, pivot_df, get_grouped_dfs, pivot_existed_df, rename_metrics, format_df
from src.load_results import (
    load_results_with_metadata,
    create_overall_table,
    create_html_table,
    create_html_table_from_df,
    get_filter_options,
    get_model_metadata,
    create_model_metadata_display,
    get_overall_summary,
    sort_table_by_column,
    get_available_models
)

def create_model_buttons():
    """Create one small secondary button per available model.

    The model names come from ``get_available_models()``. No event handlers
    are attached here; the caller is responsible for wiring each button to
    whatever it should update (e.g. the Model Inspector).

    NOTE(review): Gradio components are normally instantiated inside an
    active ``gr.Blocks`` context -- confirm the call site does so.

    Returns:
        list: ``gr.Button`` components, in the order the models were returned.
    """
    from src.load_results import get_available_models

    return [
        gr.Button(
            value=model,
            variant="secondary",
            size="sm",
            # Gradio documents ``scale`` as an integer and warns on fractional
            # values; 0 means the button does not expand to fill its row.
            scale=0,
        )
        for model in get_available_models()
    ]

def restart_space():
    """Ask the Hub API client to restart the Space identified by ``REPO_ID``."""
    space_id = REPO_ID
    API.restart_space(repo_id=space_id)

def create_leaderboard_interface():
    """Create the main leaderboard interface.

    Builds a ``gr.Blocks`` app containing, top to bottom: the title, a
    collapsible description, a row of filters (model search, category,
    domain, dataset, sort), the results table, a refresh button, the Model
    Inspector accordion, and the About / Citation / Submit accordions. All
    event listeners are registered before returning.

    Filter behaviour, as implemented by the handlers below:
      * category / domain / dataset are mutually exclusive -- picking a
        value other than "all" in one resets the other two to "all";
      * the model search box narrows the table without touching the other
        filters;
      * every table refresh re-applies the current filters first and then
        the selected sort order.

    Returns:
        gr.Blocks: the assembled app (not yet queued or launched).
    """
    # Single source of truth for the sort dropdown: label shown to the user
    # -> table column passed to ``sort_table_by_column``. The dropdown
    # choices are derived from these keys so the two can never drift apart.
    sort_mapping = {
        "Rank": "Rank",
        "Model A-Z": "Model",
        "Organization A-Z": "Organization",
        "Top-Performer ↓": "Top-Performer",
        "Multi-MAPE ↓": "Multi-MAPE",
        "Uni-MAPE ↓": "Uni-MAPE",
        "Uni-Multi-MAPE ↑": "Uni-Multi-MAPE",
        "NMAE ↓": "NMAE",
        "Date ↑": "Submission Date"
    }

    demo = gr.Blocks(css=custom_css)
    with demo:
        gr.HTML(TITLE)

        # Minimizable description section
        with gr.Accordion("πŸ“– Description", open=False, elem_id="description-accordion"):
            gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text", elem_id="description-text")

        # Get filter options
        filter_options = get_filter_options()

        # Compact filters in a single horizontal scrollable row
        with gr.Row(elem_id="filter-row"):
            model_search = gr.Textbox(
                label="πŸ” Filter by Model",
                placeholder="Search...",
                value="",
                elem_id="model-search",
                scale=0
            )
            category_dropdown = gr.Dropdown(
                choices=filter_options["categories"],
                value="all",
                label="πŸ“‚ Filter By Category",
                allow_custom_value=False,
                elem_id="category-filter",
                scale=0
            )
            domain_dropdown = gr.Dropdown(
                choices=filter_options["domains"],
                value="all",
                label="🌐 Filter By Domain",
                allow_custom_value=False,
                elem_id="domain-filter",
                scale=0
            )
            dataset_dropdown = gr.Dropdown(
                choices=filter_options["datasets"],
                value="all",
                label="πŸ“Š Filter by Dataset",
                allow_custom_value=False,
                elem_id="dataset-filter",
                scale=0
            )
            sort_dropdown = gr.Dropdown(
                choices=list(sort_mapping),
                value="Rank",
                label="πŸ”„ Sort",
                allow_custom_value=False,
                elem_id="sort-filter",
                scale=0
            )

        # Full width table
        gr.Markdown("### Models ranked by the number of datasets where they achieve the lowest MAPE (Top-Performer). Click on the model cell to details.")

        # Hidden input to handle model selection from table
        hidden_model_input = gr.Textbox(visible=False)

        # Hidden component to trigger scrolling only for model clicks
        scroll_trigger = gr.HTML(visible=False, elem_id="scroll-trigger")

        # Main results table with clickable rows
        df = create_overall_table()

        # Convert DataFrame to list of lists for Gradio
        df_values = df.values.tolist()
        df_headers = df.columns.tolist()

        results_table = gr.Dataframe(
            value=df_values,
            headers=df_headers,
            label="",
            interactive=False,  # Disable editing but keep select events
            elem_id="results-table"
        )

        refresh_btn = gr.Button("πŸ”„ Refresh Table", variant="primary")

        # Model metadata section at bottom
        model_inspector_accordion = gr.Accordion("πŸ” Model Inspector", open=False, elem_id="model-inspector")
        with model_inspector_accordion:
            with gr.Row():
                with gr.Column(scale=1):
                    model_selector = gr.Dropdown(
                        choices=filter_options["models"],
                        value=None,
                        label="Select Model",
                        info="Choose a model to view its metadata",
                        allow_custom_value=False
                    )

                with gr.Column(scale=3):
                    metadata_display = gr.Markdown(
                        value="Select a model to view its metadata.",
                        label="Model Metadata"
                    )

        # About section
        with gr.Accordion("πŸ“– About MUSEval Leaderboard", open=False, elem_id="about-accordion"):
            gr.Markdown(BENCHMARKS_TEXT, elem_classes="markdown-text", elem_id="about-text")

        # Citation section
        with gr.Row():
            with gr.Accordion("πŸ“™ Citation", open=False, elem_id="citation-accordion"):
                citation_button = gr.Textbox(
                    value=CITATION_BUTTON_TEXT,
                    label=CITATION_BUTTON_LABEL,
                    lines=20,
                    elem_id="citation-button",
                    show_copy_button=True,
                )

        # Submit section
        with gr.Row():
            with gr.Accordion("πŸš€ Submit Your Model", open=False, elem_id="submit-accordion"):
                gr.HTML("""
                <div style="text-align: center; padding: 20px;">
                    <h3>Submit by creating a pull request with your model's performance here:</h3>
                    <a href='https://github.com/Synthefy/MUSEval' 
                       target='_blank' 
                       style='display: inline-block; 
                              background-color: #FF6B6B; 
                              color: white; 
                              padding: 15px 30px; 
                              text-decoration: none; 
                              border-radius: 8px; 
                              font-weight: bold; 
                              font-size: 18px;
                              transition: background-color 0.3s ease;'
                       onmouseover='this.style.backgroundColor="#FF5252"'
                       onmouseout='this.style.backgroundColor="#FF6B6B"'>
                        πŸš€ Submit Here
                    </a>
                </div>
                """)

        # Event handlers
        def reset_other_filters(selected_filter, filter_type):
            """Return updates for the two filters that were NOT just changed.

            Both are reset to "all" when a known filter type was set to a
            concrete value; otherwise both are left untouched.
            """
            if selected_filter != "all" and filter_type in ("category", "domain", "dataset"):
                return gr.update(value="all"), gr.update(value="all")
            return gr.update(), gr.update()  # No change

        def sort_by_dropdown(sort_option, domain, category, dataset, model):
            """Sort table based on dropdown selection - apply filters first, then sort.

            Unknown sort labels fall back to the "Rank" column. Returns the
            rows as a list of lists, the format the Dataframe component is
            given elsewhere in this function.
            """
            column_name = sort_mapping.get(sort_option, "Rank")

            # First apply filters to get the filtered data
            filtered_df = create_overall_table(domain_filter=domain, category_filter=category, dataset_filter=dataset, model_filter=model)

            # Then sort the filtered data
            sorted_df = sort_table_by_column(filtered_df, column_name)

            # Convert sorted DataFrame back to list format for Gradio Dataframe
            return sorted_df.values.tolist()

        def update_table_with_sort(sort_option, domain, category, dataset, model):
            """Update table with current filters and sorting"""
            return sort_by_dropdown(sort_option, domain, category, dataset, model)

        def update_table_with_model_search(model, sort_option, domain, category, dataset):
            """Update table with model search - keep other filters unchanged"""
            # Update the table with current filter values (don't reset other filters)
            table_result = update_table_with_sort(sort_option, domain, category, dataset, model)

            # Return the table and no changes to other dropdowns
            return (table_result, gr.update(), gr.update(), gr.update())

        def update_table_with_reset(selected_filter, filter_type, sort_option, domain, category, dataset, model):
            """Update table with proper filter reset logic.

            The dropdown values received here were read before the reset
            updates reach the UI, so the same reset is applied locally first
            to make the table reflect the post-reset filter state.
            """
            if filter_type == "category" and selected_filter != "all":
                domain = "all"
                dataset = "all"
            elif filter_type == "domain" and selected_filter != "all":
                category = "all"
                dataset = "all"
            elif filter_type == "dataset" and selected_filter != "all":
                category = "all"
                domain = "all"

            # Update the table with the corrected filter values
            table_result = update_table_with_sort(sort_option, domain, category, dataset, model)

            # Return the table and the reset updates
            reset_updates = reset_other_filters(selected_filter, filter_type)
            return (table_result, *reset_updates)

        # Connect filters to table updates with mutual exclusivity and sorting.
        # In each listener the two extra outputs are the *other* two filters,
        # matching the pair of updates produced by reset_other_filters.
        domain_dropdown.change(
            fn=lambda domain, category, dataset, model, sort_option: update_table_with_reset(domain, "domain", sort_option, domain, category, dataset, model),
            inputs=[domain_dropdown, category_dropdown, dataset_dropdown, model_search, sort_dropdown],
            outputs=[results_table, category_dropdown, dataset_dropdown]
        )

        category_dropdown.change(
            fn=lambda domain, category, dataset, model, sort_option: update_table_with_reset(category, "category", sort_option, domain, category, dataset, model),
            inputs=[domain_dropdown, category_dropdown, dataset_dropdown, model_search, sort_dropdown],
            outputs=[results_table, domain_dropdown, dataset_dropdown]
        )

        dataset_dropdown.change(
            fn=lambda domain, category, dataset, model, sort_option: update_table_with_reset(dataset, "dataset", sort_option, domain, category, dataset, model),
            inputs=[domain_dropdown, category_dropdown, dataset_dropdown, model_search, sort_dropdown],
            outputs=[results_table, category_dropdown, domain_dropdown]
        )

        model_search.change(
            fn=update_table_with_model_search,
            inputs=[model_search, sort_dropdown, domain_dropdown, category_dropdown, dataset_dropdown],
            outputs=[results_table, domain_dropdown, category_dropdown, dataset_dropdown]
        )

        # Inputs are passed positionally, so they must follow the handler's
        # parameter order: (sort_option, domain, category, dataset, model).
        refresh_btn.click(
            fn=update_table_with_sort,
            inputs=[sort_dropdown, domain_dropdown, category_dropdown, dataset_dropdown, model_search],
            outputs=results_table
        )

        # Sort dropdown event handler - re-applies the current filters, then sorts
        sort_dropdown.change(
            fn=sort_by_dropdown,
            inputs=[sort_dropdown, domain_dropdown, category_dropdown, dataset_dropdown, model_search],
            outputs=results_table
        )

        # Model selector event handler
        model_selector.change(
            fn=create_model_metadata_display,
            inputs=[model_selector],
            outputs=[metadata_display]
        )

        # Model column cell selection handler (with controlled scrolling)
        def handle_model_column_clicks(evt: gr.SelectData):
            """Handle only model column cell clicks for model selection.

            Returns three updates, for (model selector, inspector accordion,
            scroll trigger). Clicks outside column 0, or clicks that carry no
            usable model name, leave all three unchanged.
            """
            print(f"DEBUG: Click detected - Row: {evt.index[0]}, Column: {evt.index[1]}, Value: {evt.value}")

            col_idx = evt.index[1]

            # Only handle model column clicks (column 0)
            # NOTE(review): assumes the table's first column is the model name -- confirm
            if col_idx == 0:  # Model column
                print("🎯 MODEL COLUMN CELL CLICK DETECTED!")
                # Prefer the first cell of the clicked row when the event
                # exposes it; otherwise fall back to the clicked cell's value.
                row_value = getattr(evt, 'row_value', None)
                if row_value is not None and len(row_value) > 0:
                    model_name = row_value[0]
                else:
                    model_name = evt.value

                if model_name is not None:
                    print(f"🎯 Model selected: {model_name}")
                    # Return model selection, accordion expansion, and scroll trigger
                    return gr.update(value=model_name), gr.update(open=True), gr.update(value="scroll")
            else:
                print("🎯 OTHER COLUMN CELL CLICK - NO ACTION (NO SCROLLING)")

            # For non-model column clicks, return no changes (no scrolling)
            return gr.update(), gr.update(), gr.update()

        results_table.select(
            fn=handle_model_column_clicks,
            inputs=[],
            outputs=[model_selector, model_inspector_accordion, scroll_trigger]
        )

        # Handle scroll trigger - only scroll when model is selected
        def handle_scroll_trigger(trigger_value):
            """Handle scrolling only when model is selected.

            The trigger is moved from "scroll" to "scrolled" so that the next
            model click changes the value again and re-fires this listener.
            """
            if trigger_value == "scroll":
                print("🎯 SCROLL TRIGGER ACTIVATED!")
                # Trigger scrolling by updating the scroll trigger and ensuring accordion is open
                return gr.update(value="scrolled"), gr.update(open=True)
            return gr.update(), gr.update()

        scroll_trigger.change(
            fn=handle_scroll_trigger,
            inputs=[scroll_trigger],
            outputs=[scroll_trigger, model_inspector_accordion],
            scroll_to_output=True
        )

        # Handle change events from interactive table
        def handle_table_changes(new_value):
            """Log table change events for debugging.

            The listener declares no outputs, so nothing is returned.
            """
            print("=" * 50)
            print("DEBUG: Table Change Event Detected")
            print("=" * 50)
            print(f"New value: {new_value}")
            print(f"New value type: {type(new_value)}")
            print("=" * 50)

        results_table.change(
            fn=handle_table_changes,
            inputs=[results_table],
            outputs=[]
        )

        # Hidden input event handler - when model is selected from table
        def update_model_from_hidden(hidden_value):
            """Copy a non-empty hidden-input value into the model selector."""
            if hidden_value:
                return gr.update(value=hidden_value)
            return gr.update()

        hidden_model_input.change(
            fn=update_model_from_hidden,
            inputs=[hidden_model_input],
            outputs=[model_selector]
        )

    return demo

# Create and start the background scheduler when apscheduler could be
# imported; otherwise expose ``scheduler`` as None. No jobs are registered
# in the code shown here.
scheduler = BackgroundScheduler() if SCHEDULER_AVAILABLE else None
if scheduler is not None:
    scheduler.start()

# Script entry point: build the UI, enable request queuing, then serve it.
if __name__ == "__main__":
    demo = create_leaderboard_interface()
    demo.queue(default_concurrency_limit=40)
    demo.launch()