Commit
·
a5be063
1
Parent(s):
11de9b3
updated dropdown
Browse files- app.py +102 -56
- src/about.py +5 -5
- src/load_results.py +64 -4
app.py
CHANGED
|
@@ -49,7 +49,8 @@ from src.load_results import (
|
|
| 49 |
get_filter_options,
|
| 50 |
get_model_metadata,
|
| 51 |
create_model_metadata_display,
|
| 52 |
-
get_overall_summary
|
|
|
|
| 53 |
)
|
| 54 |
|
| 55 |
def restart_space():
|
|
@@ -68,48 +69,60 @@ def create_leaderboard_interface():
|
|
| 68 |
# Get filter options
|
| 69 |
filter_options = get_filter_options()
|
| 70 |
|
| 71 |
-
#
|
| 72 |
with gr.Row():
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
with gr.Column(scale=1):
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
with gr.Column(scale=1):
|
| 82 |
-
with gr.Accordion("π Category Filter", open=False):
|
| 83 |
-
category_dropdown = gr.Dropdown(
|
| 84 |
-
choices=filter_options["categories"],
|
| 85 |
-
value="all",
|
| 86 |
-
label="Category",
|
| 87 |
-
info="Filter by category",
|
| 88 |
-
allow_custom_value=False
|
| 89 |
-
)
|
| 90 |
-
|
| 91 |
with gr.Column(scale=1):
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
)
|
| 100 |
-
|
| 101 |
with gr.Column(scale=1):
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
with gr.Column(scale=1):
|
| 112 |
-
clear_filters_btn = gr.Button("ποΈ Clear
|
| 113 |
|
| 114 |
# Full width table
|
| 115 |
gr.Markdown("### π Model Rankings")
|
|
@@ -117,10 +130,10 @@ def create_leaderboard_interface():
|
|
| 117 |
# Main results table
|
| 118 |
results_table = gr.Dataframe(
|
| 119 |
value=create_overall_table(),
|
| 120 |
-
headers=["
|
| 121 |
-
"MAPE", "Uni-MAPE", "Uni-Multi-MAPE", "MAE", "RMSE", "NMAE", "Submission Date"],
|
| 122 |
-
datatype=["
|
| 123 |
-
"str", "str", "str", "str", "str", "str", "str"],
|
| 124 |
interactive=False,
|
| 125 |
label="Overall Rankings",
|
| 126 |
wrap=True,
|
|
@@ -187,43 +200,69 @@ def create_leaderboard_interface():
|
|
| 187 |
else:
|
| 188 |
return gr.update(), gr.update() # No change
|
| 189 |
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
domain_dropdown.change(
|
| 192 |
-
fn=lambda domain, category, dataset, model: (
|
| 193 |
-
|
| 194 |
*reset_other_filters(domain, "domain")
|
| 195 |
),
|
| 196 |
-
inputs=[domain_dropdown, category_dropdown, dataset_dropdown, model_search],
|
| 197 |
outputs=[results_table, category_dropdown, dataset_dropdown]
|
| 198 |
)
|
| 199 |
|
| 200 |
category_dropdown.change(
|
| 201 |
-
fn=lambda domain, category, dataset, model: (
|
| 202 |
-
|
| 203 |
*reset_other_filters(category, "category")
|
| 204 |
),
|
| 205 |
-
inputs=[domain_dropdown, category_dropdown, dataset_dropdown, model_search],
|
| 206 |
outputs=[results_table, domain_dropdown, dataset_dropdown]
|
| 207 |
)
|
| 208 |
|
| 209 |
dataset_dropdown.change(
|
| 210 |
-
fn=lambda domain, category, dataset, model: (
|
| 211 |
-
|
| 212 |
*reset_other_filters(dataset, "dataset")
|
| 213 |
),
|
| 214 |
-
inputs=[domain_dropdown, category_dropdown, dataset_dropdown, model_search],
|
| 215 |
outputs=[results_table, category_dropdown, domain_dropdown]
|
| 216 |
)
|
| 217 |
|
| 218 |
model_search.change(
|
| 219 |
-
fn=
|
| 220 |
-
inputs=[domain_dropdown, category_dropdown, dataset_dropdown, model_search],
|
| 221 |
outputs=results_table
|
| 222 |
)
|
| 223 |
|
| 224 |
refresh_btn.click(
|
| 225 |
-
fn=
|
| 226 |
-
inputs=[domain_dropdown, category_dropdown, dataset_dropdown, model_search],
|
| 227 |
outputs=results_table
|
| 228 |
)
|
| 229 |
|
|
@@ -232,6 +271,13 @@ def create_leaderboard_interface():
|
|
| 232 |
outputs=[domain_dropdown, category_dropdown, dataset_dropdown, model_search]
|
| 233 |
)
|
| 234 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
# Model selector event handler
|
| 236 |
model_selector.change(
|
| 237 |
fn=create_model_metadata_display,
|
|
|
|
| 49 |
get_filter_options,
|
| 50 |
get_model_metadata,
|
| 51 |
create_model_metadata_display,
|
| 52 |
+
get_overall_summary,
|
| 53 |
+
sort_table_by_column
|
| 54 |
)
|
| 55 |
|
| 56 |
def restart_space():
|
|
|
|
| 69 |
# Get filter options
|
| 70 |
filter_options = get_filter_options()
|
| 71 |
|
| 72 |
+
# Compact filters across the top - narrower for better fit
|
| 73 |
with gr.Row():
|
| 74 |
+
with gr.Column(scale=2):
|
| 75 |
+
model_search = gr.Textbox(
|
| 76 |
+
label="π Model",
|
| 77 |
+
placeholder="Search...",
|
| 78 |
+
scale=1
|
| 79 |
+
)
|
| 80 |
with gr.Column(scale=1):
|
| 81 |
+
category_dropdown = gr.Dropdown(
|
| 82 |
+
choices=filter_options["categories"],
|
| 83 |
+
value="all",
|
| 84 |
+
label="π Category",
|
| 85 |
+
allow_custom_value=False,
|
| 86 |
+
scale=1
|
| 87 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
with gr.Column(scale=1):
|
| 89 |
+
domain_dropdown = gr.Dropdown(
|
| 90 |
+
choices=filter_options["domains"],
|
| 91 |
+
value="all",
|
| 92 |
+
label="π Domain",
|
| 93 |
+
allow_custom_value=False,
|
| 94 |
+
scale=1
|
| 95 |
+
)
|
|
|
|
|
|
|
| 96 |
with gr.Column(scale=1):
|
| 97 |
+
dataset_dropdown = gr.Dropdown(
|
| 98 |
+
choices=filter_options["datasets"],
|
| 99 |
+
value="all",
|
| 100 |
+
label="π Dataset",
|
| 101 |
+
allow_custom_value=False,
|
| 102 |
+
scale=1
|
| 103 |
+
)
|
| 104 |
+
with gr.Column(scale=2):
|
| 105 |
+
sort_dropdown = gr.Dropdown(
|
| 106 |
+
choices=[
|
| 107 |
+
"Rank",
|
| 108 |
+
"Model A-Z",
|
| 109 |
+
"Organization A-Z",
|
| 110 |
+
"Datasets β",
|
| 111 |
+
"MAPE β",
|
| 112 |
+
"Uni-MAPE β",
|
| 113 |
+
"Uni-Multi-MAPE β",
|
| 114 |
+
"MAE β",
|
| 115 |
+
"RMSE β",
|
| 116 |
+
"NMAE β",
|
| 117 |
+
"Date β"
|
| 118 |
+
],
|
| 119 |
+
value="Rank",
|
| 120 |
+
label="π Sort",
|
| 121 |
+
allow_custom_value=False,
|
| 122 |
+
scale=1
|
| 123 |
+
)
|
| 124 |
with gr.Column(scale=1):
|
| 125 |
+
clear_filters_btn = gr.Button("ποΈ Clear", variant="secondary", scale=1)
|
| 126 |
|
| 127 |
# Full width table
|
| 128 |
gr.Markdown("### π Model Rankings")
|
|
|
|
| 130 |
# Main results table
|
| 131 |
results_table = gr.Dataframe(
|
| 132 |
value=create_overall_table(),
|
| 133 |
+
headers=["Model", "Organization", "Datasets",
|
| 134 |
+
"MAPE", "Uni-MAPE", "Uni-Multi-MAPE", "MAE", "RMSE", "NMAE", "Submission Date", "Rank"],
|
| 135 |
+
datatype=["str", "str", "number",
|
| 136 |
+
"str", "str", "str", "str", "str", "str", "str", "number"],
|
| 137 |
interactive=False,
|
| 138 |
label="Overall Rankings",
|
| 139 |
wrap=True,
|
|
|
|
| 200 |
else:
|
| 201 |
return gr.update(), gr.update() # No change
|
| 202 |
|
| 203 |
+
def sort_by_dropdown(sort_option, domain, category, dataset, model):
|
| 204 |
+
"""Sort table based on dropdown selection - independent of filters"""
|
| 205 |
+
# Map dropdown options to column names
|
| 206 |
+
sort_mapping = {
|
| 207 |
+
"Rank": "Rank",
|
| 208 |
+
"Model A-Z": "Model",
|
| 209 |
+
"Organization A-Z": "Organization",
|
| 210 |
+
"Datasets β": "Datasets",
|
| 211 |
+
"MAPE β": "MAPE",
|
| 212 |
+
"Uni-MAPE β": "Uni-MAPE",
|
| 213 |
+
"Uni-Multi-MAPE β": "Uni-Multi-MAPE",
|
| 214 |
+
"MAE β": "MAE",
|
| 215 |
+
"RMSE β": "RMSE",
|
| 216 |
+
"NMAE β": "NMAE",
|
| 217 |
+
"Date β": "Submission Date"
|
| 218 |
+
}
|
| 219 |
+
|
| 220 |
+
column_name = sort_mapping.get(sort_option, "Rank")
|
| 221 |
+
df = create_overall_table(domain, category, dataset, model)
|
| 222 |
+
return sort_table_by_column(df, column_name)
|
| 223 |
+
|
| 224 |
+
def update_table_with_sort(sort_option, domain, category, dataset, model):
|
| 225 |
+
"""Update table with current filters and sorting"""
|
| 226 |
+
return sort_by_dropdown(sort_option, domain, category, dataset, model)
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
# Connect filters to table updates with mutual exclusivity and sorting
|
| 230 |
domain_dropdown.change(
|
| 231 |
+
fn=lambda domain, category, dataset, model, sort_option: (
|
| 232 |
+
update_table_with_sort(sort_option, domain, category, dataset, model),
|
| 233 |
*reset_other_filters(domain, "domain")
|
| 234 |
),
|
| 235 |
+
inputs=[domain_dropdown, category_dropdown, dataset_dropdown, model_search, sort_dropdown],
|
| 236 |
outputs=[results_table, category_dropdown, dataset_dropdown]
|
| 237 |
)
|
| 238 |
|
| 239 |
category_dropdown.change(
|
| 240 |
+
fn=lambda domain, category, dataset, model, sort_option: (
|
| 241 |
+
update_table_with_sort(sort_option, domain, category, dataset, model),
|
| 242 |
*reset_other_filters(category, "category")
|
| 243 |
),
|
| 244 |
+
inputs=[domain_dropdown, category_dropdown, dataset_dropdown, model_search, sort_dropdown],
|
| 245 |
outputs=[results_table, domain_dropdown, dataset_dropdown]
|
| 246 |
)
|
| 247 |
|
| 248 |
dataset_dropdown.change(
|
| 249 |
+
fn=lambda domain, category, dataset, model, sort_option: (
|
| 250 |
+
update_table_with_sort(sort_option, domain, category, dataset, model),
|
| 251 |
*reset_other_filters(dataset, "dataset")
|
| 252 |
),
|
| 253 |
+
inputs=[domain_dropdown, category_dropdown, dataset_dropdown, model_search, sort_dropdown],
|
| 254 |
outputs=[results_table, category_dropdown, domain_dropdown]
|
| 255 |
)
|
| 256 |
|
| 257 |
model_search.change(
|
| 258 |
+
fn=update_table_with_sort,
|
| 259 |
+
inputs=[domain_dropdown, category_dropdown, dataset_dropdown, model_search, sort_dropdown],
|
| 260 |
outputs=results_table
|
| 261 |
)
|
| 262 |
|
| 263 |
refresh_btn.click(
|
| 264 |
+
fn=update_table_with_sort,
|
| 265 |
+
inputs=[domain_dropdown, category_dropdown, dataset_dropdown, model_search, sort_dropdown],
|
| 266 |
outputs=results_table
|
| 267 |
)
|
| 268 |
|
|
|
|
| 271 |
outputs=[domain_dropdown, category_dropdown, dataset_dropdown, model_search]
|
| 272 |
)
|
| 273 |
|
| 274 |
+
# Sort dropdown event handler - independent of filters
|
| 275 |
+
sort_dropdown.change(
|
| 276 |
+
fn=sort_by_dropdown,
|
| 277 |
+
inputs=[sort_dropdown, domain_dropdown, category_dropdown, dataset_dropdown, model_search],
|
| 278 |
+
outputs=results_table
|
| 279 |
+
)
|
| 280 |
+
|
| 281 |
# Model selector event handler
|
| 282 |
model_selector.change(
|
| 283 |
fn=create_model_metadata_display,
|
src/about.py
CHANGED
|
@@ -95,11 +95,11 @@ The leaderboard aggregates results across all datasets to provide overall model
|
|
| 95 |
"""
|
| 96 |
|
| 97 |
CITATION_BUTTON_LABEL = "π Citation"
|
| 98 |
-
CITATION_BUTTON_TEXT = """@article{mused-
|
| 99 |
-
title={
|
| 100 |
-
author={Synthefy
|
| 101 |
-
journal={
|
| 102 |
-
year={
|
| 103 |
}"""
|
| 104 |
|
| 105 |
EVALUATION_QUEUE_TEXT = """
|
|
|
|
| 95 |
"""
|
| 96 |
|
| 97 |
CITATION_BUTTON_LABEL = "π Citation"
|
| 98 |
+
CITATION_BUTTON_TEXT = """@article{mused-fm2025,
|
| 99 |
+
title={MuSED-FM: A Benchmark for Evaluating Multivariate Time Series Foundation Models},
|
| 100 |
+
author={Synthefy Inc.},
|
| 101 |
+
journal={preprint},
|
| 102 |
+
year={2025}
|
| 103 |
}"""
|
| 104 |
|
| 105 |
EVALUATION_QUEUE_TEXT = """
|
src/load_results.py
CHANGED
|
@@ -152,16 +152,76 @@ def create_overall_table(domain_filter="all", category_filter="all", dataset_fil
|
|
| 152 |
"MAE": f"{avg_mae:.3f}",
|
| 153 |
"RMSE": f"{avg_rmse:.3f}",
|
| 154 |
"NMAE": f"{avg_nmae:.3f}",
|
| 155 |
-
"Submission Date": stats["submission_date"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
}
|
| 157 |
table_data.append(row)
|
| 158 |
|
| 159 |
-
#
|
| 160 |
-
table_data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
for i, row in enumerate(table_data):
|
| 162 |
row["Rank"] = i + 1
|
| 163 |
|
| 164 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
|
| 167 |
def get_filter_options():
|
|
|
|
| 152 |
"MAE": f"{avg_mae:.3f}",
|
| 153 |
"RMSE": f"{avg_rmse:.3f}",
|
| 154 |
"NMAE": f"{avg_nmae:.3f}",
|
| 155 |
+
"Submission Date": stats["submission_date"],
|
| 156 |
+
# Store raw values for ranking and sorting
|
| 157 |
+
"_mape_raw": avg_mape,
|
| 158 |
+
"_uni_mape_raw": avg_uni_mape,
|
| 159 |
+
"_uni_multi_mape_raw": avg_uni_multi_mape,
|
| 160 |
+
"_mae_raw": avg_mae,
|
| 161 |
+
"_rmse_raw": avg_rmse,
|
| 162 |
+
"_nmae_raw": avg_nmae
|
| 163 |
}
|
| 164 |
table_data.append(row)
|
| 165 |
|
| 166 |
+
# Calculate rank based on max(MAPE, Uni-MAPE) - lower is better
|
| 167 |
+
for row in table_data:
|
| 168 |
+
max_error = max(row["_mape_raw"], row["_uni_mape_raw"])
|
| 169 |
+
row["_rank_score"] = max_error
|
| 170 |
+
|
| 171 |
+
# Sort by rank score (lower is better) and assign ranks
|
| 172 |
+
table_data.sort(key=lambda x: x["_rank_score"])
|
| 173 |
for i, row in enumerate(table_data):
|
| 174 |
row["Rank"] = i + 1
|
| 175 |
|
| 176 |
+
# Remove raw values from final output
|
| 177 |
+
for row in table_data:
|
| 178 |
+
del row["_mape_raw"]
|
| 179 |
+
del row["_uni_mape_raw"]
|
| 180 |
+
del row["_uni_multi_mape_raw"]
|
| 181 |
+
del row["_mae_raw"]
|
| 182 |
+
del row["_rmse_raw"]
|
| 183 |
+
del row["_nmae_raw"]
|
| 184 |
+
del row["_rank_score"]
|
| 185 |
+
|
| 186 |
+
# Create DataFrame and reorder columns to put Rank last
|
| 187 |
+
df = pd.DataFrame(table_data)
|
| 188 |
+
column_order = ["Model", "Organization", "Datasets",
|
| 189 |
+
"MAPE", "Uni-MAPE", "Uni-Multi-MAPE", "MAE", "RMSE", "NMAE", "Submission Date", "Rank"]
|
| 190 |
+
df = df[column_order]
|
| 191 |
+
|
| 192 |
+
return df
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
def sort_table_by_column(df, column_name, ascending=True):
    """
    Return a sorted copy of the leaderboard table with "Rank" renumbered.

    The sort direction is fixed per column: "Uni-Multi-MAPE" is sorted
    descending (higher is better); every other column is sorted ascending.

    Args:
        df: Leaderboard DataFrame. It is not modified.
        column_name: Name of the column to sort by.
        ascending: Kept for interface compatibility only. The value is
            overridden by the per-column rule above, exactly as before.

    Returns:
        A new DataFrame, sorted, with a fresh 0..n-1 index and a "Rank"
        column set to 1..n in the displayed order.

    Raises:
        KeyError: If ``column_name`` is not a column of a non-empty ``df``.
    """
    percent_columns = ("MAPE", "Uni-MAPE", "Uni-Multi-MAPE")
    numeric_text_columns = ("MAE", "RMSE", "NMAE")

    # Direction is decided by the column, not by the caller.
    ascending = column_name != "Uni-Multi-MAPE"

    # Work on a fresh positional index so the caller's frame is untouched
    # and label-based selection below is unambiguous.
    df_sorted = df.reset_index(drop=True)
    if df_sorted.empty:
        return df_sorted

    sort_key = df_sorted[column_name]
    if column_name in percent_columns or column_name in numeric_text_columns:
        # Metric cells are formatted text (e.g. "12.34%" or "0.123"), so they
        # must be compared as numbers: as strings, "10.000" sorts before
        # "9.000". Cells that cannot be parsed become NaN and sort last.
        cleaned = sort_key.astype(str).str.replace("%", "", regex=False)
        sort_key = pd.to_numeric(cleaned, errors="coerce")

    # mergesort is stable, so tied rows keep their incoming order.
    order = sort_key.sort_values(
        ascending=ascending, kind="mergesort", na_position="last"
    ).index
    df_sorted = df_sorted.loc[order].reset_index(drop=True)

    # NOTE(review): this replaces the incoming "Rank" values with the row
    # position in the current sort order; confirm that is the intended
    # meaning of "Rank" when sorting by a non-rank column.
    df_sorted["Rank"] = range(1, len(df_sorted) + 1)

    return df_sorted
|
| 225 |
|
| 226 |
|
| 227 |
def get_filter_options():
|