Spaces:
Sleeping
Sleeping
Update content and formatting
Browse files
app.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
|
| 3 |
import pandas as pd
|
| 4 |
|
| 5 |
# Static data
|
|
@@ -21,56 +20,37 @@ STATIC_DATA = [
|
|
| 21 |
]
|
| 22 |
|
| 23 |
COLUMNS = ["Model Type", "Method", "Control Type", "Input Type", "#Param.", "Acc. ↑", "Mean Traj. ↓"]
|
| 24 |
-
LEADERBOARD_DF = pd.DataFrame(STATIC_DATA, columns=COLUMNS)
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
.gradio-container {
|
| 30 |
-
max-width: 1200px !important;
|
| 31 |
-
}
|
| 32 |
-
"""
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
value=dataframe,
|
| 39 |
-
datatype=["str", "str", "str", "str", "str", "number", "number"],
|
| 40 |
-
select_columns=SelectColumns(
|
| 41 |
-
default_selection=COLUMNS,
|
| 42 |
-
cant_deselect=["Model Type", "Method", "Acc. ↑"],
|
| 43 |
-
label="Select Columns to Display:",
|
| 44 |
-
),
|
| 45 |
-
search_columns=["Model Type", "Method"],
|
| 46 |
-
hide_columns=[],
|
| 47 |
-
filter_columns=[
|
| 48 |
-
ColumnFilter("Model Type", type="checkboxgroup", label="Model types"),
|
| 49 |
-
ColumnFilter("Control Type", type="checkboxgroup", label="Control types"),
|
| 50 |
-
ColumnFilter("Input Type", type="checkboxgroup", label="Input types"),
|
| 51 |
-
],
|
| 52 |
-
bool_checkboxgroup_label="Hide models",
|
| 53 |
-
interactive=False,
|
| 54 |
-
)
|
| 55 |
-
|
| 56 |
-
demo = gr.Blocks(css=custom_css, title="Model Performance Leaderboard")
|
| 57 |
-
with demo:
|
| 58 |
-
gr.HTML("<h1 style='text-align: center'>π Model Performance Leaderboard</h1>")
|
| 59 |
gr.Markdown("""
|
| 60 |
**Performance comparison across vision-language models, image generation, and video generation models.**
|
| 61 |
|
| 62 |
π **Metrics:** Acc. ↑ (Accuracy - higher is better) | Mean Traj. ↓ (Mean Trajectory error - lower is better)
|
| 63 |
-
"""
|
| 64 |
-
|
| 65 |
-
with gr.Tabs(
|
| 66 |
-
with gr.TabItem("
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
gr.Markdown("""
|
| 71 |
-
#
|
| 72 |
|
| 73 |
-
This leaderboard showcases performance metrics across different types of AI models:
|
| 74 |
|
| 75 |
## Model Categories
|
| 76 |
- **VLM**: Vision-Language Models
|
|
@@ -87,8 +67,8 @@ with demo:
|
|
| 87 |
- XXX indicates results pending/unavailable
|
| 88 |
- β indicates not applicable or not available
|
| 89 |
|
| 90 |
-
*Results may vary across different evaluation settings
|
| 91 |
-
"""
|
| 92 |
|
| 93 |
if __name__ == "__main__":
|
| 94 |
demo.launch()
|
|
|
|
| 1 |
import gradio as gr
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
|
| 4 |
# Static data
|
|
|
|
| 20 |
]
|
| 21 |
|
| 22 |
COLUMNS = ["Model Type", "Method", "Control Type", "Input Type", "#Param.", "Acc. ↑", "Mean Traj. ↓"]
|
|
|
|
| 23 |
|
| 24 |
+
def create_leaderboard():
    """Build the leaderboard table from the module-level static data.

    Returns:
        A pandas DataFrame constructed from ``STATIC_DATA`` with ``COLUMNS``
        as its column labels.
    """
    return pd.DataFrame(STATIC_DATA, columns=COLUMNS)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
+
# Create the Gradio interface
|
| 29 |
+
with gr.Blocks(title="World-in-World: Building a Closed-Loop World Interface to Evaluate World Models", theme=gr.themes.Soft()) as demo:
|
| 30 |
+
gr.HTML("<h1 style='text-align: center; margin-bottom: 1rem'>π World-in-World: Building a Closed-Loop World Interface to Evaluate World Models</h1>")
|
| 31 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
gr.Markdown("""
|
| 33 |
**Performance comparison across vision-language models, image generation, and video generation models.**
|
| 34 |
|
| 35 |
π **Metrics:** Acc. ↑ (Accuracy - higher is better) | Mean Traj. ↓ (Mean Trajectory error - lower is better)
|
| 36 |
+
""")
|
| 37 |
+
|
| 38 |
+
with gr.Tabs():
|
| 39 |
+
with gr.TabItem("π Leaderboard"):
|
| 40 |
+
leaderboard_table = gr.DataFrame(
|
| 41 |
+
value=create_leaderboard(),
|
| 42 |
+
headers=COLUMNS,
|
| 43 |
+
datatype=["str", "str", "str", "str", "str", "number", "number"],
|
| 44 |
+
interactive=False,
|
| 45 |
+
wrap=True,
|
| 46 |
+
height=600
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
with gr.TabItem("π About"):
|
| 50 |
gr.Markdown("""
|
| 51 |
+
# World-in-World: Building a Closed-Loop World Interface to Evaluate World Models
|
| 52 |
|
| 53 |
+
This leaderboard showcases performance metrics across different types of AI models in world modeling tasks:
|
| 54 |
|
| 55 |
## Model Categories
|
| 56 |
- **VLM**: Vision-Language Models
|
|
|
|
| 67 |
- XXX indicates results pending/unavailable
|
| 68 |
- β indicates not applicable or not available
|
| 69 |
|
| 70 |
+
*Results represent performance on world modeling evaluation benchmarks and may vary across different evaluation settings.*
|
| 71 |
+
""")
|
| 72 |
|
| 73 |
if __name__ == "__main__":
|
| 74 |
demo.launch()
|