Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove Git LFS from repo #10

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 0 additions & 3 deletions .gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
Expand All @@ -33,5 +32,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
videos/clustering_explanation.mp4 filter=lfs diff=lfs merge=lfs -text
videos/sts_explanation.mp4 filter=lfs diff=lfs merge=lfs -text
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,5 @@
corpus.jsonl
corpus.jsonl.gz
results_dataset_to_upload
*.pyc
*.pyc
videos/*.mp4
26 changes: 18 additions & 8 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@
from models import ModelManager
from ui import build_side_by_side_ui_anon, build_side_by_side_ui_anon_sts, build_side_by_side_ui_anon_clustering, build_side_by_side_ui_named, build_side_by_side_ui_named_sts, build_side_by_side_ui_named_clustering, build_single_model_ui, build_single_model_ui_sts, build_single_model_ui_clustering


# download the videos
from huggingface_hub import hf_hub_url
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At runtime we download the videos from Huggingface. No need to keep them in this repo as I assume they are static and people won't be iterating on them.

for file_to_download in ["sts_explanation.mp4", "clustering_explanation.mp4"]:
file_url = hf_hub_url(repo_id="mteb/arena-videos", repo_type="dataset", endpoint=None, filename=file_to_download)
# download it to videos/ folder using wget
os.system(f"wget {file_url} -O videos/{file_to_download}")



acknowledgment_md = """
### Acknowledgment
We thank X, Y, Z, [Contextual AI](https://contextual.ai/) and [Hugging Face](https://huggingface.co/) for their generous sponsorship. If you'd like to sponsor us, please get in [touch](mailto:[email protected]).
Expand Down Expand Up @@ -39,15 +49,15 @@ def load_elo_results(elo_results_dir):
elo_results_dir = Path(elo_results_dir)
elo_results_file = {}
leaderboard_table_file = {}
for file in elo_results_dir.glob('elo_results_*.pkl'):
if 'clustering' in file.name:
elo_results_file['clustering'] = file
elif 'retrieval' in file.name:
elo_results_file['retrieval'] = file
elif 'sts' in file.name:
elo_results_file['sts'] = file
for folder in elo_results_dir.glob('elo_results_*'):
if 'clustering' in folder.name:
elo_results_file['clustering'] = folder
elif 'retrieval' in folder.name:
elo_results_file['retrieval'] = folder
elif 'sts' in folder.name:
elo_results_file['sts'] = folder
else:
raise ValueError(f"Unknown file name: {file.name}")
raise ValueError(f"Unknown folder name: {folder.name}")
for file in elo_results_dir.glob('*_leaderboard.csv'):
if 'clustering' in file.name:
leaderboard_table_file['clustering'] = file
Expand Down
124 changes: 122 additions & 2 deletions arena_elo/elo_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import plotly.express as px
from tqdm import tqdm
from datasets import load_dataset
import plotly
import os

from .basic_stats import get_log_files
from .clean_battle_data import clean_battle_data
Expand Down Expand Up @@ -337,6 +339,125 @@ def pretty_print_elo_rating(rating):
print(f"{i+1:2d}, {model:25s}, {rating[model]:.0f}")



def write_out_results(item: dict, item_name: str):
    """
    Recursively serialise *item* into a folder tree rooted at *item_name*.

    Due to their complex structure, containers become subfolders and each
    leaf value is written as a single ``default.*`` file inside its own
    folder, so the object can be reconstructed by ``load_results``:

    - ``list``                       -> subfolders named ``0`` .. ``N-1``
    - ``dict``                       -> one subfolder per key (a list value
                                        ``key`` is flattened to siblings
                                        ``key0``, ``key1``, ...)
    - ``pd.DataFrame`` / ``pd.Series`` -> ``default.jsonl`` (records, lines)
    - plotly ``Figure``              -> ``default.png``
    - ``str`` / numbers / ``None``   -> ``default.txt`` (empty file for None)

    Args:
        item (dict): The item to save (any of the supported types above).
        item_name (str): Filesystem path used as the root folder.

    Returns:
        None

    Raises:
        Exception: If an unsupported type is encountered.
    """
    main_folder = item_name

    def _write_leaf(content: str) -> None:
        # Every scalar leaf lives alone in its own folder as default.txt.
        print(f"Saving {main_folder} to {main_folder}/default.txt")
        os.makedirs(main_folder, exist_ok=True)
        with open(f"{main_folder}/default.txt", "w") as f:
            f.write(content)

    if isinstance(item, list):
        # Folder names 0..N-1 let load_results recover the ordering.
        for i, v in enumerate(item):
            write_out_results(v, os.path.join(main_folder, str(i)))

    elif isinstance(item, dict):
        for key, value in item.items():
            if isinstance(value, list):
                # Flatten "key -> list" into sibling folders key0, key1, ...
                for i, v in enumerate(value):
                    write_out_results(v, os.path.join(main_folder, key + str(i)))
            else:
                # dicts and scalars recurse identically under the key folder.
                write_out_results(value, os.path.join(main_folder, key))

    elif isinstance(item, pd.DataFrame):
        print(f"Saving {main_folder} to {main_folder}/default.jsonl")
        os.makedirs(main_folder, exist_ok=True)
        item.reset_index().to_json(f"{main_folder}/default.jsonl", orient="records", lines=True)

    elif isinstance(item, pd.Series):
        print(f"Saving {main_folder} to {main_folder}/default.jsonl")
        os.makedirs(main_folder, exist_ok=True)
        item.to_frame().reset_index().to_json(f"{main_folder}/default.jsonl", orient="records", lines=True)

    elif isinstance(item, str):
        _write_leaf(item)

    elif item is None:
        # An empty file marks a serialised None.
        _write_leaf("")

    elif isinstance(item, (bool, int, float)):
        # bool/int previously raised "Unknown type"; serialise them like
        # floats. NOTE: load_results parses numeric text back as float, so
        # ints round-trip as floats and bools as the strings "True"/"False".
        _write_leaf(str(item))

    # Checked after the plain-data branches so that serialising ordinary
    # results never evaluates the plotly name unless a figure is present.
    elif isinstance(item, plotly.graph_objs._figure.Figure):
        print(f"Saving {main_folder} to {main_folder}/default.png")
        os.makedirs(main_folder, exist_ok=True)
        item.write_image(f"{main_folder}/default.png")

    else:
        print(main_folder)
        raise Exception(f"Unknown type {type(item)}")



def load_results(data_path):
    """
    Do the reverse of `write_out_results` to reconstruct the item.

    Folder-layout conventions (mirroring the writer):

    - a folder whose entries are exactly ``0`` .. ``N-1``  -> list
    - a folder holding a single ``default.*`` file         -> that leaf value
    - any other folder                                     -> dict of entries
    - ``*.jsonl`` file  -> ``pd.DataFrame`` (restores the "index" column)
    - ``*.png`` file    -> ``None`` (figures are write-only)
    - other files       -> float if parseable, ``None`` if empty, else str

    Args:
        data_path (str): The path to the data to load.

    Returns:
        The reconstructed object (dict, list, DataFrame, float, str or None).
    """
    if os.path.isdir(data_path):
        entries = list(os.listdir(data_path))

        # A folder named 0..N-1 encodes a list (see write_out_results).
        if set(entries) == {str(i) for i in range(len(entries))}:
            return [load_results(os.path.join(data_path, str(i))) for i in range(len(entries))]

        # BUG FIX: previously only a lone "default.jsonl" was unwrapped, so
        # scalar leaves (saved as <key>/default.txt) and figures
        # (<key>/default.png) came back wrapped as {"default.txt": value}
        # instead of the value itself. Any single "default.*" file is the
        # leaf payload of this folder.
        if len(entries) == 1 and entries[0].startswith("default."):
            return load_results(os.path.join(data_path, entries[0]))

        # The dict case: one entry per original key.
        return {name: load_results(os.path.join(data_path, name)) for name in entries}

    elif data_path.endswith(".png"):
        # Figures cannot be reconstructed from the rendered image.
        return None

    elif data_path.endswith(".jsonl"):
        df = pd.read_json(data_path, orient="records", lines=True)
        if "index" in df.columns:
            df = df.set_index("index")
        return df

    else:
        with open(data_path, "r") as f:
            data = f.read()

        try:
            return float(data)
        except ValueError:
            pass  # not a float

        # An empty file marks a serialised None; anything else is a string.
        return None if data == "" else data


if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--clean-battle-file", type=str)
Expand Down Expand Up @@ -383,5 +504,4 @@ def pretty_print_elo_rating(rating):
"anony": anony_results,
"full": full_results,
}
with open(f"elo_results_{cutoff_date}.pkl", "wb") as fout:
pickle.dump(results, fout)
write_out_results(results, f"elo_results_{cutoff_date}")
6 changes: 3 additions & 3 deletions arena_elo/generate_leaderboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import pandas as pd
import pickle
from yaml import safe_load
from .elo_analysis import load_results

RENAME_KEYS = {
"organization": "Organization",
Expand All @@ -15,7 +16,7 @@
}

def main(
elo_rating_pkl: str,
elo_rating_folder: str,
output_csv: str
):

Expand All @@ -31,8 +32,7 @@ def main(
if key in model_info[model]:
model_info[model][RENAME_KEYS[key]] = model_info[model].pop(key)

with open(elo_rating_pkl, "rb") as fin:
elo_rating_results = pickle.load(fin)
elo_rating_results = load_results(elo_rating_folder)
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Results are now saved and loaded as folders, but it does make the commits quite long as each dataframe is a separate file... sorry!


anony_elo_rating_results = elo_rating_results["anony"]
full_elo_rating_results = elo_rating_results["full"]
Expand Down
15 changes: 9 additions & 6 deletions arena_elo/update_elo.sh
Original file line number Diff line number Diff line change
@@ -1,22 +1,25 @@
#!/bin/bash

# Ensure the output root exists. `mkdir -p` creates intermediate directories,
# so a separate `mkdir -p results` beforehand is redundant.
mkdir -p results/latest
Comment on lines 3 to +4
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably don't need mkdir -p results since we have the new line?

# For battle data

for task in "retrieval" "clustering" "sts"; do
python -m arena_elo.clean_battle_data --task_name $task
battle_cutoff_date=`cat cut_off_date.txt` && rm cut_off_date.txt && echo "$task battle data last updated on $battle_cutoff_date"
mkdir -p ./results/$battle_cutoff_date
cp clean_battle_${task}_$battle_cutoff_date.json ./results/latest/clean_battle_$task.json
mv clean_battle_${task}_$battle_cutoff_date.json ./results/$battle_cutoff_date/clean_results_${task}.json
python3 -m arena_elo.elo_analysis --clean-battle-file ./results/$battle_cutoff_date/clean_results_${task}.json --num-bootstrap 1
mv ./elo_results_$battle_cutoff_date.pkl ./results/$battle_cutoff_date/elo_results_${task}.pkl
python -m arena_elo.generate_leaderboard \
--elo_rating_pkl "./results/$battle_cutoff_date/elo_results_${task}.pkl" \
--output_csv "./results/$battle_cutoff_date/${task}_leaderboard.csv"
mv ./elo_results_$battle_cutoff_date ./results/$battle_cutoff_date/elo_results_${task}
cmd="""python -m arena_elo.generate_leaderboard \
--elo_rating_folder "./results/$battle_cutoff_date/elo_results_${task}" \
--output_csv "./results/$battle_cutoff_date/${task}_leaderboard.csv""""
echo $cmd
eval $cmd
mkdir -p ./results/latest
cp ./results/$battle_cutoff_date/${task}_leaderboard.csv ./results/latest/${task}_leaderboard.csv
cp ./results/$battle_cutoff_date/elo_results_${task}.pkl ./results/latest/elo_results_${task}.pkl
cp -R ./results/$battle_cutoff_date/elo_results_${task} ./results/latest/elo_results_${task}
echo "$task leaderboard updated"
done

5 changes: 2 additions & 3 deletions leaderboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import numpy as np
import pandas as pd

from arena_elo.elo_analysis import load_results

leader_component_values = [None] * 5

Expand Down Expand Up @@ -77,9 +78,7 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
md = "Loading ..."
p1 = p2 = p3 = p4 = None
else:
with open(elo_results_file, "rb") as fin:
elo_results = pickle.load(fin)

elo_results = load_results(elo_results_file)
anony_elo_results = elo_results["anony"]
anony_arena_df = anony_elo_results["leaderboard_table_df"]
p1 = anony_elo_results["win_fraction_heatmap"]
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ gritlm
mteb
plotly
umap-learn
kaleido
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apparently is needed to write Plotly plots to file. We could not save to file, but it was saved in the pickle files, so I thought we might as well write it to file for now.

3 changes: 0 additions & 3 deletions results/20240606/elo_results_clustering.pkl

This file was deleted.

3 changes: 0 additions & 3 deletions results/20240606/elo_results_retrieval.pkl

This file was deleted.

3 changes: 0 additions & 3 deletions results/20240606/elo_results_sts.pkl

This file was deleted.

3 changes: 0 additions & 3 deletions results/20240614/elo_results_clustering.pkl

This file was deleted.

3 changes: 0 additions & 3 deletions results/20240614/elo_results_retrieval.pkl

This file was deleted.

3 changes: 0 additions & 3 deletions results/20240614/elo_results_sts.pkl

This file was deleted.

3 changes: 0 additions & 3 deletions results/20240615/elo_results_clustering.pkl

This file was deleted.

3 changes: 0 additions & 3 deletions results/20240616/elo_results_clustering.pkl

This file was deleted.

3 changes: 0 additions & 3 deletions results/20240616/elo_results_retrieval.pkl

This file was deleted.

3 changes: 0 additions & 3 deletions results/20240616/elo_results_sts.pkl

This file was deleted.

3 changes: 0 additions & 3 deletions results/20240617/elo_results_clustering.pkl

This file was deleted.

3 changes: 0 additions & 3 deletions results/20240617/elo_results_retrieval.pkl

This file was deleted.

3 changes: 0 additions & 3 deletions results/20240617/elo_results_sts.pkl

This file was deleted.

3 changes: 0 additions & 3 deletions results/20240618/elo_results_retrieval.pkl

This file was deleted.

3 changes: 0 additions & 3 deletions results/20240620/elo_results_clustering.pkl

This file was deleted.

3 changes: 0 additions & 3 deletions results/20240620/elo_results_sts.pkl

This file was deleted.

3 changes: 0 additions & 3 deletions results/20240624/elo_results_clustering.pkl

This file was deleted.

3 changes: 0 additions & 3 deletions results/20240624/elo_results_retrieval.pkl

This file was deleted.

4 changes: 2 additions & 2 deletions results/20240704/clustering_leaderboard.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
key,Model,Arena Elo rating (anony),Arena Elo rating (full),MTEB Overall Avg,MTEB Retrieval Avg,MTEB Clustering Avg,MTEB STS Avg,License,Organization,Link
sentence-transformers/all-MiniLM-L6-v2,sentence-transformers/all-MiniLM-L6-v2,1051.7304902423814,1036.2219844563203,56.26,41.95,42.35,78.9,Apache-2.0,Sentence Transformers,https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
intfloat/multilingual-e5-small,intfloat/multilingual-e5-small,948.2695097576186,963.7780155436797,57.87,46.64,37.08,79.1,MIT License,Microsoft,https://huggingface.co/intfloat/multilingual-e5-small
sentence-transformers/all-MiniLM-L6-v2,sentence-transformers/all-MiniLM-L6-v2,1051.7304902424,1036.2219844563,56.26,41.95,42.35,78.9,Apache-2.0,Sentence Transformers,https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
intfloat/multilingual-e5-small,intfloat/multilingual-e5-small,948.2695097576,963.7780155437,57.87,46.64,37.08,79.1,MIT License,Microsoft,https://huggingface.co/intfloat/multilingual-e5-small
3 changes: 0 additions & 3 deletions results/20240704/elo_results_clustering.pkl

This file was deleted.

Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"index":0,"sentence-transformers\/all-MiniLM-L6-v2":1094.4361603648,"intfloat\/multilingual-e5-small":905.5638396352}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"index":"sentence-transformers\/all-MiniLM-L6-v2","0":1051.7304902424}
{"index":"intfloat\/multilingual-e5-small","0":948.2695097576}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
987.0071375154072
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1012.9928624845928
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
2024-07-04 12:16:37 PDT
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1720120597.967
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
| Rank | Model | Elo Rating | Description |
| --- | --- | --- | --- |
| 1 | 🥇 [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) | 1052 | all-MiniLM-L6-v2 by Sentence Transformers |
| 2 | 🥈 [intfloat/multilingual-e5-small](https://huggingface.co/intfloat/multilingual-e5-small) | 948 | multilingual-e5-small by Microsoft |
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"index":"intfloat\/multilingual-e5-small","rating":948.2695097576,"variance":null,"rating_q975":905.5638396352,"rating_q025":905.5638396352,"num_battles":24}
{"index":"sentence-transformers\/all-MiniLM-L6-v2","rating":1051.7304902424,"variance":null,"rating_q975":1094.4361603648,"rating_q025":1094.4361603648,"num_battles":24}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bt
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading