Skip to content

Commit

Permalink
renamed runbooks, increased max pts on runbooks
Browse files Browse the repository at this point in the history
  • Loading branch information
magdalendobson committed Nov 19, 2024
1 parent 1ac1618 commit 38e4c08
Show file tree
Hide file tree
Showing 8 changed files with 2,358 additions and 3,304 deletions.
26 changes: 26 additions & 0 deletions benchmark/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,30 @@ def __init__(self, nb_M=1000):
def distance(self):
return "euclidean"

# 1M slice of MSTuring dataset, with ground truth corresponding to the 10K query set
# this is needed for backwards compatibility with the streaming code
class MSTuringANNSPQ(BillionScaleDatasetCompetitionFormat):
    """Slice of the MSTuring ANNS data paired with the 10K query file.

    Base vectors are read from ``base1b.fbin`` and queries from
    ``testQuery10K.fbin``. A ground-truth file name is configured only for
    the 1M slice (``nb_M == 1``); for every other size ``gt_fn`` is ``None``.
    """

    def __init__(self, nb_M=1):
        """Set up the dataset attributes for a slice of ``nb_M`` million points."""
        # Sizes: nb is nb_M expressed in single points.
        self.nb_M = nb_M
        self.nb = 10**6 * nb_M
        self.d = 100
        self.nq = 10000
        self.dtype = "float32"

        # File names for the base set, the query set and the ground truth.
        self.ds_fn = "base1b.fbin"
        self.qs_fn = "testQuery10K.fbin"
        if self.nb_M == 1:
            self.gt_fn = "msturing-1M-private-gt100"
        else:
            # No ground-truth file is configured for other slice sizes.
            self.gt_fn = None

        # Remote location and local directory for this dataset.
        self.base_url = "https://comp21storage.z5.web.core.windows.net/comp21/MSFT-TURING-ANNS/"
        self.basedir = os.path.join(BASEDIR, "MSTuringANNSPQ")

        # No private query / ground-truth URLs are set for this dataset.
        self.private_qs_url = None
        self.private_gt_url = None

    def distance(self):
        """Return the name of the distance metric for this dataset."""
        return "euclidean"

class MSTuringClustered10M(DatasetCompetitionFormat):
def __init__(self):
self.nb = 10**6 * 10
Expand Down Expand Up @@ -1285,6 +1309,8 @@ def short_name(self):
'msturing-10M': lambda : MSTuringANNS(10),
'msturing-1M': lambda : MSTuringANNS(1),

'msturingpq-1M': lambda : MSTuringANNSPQ(1),

'msturing-10M-clustered': lambda: MSTuringClustered10M(),
'msturing-30M-clustered': lambda: MSTuringClustered30M(),

Expand Down
3 changes: 3 additions & 0 deletions data_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ def cleaned_run_metric(run_metrics):
'neurips23/runbooks/delete_runbook.yaml',
'neurips23/runbooks/final_runbook.yaml',
'neurips23/runbooks/msturing-10M_slidingwindow_runbook.yaml',
'neurips23/runbooks/msturingpq-1M_expiration_time_runbook.yaml',
'neurips23/runbooks/msturingpq-1M_expiration_time_replace_only_runbook.yaml',
'neurips23/runbooks/msturingpq-1M_expiration_time_replace_delete_runbook.yaml',
'neurips23/runbooks/wikipedia-35M_expirationtime_runbook.yaml',
'neurips23/runbooks/wikipedia-1M_expiration_time_runbook.yaml',
'neurips23/runbooks/wikipedia-35M_expiration_time_replace_only_runbook.yaml',
Expand Down
26 changes: 18 additions & 8 deletions neurips23/runbooks/gen_expiration_time_runbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,21 +178,31 @@ def gen_exp_time_runbook(dataset_name, dataset_size, max_t, runbook_filename, ra

# Settings for generating 'msturingpq-1M_expiration_time_runbook.yaml'.
ratios = (0, 4, 18)
timesteps = (0, 100, 20)
# NOTE(review): the next five assignments (seed through max_t) are all
# reassigned immediately below, before gen_exp_time_runbook is called, so
# these values are never used as the text stands. They look like the
# pre-change side of the diff -- confirm and drop them.
seed = 732
dataset_file = 'msturing-35M_expiration_time_runbook.yaml'
dataset_name = 'msturing-100M'
dataset_size = 35000000
max_t = 350
# Effective values passed to the generator call below.
seed = 5554
dataset_file = 'msturingpq-1M_expiration_time_runbook.yaml'
dataset_name = 'msturingpq-1M'
dataset_size = 1000000
max_t = 100
gt_url = None
# The eighth positional argument is False here; its meaning is set by
# gen_exp_time_runbook's signature, which is not fully visible in this
# view -- TODO confirm.
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, False, gt_url)

# Settings for generating
# 'msturingpq-1M_expiration_time_replace_only_runbook.yaml'.
ratios = (0, 4, 18)
timesteps = (0, 100, 20)
# NOTE(review): seed and dataset_file are each assigned twice in a row; only
# the second value of each (762 and the replace_only file name) is in effect
# when the generator is called. The first pair looks like the pre-change side
# of the diff -- confirm and drop it.
seed = 5554
dataset_file = 'msturing-1M_expiration_time_runbook.yaml'
seed = 762
dataset_file = 'msturingpq-1M_expiration_time_replace_only_runbook.yaml'
# NOTE(review): dataset_name is 'msturing-1M' although the runbook file name
# uses the 'msturingpq-1M' prefix and the other two configurations in this
# script use dataset_name = 'msturingpq-1M' -- confirm which dataset is
# intended here.
dataset_name = 'msturing-1M'
dataset_size = 1000000
max_t = 100
gt_url = None
# NOTE(review): the generator is called twice with the same dataset_file and
# seed, differing only in the trailing positional flags (False vs. True plus
# an extra False). The first call looks like the pre-change line of the
# diff -- confirm that only the second call is intended. The meaning of the
# boolean flags is set by gen_exp_time_runbook's signature, which is not
# fully visible in this view.
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, False, gt_url)
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, gt_url, False)

# Settings for generating
# 'msturingpq-1M_expiration_time_replace_delete_runbook.yaml'.
# Unlike the two configurations above, this one uses ratios (1, 8, 18).
ratios = (1, 8, 18)
timesteps = (0, 100, 20)
seed = 83
dataset_file = 'msturingpq-1M_expiration_time_replace_delete_runbook.yaml'
dataset_name = 'msturingpq-1M'
dataset_size = 1000000
max_t = 100
gt_url = None
# The eighth positional argument is True here and no tenth argument is
# passed, so any default for it applies; the flag semantics are set by
# gen_exp_time_runbook's signature, which is not fully visible in this
# view -- TODO confirm.
gen_exp_time_runbook(dataset_name, dataset_size, max_t, dataset_file, ratios, timesteps, seed, True, gt_url)

Loading

0 comments on commit 38e4c08

Please sign in to comment.