-
Notifications
You must be signed in to change notification settings - Fork 0
/
search
executable file
·97 lines (86 loc) · 3.08 KB
/
search
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!/usr/bin/env /root/miniconda3/bin/python
import argparse
import json
import shlex
import subprocess
# Progress message emitted as soon as the script starts, before any
# command-line arguments are parsed.
print("Searching...")
def evaluate_command(name, k,
                     algorithm="block_max_wand", index_type="block_simdbp",
                     wand_type="vbmw", stemmer="porter2"):
    """Build the shell command line that runs ``evaluate_queries``.

    Every interpolated value is passed through ``shlex.quote`` so that a
    collection name (or any other argument) containing spaces or shell
    metacharacters is handed to the tool as a single literal argument
    instead of being interpreted by the shell.  Values made only of
    letters, digits and ``@%+=:,./-_`` are emitted unchanged.

    Args:
        name: Prefix used to derive the input file names
            (``{name}.{index_type}``, ``{name}.{wand_type}``,
            ``{name}.fwd.bp.doclex``, ``{name}.fwd.termlex`` and
            ``{name}.title``).
        k: Value passed to ``-k``; converted with ``str()``.
        algorithm: Value passed to ``-a``.
        index_type: Value passed to ``-t``; also the index file suffix.
        wand_type: Suffix of the file passed to ``-w``.
        stemmer: Value passed to ``--stemmer``.

    Returns:
        A single command string whose standard output is redirected to
        ``/output/BM25``.  Because of that redirection the string has to
        be executed through a shell.
    """
    q = shlex.quote
    # The value given to -r is assembled from the chosen configuration.
    run_id = f"{algorithm}-{index_type}-{wand_type}-{stemmer}"
    return ("evaluate_queries "
            f"-t {q(index_type)} "
            f"-a {q(algorithm)} "
            f"-i {q(f'{name}.{index_type}')} "
            f"-w {q(f'{name}.{wand_type}')} "
            f"-r {q(run_id)} "
            f"--stemmer {q(stemmer)} "
            f"--documents {q(f'{name}.fwd.bp.doclex')} "
            f"--terms {q(f'{name}.fwd.termlex')} "
            f"-k {q(str(k))} "
            f"-q {q(f'{name}.title')} "
            "> /output/BM25")
# Allow user to input stemmer, validate
def get_stemmer(options):
    """Return the stemmer selected in *options*, or the default.

    Args:
        options: Container supporting ``in`` and key lookup; the
            ``"stemmer"`` entry, if present, is compared
            case-insensitively against the supported names.

    Returns:
        ``"porter2"`` or ``"krovetz"``.  ``"porter2"`` is returned (and a
        notice is printed) when the entry is missing, is not a string,
        or is not one of the supported names.
    """
    valid = ("porter2", "krovetz")
    stemmer = "porter2"
    requested = options["stemmer"] if "stemmer" in options else None
    # JSON input can carry null or a number here; only a string can be
    # lower-cased, so anything else falls back to the default.
    if isinstance(requested, str) and requested.lower() in valid:
        stemmer = requested.lower()
    else:
        print("Using default stemmer: " + stemmer)
    return stemmer
# Allow user to input compression type
def get_compressor(options):
    """Return the compression type selected in *options*, or the default.

    Args:
        options: Container supporting ``in`` and key lookup; the
            ``"compressor"`` entry, if present, is compared
            case-insensitively against the supported names.

    Returns:
        One of ``"opt"``, ``"block_interpolative"``, ``"block_simdbp"``,
        ``"block_optpfor"`` or ``"block_varintg8iu"``.  ``"block_simdbp"``
        is returned (and a notice is printed) when the entry is missing,
        is not a string, or is not one of the supported names.
    """
    valid = ("opt", "block_interpolative", "block_simdbp", "block_optpfor",
             "block_varintg8iu")
    compressor = "block_simdbp"
    requested = options["compressor"] if "compressor" in options else None
    # JSON input can carry null or a number here; only a string can be
    # lower-cased, so anything else falls back to the default.
    if isinstance(requested, str) and requested.lower() in valid:
        compressor = requested.lower()
    else:
        print("Using default compressor: " + compressor)
    return compressor
# Allow user to input algorithm
def get_algo(options):
    """Return the query algorithm selected in *options*, or the default.

    Args:
        options: Container supporting ``in`` and key lookup; the
            ``"algorithm"`` entry, if present, is compared
            case-insensitively against the supported names.

    Returns:
        ``"maxscore"``, ``"wand"`` or ``"block_max_wand"``.
        ``"block_max_wand"`` is returned (and a notice is printed) when
        the entry is missing, is not a string, or is not one of the
        supported names.
    """
    valid = ("maxscore", "wand", "block_max_wand")
    algo = "block_max_wand"
    requested = options["algorithm"] if "algorithm" in options else None
    # JSON input can carry null or a number here; only a string can be
    # lower-cased, so anything else falls back to the default.
    if isinstance(requested, str) and requested.lower() in valid:
        algo = requested.lower()
    else:
        print("Using default algorithm: " + algo)
    return algo
# Allow user to input whether they selected fixed or variable-size blocks
def get_wand_type(options):
    """Map the ``"wand_type"`` option to the file suffix it selects.

    Args:
        options: Container supporting ``in`` and key lookup; the
            ``"wand_type"`` entry, if present, is compared
            case-insensitively against ``"variable"`` and ``"fixed"``.

    Returns:
        ``"bmw"`` when ``"fixed"`` was requested, otherwise ``"vbmw"``.
        When the entry is missing, is not a string, or is not a
        supported name, ``"variable"`` is assumed and a notice is printed.
    """
    valid = ("variable", "fixed")
    wand_type = "variable"
    requested = options["wand_type"] if "wand_type" in options else None
    # JSON input can carry null or a number here; only a string can be
    # lower-cased, so anything else falls back to the default.
    if isinstance(requested, str) and requested.lower() in valid:
        wand_type = requested.lower()
    else:
        print("Using default wand type: " + wand_type)
    return "bmw" if wand_type == "fixed" else "vbmw"
# Command-line driver: read the JSON request, prepare the query file, then
# run the evaluation command.
parser = argparse.ArgumentParser()
parser.add_argument("--json", type=json.loads, required=True,
                    help="the json input")
# parse_known_args() tolerates extra arguments; they are discarded.
args, _ = parser.parse_known_args()
name, k = args.json["collection"]["name"], args.json["top_k"]

# Grab user opts/defaults.  A missing or null "opts" entry is treated as
# "no options given" so that every setting falls back to its default.
options = args.json.get("opts") or {}
compressor = get_compressor(options)
stemmer = get_stemmer(options)
algo = get_algo(options)
wand_type = get_wand_type(options)

topic_path = args.json["topic"]["path"]

# Do not extract topics if `--opts not_extract_topics` is passed, but just
# rename it.  Both commands are given as argument lists and run without a
# shell, so a path or collection name containing spaces or shell
# metacharacters is passed through literally.
if "not_extract_topics" in options:
    subprocess.check_call(["cp", topic_path, f"{name}.title"])
else:
    subprocess.check_call(["extract_topics", "-i", topic_path, "-o", name])

# The evaluation command contains an output redirection, so it has to be
# run through a shell.
subprocess.check_call(
    evaluate_command(name, k, algo, compressor, wand_type, stemmer),
    shell=True
)