-
Notifications
You must be signed in to change notification settings - Fork 10
/
extract_dialogs.py
52 lines (45 loc) · 1.67 KB
/
extract_dialogs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import json
import argparse
import os
from utils import GenshinLoader
def build_parser():
    """Build the command-line parser for the dialog extraction script.

    Returns:
        argparse.ArgumentParser: parser exposing ``--repo``, ``--lang``,
        ``--ignore_dialogue_branch`` and ``--n_utter``.
    """
    parser = argparse.ArgumentParser()
    # --repo is mandatory, so a default would never be consulted; pass the
    # data directory explicitly (e.g. Path/To/AnimeGameData).
    parser.add_argument("--repo", type=str, required=True, help="data dir")
    parser.add_argument("--lang", default="CHS", type=str, help="language type")
    # NOTE(review): with action="store_false" the value defaults to True and
    # passing the flag sets it to False, which looks inverted relative to the
    # flag name. Kept unchanged so existing command lines behave the same.
    parser.add_argument(
        "--ignore_dialogue_branch",
        action="store_false",
        help="whether to ignore branch (only choose a random branch)",
    )
    parser.add_argument(
        "--n_utter",
        default=1000,
        type=int,
        help="max number of utterances for a session",
    )
    return parser


def write_jsonl(path, records):
    """Write every item of *records* to *path*, one JSON document per line.

    The parent directory of *path* is created when it does not exist yet, so
    a first run does not fail with ``FileNotFoundError``. The file is encoded
    as UTF-8 and non-ASCII characters are written verbatim, not escaped.

    Args:
        path: destination file; overwritten if it already exists.
        records: iterable of JSON-serializable objects.
    """
    parent = os.path.dirname(path)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w", encoding="utf-8") as f:
        for record in records:
            print(json.dumps(record, ensure_ascii=False), file=f)


def main(argv=None):
    """Extract dialogs via ``GenshinLoader`` and dump them as JSONL files.

    Args:
        argv: argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.
    """
    args = build_parser().parse_args(argv)

    # load genshin data
    genshin = GenshinLoader(repo=args.repo, lang=args.lang)

    # process
    output_dialog_list, nodes_per_session = genshin.process_dialog(
        max_utter=args.n_utter, ignore_dialogue_branch=args.ignore_dialogue_branch
    )

    output_dir = "extracted_dialog"

    # output dialog
    # NOTE(review): the raw dump is assumed to be guarded by the same check
    # as the processed dump, i.e. nothing is written when no dialog was
    # produced -- confirm against the intended behaviour.
    if output_dialog_list:
        output_file = os.path.join(output_dir, "dialog_{}.jsonl".format(args.lang))
        write_jsonl(output_file, output_dialog_list)
        print("Output dialog at {}".format(output_file))

        output_file = os.path.join(output_dir, "raw_dialog_{}.jsonl".format(args.lang))
        write_jsonl(output_file, nodes_per_session)
        print("Output dialog at {}".format(output_file))


if __name__ == "__main__":
    main()