-
Notifications
You must be signed in to change notification settings - Fork 187
/
demonstrations_statistics.py
116 lines (75 loc) · 3.43 KB
/
demonstrations_statistics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import json
import glob
import argparse
import re
import datetime
# Regular expression for a DOI string: the prefix "10." followed by at least
# four digits, optional further ".<digits>" groups, a "/", and then one or more
# non-whitespace characters other than ", &, ', < and >. The whole expression
# is wrapped in word boundaries (\b).
# NOTE: the "check" action applies it with re.match, which anchors only at the
# start of the string, so trailing text after a valid DOI is not rejected.
DOI_PATTERN = r"\b(10[.][0-9]{4,}(?:[.][0-9]+)*/(?:(?![\"&\'<>])\S)+)\b"
def getAllMetadata():
    """Load every demonstration metadata file into a dictionary.

    Reads all files matching ``demonstrations/*.metadata.json`` relative to
    the current working directory and parses each one as UTF-8 JSON.

    Returns:
        A dict mapping each file path with its ``.metadata.json`` suffix
        removed (for example ``demonstrations/foo``) to the parsed JSON
        content of that file. The dict is empty when no file matches.

    Raises:
        json.JSONDecodeError: If a matched file does not contain valid JSON.
        OSError: If a matched file cannot be opened.
    """
    suffix = ".metadata.json"
    metadatas = {}
    # glob returns matches in arbitrary order; sorting makes the dictionary
    # order (and everything printed from it) reproducible between runs.
    for filePath in sorted(glob.glob("demonstrations/*.metadata.json")):
        # Every match ends with a suffix of this length, so slice it off by
        # length. Searching for the first ".metadata" substring instead would
        # truncate names that contain ".metadata" earlier in the path.
        fileName = filePath[:-len(suffix)]
        with open(filePath, "r", encoding="utf-8") as fo:
            metadatas[fileName] = json.load(fo)
    return metadatas
if __name__ == "__main__":
    # Command-line entry point. --action selects exactly one task below;
    # --title-1 and --title-2 are only read by the "retitle-category" action.
    parser = argparse.ArgumentParser()
    parser.add_argument("--action")
    parser.add_argument("--title-1")
    parser.add_argument("--title-2")
    arguments = parser.parse_args()

    if arguments.action == "count":
        # Print the number of metadata files found.
        print(len(getAllMetadata()))

    elif arguments.action == "count_per_year":
        # Tally demonstrations by the four-character year prefix of
        # "dateOfPublication".
        countsByPrefix = {}
        for metadata in getAllMetadata().values():
            prefix = metadata["dateOfPublication"][:4]
            countsByPrefix[prefix] = countsByPrefix.get(prefix, 0) + 1
        # Always report 2018-2023, and extend the range up to the latest year
        # present in the data so newer demonstrations are not silently omitted.
        lastYear = max([2023] + [int(p) for p in countsByPrefix if len(p) == 4 and p.isdecimal()])
        for year in range(2018, lastYear + 1):
            print("{0}: {1}".format(year, countsByPrefix.get(str(year), 0)))

    elif arguments.action == "check":
        # Validate metadata fields; only DOI formatting problems are reported.
        for name, metadata in getAllMetadata().items():
            if not metadata["seoDescription"].endswith("."):
                # Reporting of descriptions without a trailing full stop is
                # currently disabled.
                pass
            if len(metadata["categories"]) == 0:
                # Reporting of demonstrations without any category is
                # currently disabled.
                pass
            for doi in metadata["basedOnPapers"]:
                if doi != "" and not re.match(DOI_PATTERN, doi):
                    print("{0} has an incorrectly-formatted DOI.".format(name))
            for reference in metadata["references"]:
                # A reference without a "doi" key is treated like an empty DOI.
                doi = reference.get("doi", "")
                if doi != "" and not re.match(DOI_PATTERN, doi):
                    print("{0} has an incorrectly-formatted DOI.".format(name))

    elif arguments.action == "retitle-category":
        # Rename category --title-1 to --title-2 in every metadata file.
        if arguments.title_1 is None or arguments.title_2 is None:
            # Fail with a usage message instead of an AttributeError on None.
            parser.error("--title-1 and --title-2 are required for --action retitle-category")
        oldTitle = arguments.title_1.strip()
        newTitle = arguments.title_2.strip()
        for path in glob.glob("./demonstrations/*.metadata.json"):
            with open(path, "r", encoding="utf-8") as fo:
                metadata = json.load(fo)
            # Category names are compared and stored with surrounding
            # whitespace stripped.
            retitled = []
            for category in metadata["categories"]:
                stripped = category.strip()
                retitled.append(newTitle if stripped == oldTitle else stripped)
            metadata["categories"] = retitled
            # Every file is rewritten, whether or not a category changed.
            with open(path, "w", encoding="utf-8") as fo:
                json.dump(metadata, fo, indent=4, ensure_ascii=False)

    elif arguments.action == "get_all_categories_used":
        # Print the distinct non-blank category names in first-seen order.
        # A dict is used as an insertion-ordered set.
        categories = {}
        for path in glob.glob("./demonstrations/*.metadata.json"):
            with open(path, "r", encoding="utf-8") as fo:
                metadata = json.load(fo)
            for category in metadata["categories"]:
                if category.strip() != "":
                    categories[category] = category
        print(list(categories))

    elif arguments.action == "get_most_recent_demos":
        # Print title and publication date of the five newest demonstrations.
        def publicationDate(metadata):
            # Parse the publication timestamp used as the sort key.
            return datetime.datetime.strptime(metadata["dateOfPublication"], "%Y-%m-%dT%H:%M:%S")

        mostRecent = sorted(getAllMetadata().values(), key=publicationDate, reverse=True)
        for metadata in mostRecent[:5]:
            print(metadata["title"] + ", " + metadata["dateOfPublication"])