-
Notifications
You must be signed in to change notification settings - Fork 1
/
gamma_agreement.py
executable file
·80 lines (60 loc) · 2.43 KB
/
gamma_agreement.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import sys
import pandas as pd
from inceptalytics.utils import construct_feature_path
from inceptalytics import Project
from time import time
from datetime import datetime
import argparse
from pathlib import Path
layer = "webanno.custom.SNOMEDEntity"
features = ['detail', 'value']
# Skip sentences for which gamma computation did not finish after 24 hrs
skip_sentences = [
'18_hodgkin-lymphom_0051.txt_1979-2462',
'22_prostatakarzinom_0153.txt_0-458'
]
def calculate_agreement(file):
results = []
print('> File:', file)
total_start_time = time()
project = Project.from_zipped_xmi(str(file))
selected_annotators = [a for a in project.annotators if a != 'annotator_jw']
for f in features:
print('>> Feature: ', f)
view = project.select(
annotation=construct_feature_path(layer, f),
annotators=selected_annotators,
source_files=project.source_file_names
)
view._annotation_dataframe = view._annotation_dataframe.query("not sentence in @skip_sentences")
n_docs = len(view._annotation_dataframe.index.get_level_values(0).unique())
n_sentences = len(view._annotation_dataframe.index.get_level_values(1).unique())
labels = [l for l in view.labels if l != 'None']
print(labels)
if not labels:
print('>>> Skipping empty layer')
continue
for l in (['all'] + labels):
print('>>> Label:', l)
start_time = time()
gamma = (view if l == 'all' else view.filter_labels(l)).iaa(measure='gamma')
results.append({
'file' : file.name,
'n_docs' : n_docs,
'n_sentences' : n_sentences,
'feature' : f,
'n_anno' : len(selected_annotators),
'label' : l,
'gamma' : gamma
})
end_time = time()
print(">>> %.2f seconds" % (end_time - start_time))
total_end_time = time()
print("> Total time: %.2f seconds" % (total_end_time - total_start_time))
gamma_agreements = pd.DataFrame(results)
gamma_agreements.to_csv(f'{file.name}_{datetime.now().strftime("%Y-%m-%d_%H_%M_%S")}_gamma_agreement_.csv')
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('filename')
args = parser.parse_args()
calculate_agreement(Path(args.filename))