forked from roman807/Fraud-Detection
-
Notifications
You must be signed in to change notification settings - Fork 0
/
eval_results.py
63 lines (57 loc) · 2.95 KB
/
eval_results.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
# Roman Moser, 3/28/19
"""
eval_results.py
* calculate costs and max_costs
* evaluate results (called from main.py -> print results to console)
* define scores_and_costs for results.ipynb
"""
import numpy as np
from sklearn.metrics import f1_score
def costs(y_true, y_pred, cost_mat):
    """Return the total cost of a set of predictions, rounded to 2 decimals.

    Each sample contributes exactly one entry of its row in ``cost_mat``,
    selected by the (true label, predicted label) pair:

    * column 0 -- true 0, predicted 1
    * column 1 -- true 1, predicted 0
    * column 2 -- true 1, predicted 1
    * column 3 -- true 0, predicted 0

    Args:
        y_true: Array-like of 0/1 true labels, one per sample.
        y_pred: Array-like of 0/1 predicted labels, one per sample.
        cost_mat: Array-like with one row per sample and at least four
            columns laid out as described above.

    Returns:
        The summed per-sample cost, rounded to two decimal places.
    """
    # Coerce to arrays so plain Python lists work as well as ndarrays.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    cost_mat = np.asarray(cost_mat)

    cost_pred1_true0 = cost_mat[:, 0]
    cost_pred0_true1 = cost_mat[:, 1]
    cost_pred1_true1 = cost_mat[:, 2]
    cost_pred0_true0 = cost_mat[:, 3]

    cost_when_true1 = (1 - y_pred) * cost_pred0_true1 + y_pred * cost_pred1_true1
    cost_when_true0 = y_pred * cost_pred1_true0 + (1 - y_pred) * cost_pred0_true0
    per_sample = y_true * cost_when_true1 + (1 - y_true) * cost_when_true0

    # np.sum reduces in native code; the builtin sum() loops in Python.
    return np.round(np.sum(per_sample), 2)
def max_costs(y_true, cost_mat):
    """Return the total cost incurred when every sample is predicted negative.

    Samples whose true label is 1 contribute column 1 of their ``cost_mat``
    row, samples whose true label is 0 contribute column 3.

    Args:
        y_true: Array-like of 0/1 true labels, one per sample.
        cost_mat: Array-like with one row per sample and at least four
            columns.

    Returns:
        The summed (unrounded) cost of the all-negative prediction.
    """
    # Coerce to arrays so plain Python lists work as well as ndarrays.
    y_true = np.asarray(y_true)
    cost_mat = np.asarray(cost_mat)

    per_sample = y_true * cost_mat[:, 1] + (1 - y_true) * cost_mat[:, 3]
    # np.sum reduces in native code; the builtin sum() loops in Python.
    return np.sum(per_sample)
def evaluate(name, y_train, y_test, y_pred_train, y_pred_test, cost_mat_train, cost_mat_test):
    """Print positive rate, F1-score and cost savings for train and test data.

    Every argument except ``name`` is an iterable with one entry per fold.
    The six iterables are walked in lockstep, so iteration stops at the
    shortest of them.

    Args:
        name: Header line printed above the metrics.
        y_train: Per-fold true labels of the training data.
        y_test: Per-fold true labels of the test data.
        y_pred_train: Per-fold predicted training labels; each entry must
            provide ``.sum()`` and ``.shape`` (e.g. a NumPy array).
        y_pred_test: Per-fold predicted test labels, same requirements.
        cost_mat_train: Per-fold cost matrices for the training data, passed
            to ``costs`` and ``max_costs``.
        cost_mat_test: Per-fold cost matrices for the test data.

    Returns:
        None. The report is written to stdout.
    """
    def fold_metrics(y_true, y_pred, cost_mat):
        # Metrics of one fold: (share of positive predictions, F1, cost savings).
        positive_rate = np.round(y_pred.sum() / y_pred.shape[0], 3)
        f1 = np.round(f1_score(y_true, y_pred), 3)
        baseline = max_costs(y_true, cost_mat)
        incurred = costs(y_true, y_pred, cost_mat)
        # Savings relative to the all-negative baseline.
        # NOTE(review): undefined (inf/nan) when the baseline cost is 0.
        savings = np.round((baseline - incurred) / baseline, 3)
        return positive_rate, f1, savings

    labels = ('Positives', 'F1-score', 'Cost savings')
    train_values = [[] for _ in labels]
    test_values = [[] for _ in labels]

    folds = zip(y_train, y_test, y_pred_train, y_pred_test, cost_mat_train, cost_mat_test)
    # Distinct loop names: the original rebound (shadowed) all six parameters.
    for fold_y_tr, fold_y_te, fold_p_tr, fold_p_te, fold_c_tr, fold_c_te in folds:
        for bucket, value in zip(train_values, fold_metrics(fold_y_tr, fold_p_tr, fold_c_tr)):
            bucket.append(value)
        for bucket, value in zip(test_values, fold_metrics(fold_y_te, fold_p_te, fold_c_te)):
            bucket.append(value)

    print('--------------------')
    print(name)
    for label, per_fold_train, per_fold_test in zip(labels, train_values, test_values):
        for split, per_fold in (('train', per_fold_train), ('test', per_fold_test)):
            # Uniform "<metric> <split>: " prefix; the original omitted the
            # colon on the "Positives train" line only.
            print(label + ' ' + split + ': ' + str(np.round(np.mean(per_fold), 3)) +
                  ', individual: ' + str(per_fold))
def scores_and_costs(y_true_l, y_pred_l, cost_mat):
    """Compute per-fold F1-scores and relative cost savings.

    The three arguments are iterated in lockstep, one entry per fold;
    iteration stops at the shortest of them.

    Args:
        y_true_l: Iterable of per-fold true labels.
        y_pred_l: Iterable of per-fold predicted labels.
        cost_mat: Iterable of per-fold cost matrices, passed to ``costs``
            and ``max_costs``.

    Returns:
        A tuple ``(f1_scores, cost_savings)`` of two lists, each holding one
        value per fold rounded to three decimals.
    """
    f1_scores = []
    cost_savings = []
    for fold_true, fold_pred, fold_cost_mat in zip(y_true_l, y_pred_l, cost_mat):
        fold_f1 = f1_score(fold_true, fold_pred)
        f1_scores.append(np.round(fold_f1, 3))

        # Savings are measured relative to the all-negative baseline cost.
        baseline_cost = max_costs(fold_true, fold_cost_mat)
        incurred_cost = costs(fold_true, fold_pred, fold_cost_mat)
        relative_saving = (baseline_cost - incurred_cost) / baseline_cost
        cost_savings.append(np.round(relative_saving, 3))
    return f1_scores, cost_savings