-
Notifications
You must be signed in to change notification settings - Fork 0
/
loss_function.py
147 lines (118 loc) · 5.13 KB
/
loss_function.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
"""
Created on :2019/03/30
@author: Freeman, foreverfc1994
"""
import math
import abc
class LossFunction(metaclass=abc.ABCMeta):
    """Abstract interface for a gradient-boosting loss.

    Concrete losses store per-iteration scores in DataFrame columns named
    'f_<iter>' and residuals in 'res_<iter>' (see SquaresError /
    BinomialDeviance in this module).
    """
    @abc.abstractmethod
    def initialize_f_0(self, data):
        """Initialize the base score F_0 and write it to data['f_0']."""
    @abc.abstractmethod
    def calculate_residual(self, data, iter):
        """Compute the negative gradient for iteration `iter`."""
    @abc.abstractmethod
    def update_f_m(self, data, trees, iter, learning_rate, logger):
        """Compute F_m = F_{m-1} + learning_rate * tree_m prediction."""
    @abc.abstractmethod
    def update_leaf_values(self, targets, y):
        """Compute the optimal prediction value for one leaf."""
    @abc.abstractmethod
    def get_train_loss(self, y, f, iter, logger):
        """Compute and log the training loss after iteration `iter`."""
class SquaresError(LossFunction):
    """Squared-error loss for regression: L(y, F) = (y - F)^2 / 2.

    The negative gradient is the plain residual y - F, and the optimal
    leaf value is the mean of the residuals falling into that leaf.
    """

    def initialize_f_0(self, data):
        """Initialize F_0 with the label mean (the minimizer of squared error).

        Writes the value into data['f_0'] and returns it.
        """
        # BUG FIX: the original wrote data['labelsuper'].mean() (a column
        # that does not exist) while returning data['label'].mean().
        # Both must use the 'label' column.
        f_0 = data['label'].mean()
        data['f_0'] = f_0
        return f_0

    def calculate_residual(self, data, iter):
        """Compute the negative gradient (residual) for iteration `iter`."""
        res_name = 'res_' + str(iter)
        f_prev_name = 'f_' + str(iter - 1)
        data[res_name] = data['label'] - data[f_prev_name]

    def update_f_m(self, data, trees, iter, learning_rate, logger):
        """Compute F_m = F_{m-1} + learning_rate * tree_m, row-wise by leaf membership."""
        f_prev_name = 'f_' + str(iter - 1)
        f_m_name = 'f_' + str(iter)
        data[f_m_name] = data[f_prev_name]
        for leaf_node in trees[iter].leaf_nodes:
            data.loc[leaf_node.data_index, f_m_name] += learning_rate * leaf_node.predict_value
        # Log this tree's training loss.
        self.get_train_loss(data['label'], data[f_m_name], iter, logger)

    def update_leaf_values(self, targets, y):
        """For squared error the optimal leaf value is the mean residual."""
        return targets.mean()

    def get_train_loss(self, y, f, iter, logger):
        """Log the mean squared error between labels y and current scores f."""
        loss = ((y - f) ** 2).mean()
        logger.info(('第%d棵树: mse_loss:%.4f' % (iter, loss)))
class BinomialDeviance(LossFunction):
    """Binomial deviance (log-loss) for binary classification.

    The score columns f_<iter> hold raw log-odds; the predicted
    probability of the positive class is sigmoid(F).
    """

    def initialize_f_0(self, data):
        """Initialize F_0 with the empirical log-odds ln(pos/neg).

        Assumes the data contains at least one positive and one negative
        example (otherwise pos/neg degenerates).
        """
        pos = data['label'].sum()
        neg = data.shape[0] - pos
        # log here is the natural logarithm (ln)
        f_0 = math.log(pos / neg)
        data['f_0'] = f_0
        return f_0

    def calculate_residual(self, data, iter):
        """Negative gradient: y - sigmoid(F_{m-1})."""
        res_name = 'res_' + str(iter)
        f_prev_name = 'f_' + str(iter - 1)
        data[res_name] = data['label'] - 1 / (1 + data[f_prev_name].apply(lambda x: math.exp(-x)))

    def update_f_m(self, data, trees, iter, learning_rate, logger):
        """Compute F_m = F_{m-1} + learning_rate * tree_m, row-wise by leaf membership."""
        f_prev_name = 'f_' + str(iter - 1)
        f_m_name = 'f_' + str(iter)
        data[f_m_name] = data[f_prev_name]
        for leaf_node in trees[iter].leaf_nodes:
            data.loc[leaf_node.data_index, f_m_name] += learning_rate * leaf_node.predict_value
        # Log this tree's training loss.
        self.get_train_loss(data['label'], data[f_m_name], iter, logger)

    def update_leaf_values(self, targets, y):
        """One Newton step for a leaf: sum(residuals) / sum(p * (1 - p)).

        With targets = y - p, the denominator (y - targets) * (1 - y + targets)
        is exactly p * (1 - p). Guards against empty/zero numerators and a
        vanishing denominator.
        """
        numerator = targets.sum()
        if numerator == 0:
            return 0.0
        denominator = ((y - targets) * (1 - y + targets)).sum()
        if abs(denominator) < 1e-150:
            return 0.0
        else:
            return numerator / denominator

    def get_train_loss(self, y, f, iter, logger):
        """Log the binomial deviance: -2 * mean(y*F - log(1 + exp(F)))."""
        # BUG FIX: the original computed math.exp(1 + x); the deviance term
        # is log(1 + exp(F)) — the two were evidently transposed.
        loss = -2.0 * ((y * f) - f.apply(lambda x: math.log(1 + math.exp(x)))).mean()
        logger.info(('第%d棵树: log-likelihood:%.4f' % (iter, loss)))
class MultinomialDeviance:
    """Multinomial deviance for K-class classification.

    One score column per class and iteration ('f_<class>_<iter>'); class
    probabilities are the softmax over the per-class scores. NOTE: unlike
    the other losses this class does not inherit LossFunction — its method
    signatures carry an extra class_name argument.
    """

    def init_classes(self, classes):
        # `classes` must be iterable and expose `.size`
        # (e.g. a numpy array of class names).
        self.classes = classes

    def initialize_f_0(self, data, class_name):
        """Initialize F_0 for one class with its prior frequency.

        FIX: removed the spurious @abc.abstractmethod decorator — this is a
        concrete implementation and the class does not use ABCMeta, so the
        decorator was a misleading no-op.
        """
        label_name = 'label_' + class_name
        f_name = 'f_' + class_name + '_0'
        class_counts = data[label_name].sum()
        f_0 = class_counts / len(data)
        data[f_name] = f_0
        return f_0

    def calculate_residual(self, data, iter):
        """Negative gradient per class: y_k - softmax_k(F_{m-1})."""
        # Shared softmax denominator, computed once per row.
        data['sum_exp'] = data.apply(lambda x:
                                     sum([math.exp(x['f_' + i + '_' + str(iter - 1)]) for i in self.classes]),
                                     axis=1)
        for class_name in self.classes:
            label_name = 'label_' + class_name
            res_name = 'res_' + class_name + '_' + str(iter)
            f_prev_name = 'f_' + class_name + '_' + str(iter - 1)
            data[res_name] = data[label_name] - math.e ** data[f_prev_name] / data['sum_exp']

    def update_f_m(self, data, trees, iter, class_name, learning_rate, logger):
        """Compute F_m for one class: F_{m-1} + learning_rate * tree_m."""
        f_prev_name = 'f_' + class_name + '_' + str(iter - 1)
        f_m_name = 'f_' + class_name + '_' + str(iter)
        data[f_m_name] = data[f_prev_name]
        for leaf_node in trees[iter][class_name].leaf_nodes:
            data.loc[leaf_node.data_index, f_m_name] += learning_rate * leaf_node.predict_value
        # Log this tree's training loss.
        # NOTE(review): this reads the raw data['label'] column rather than the
        # one-hot 'label_<class>' columns — verify that column exists for
        # multiclass inputs and that mixing it with a per-class F is intended.
        self.get_train_loss(data['label'], data[f_m_name], iter, logger)

    def update_leaf_values(self, targets, y):
        """One Newton step for a leaf, scaled by (K-1)/K for K classes
        (Friedman 2001, Algorithm 6). Guards zero numerator and vanishing
        denominator."""
        numerator = targets.sum()
        if numerator == 0:
            return 0.0
        numerator *= (self.classes.size - 1) / self.classes.size
        denominator = ((y - targets) * (1 - y + targets)).sum()
        if abs(denominator) < 1e-150:
            return 0.0
        else:
            return numerator / denominator

    def get_train_loss(self, y, f, iter, logger):
        """Log the deviance: -2 * mean(y*F - log(1 + exp(F)))."""
        # BUG FIX: exp(1 + x) -> log(1 + exp(x)), matching the corrected
        # BinomialDeviance.get_train_loss in this module.
        loss = -2.0 * ((y * f) - f.apply(lambda x: math.log(1 + math.exp(x)))).mean()
        logger.info(('第%d棵树: log-likelihood:%.4f' % (iter, loss)))