-
Notifications
You must be signed in to change notification settings - Fork 5
/
tabular_transformer.py
103 lines (81 loc) · 3.06 KB
/
tabular_transformer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import numpy as np
import pandas as pd
# Value stored under the "type" key of a metadata entry for columns that the
# caller listed as categorical.
CATEGORICAL = "categorical"
# Value stored under the "type" key of a metadata entry for every other column.
CONTINUOUS = "continuous"
class Transformer:
    """Common interface for tabular data transformers.

    Subclasses implement ``fit``, ``transform`` and ``inverse_transform``;
    ``get_metadata`` is a shared helper that summarises each column.
    """

    @staticmethod
    def get_metadata(data, categorical_columns=tuple()):
        """Describe every column of ``data``.

        Args:
            data: Anything accepted by ``pd.DataFrame``.
            categorical_columns: Column labels to treat as categorical.

        Returns:
            A list with one dict per column, in column order. Categorical
            entries hold ``name``, ``type``, ``size`` and ``i2s`` (the
            distinct values in the order produced by ``value_counts()``);
            all other entries hold ``name``, ``type``, ``min`` and ``max``.
        """
        frame = pd.DataFrame(data)
        described = []
        for name in frame.columns:
            series = frame[name]
            if name not in categorical_columns:
                entry = {
                    "name": name,
                    "type": CONTINUOUS,
                    "min": series.min(),
                    "max": series.max(),
                }
            else:
                levels = series.value_counts().index.tolist()
                entry = {
                    "name": name,
                    "type": CATEGORICAL,
                    "size": len(levels),
                    "i2s": levels,
                }
            described.append(entry)
        return described

    def fit(self, data, categorical_columns=tuple()):
        """Learn whatever state the transformer needs; must be overridden."""
        raise NotImplementedError

    def transform(self, data):
        """Encode ``data``; must be overridden."""
        raise NotImplementedError

    def inverse_transform(self, data):
        """Decode ``data`` back to the original space; must be overridden."""
        raise NotImplementedError
class GeneralTransformer(Transformer):
    """Min-max scale continuous columns and one-hot encode categorical ones.

    Continuous columns are rescaled to ``[0, 1]``, or to ``[-1, 1]`` when
    ``act == 'tanh'``. Each categorical column becomes a one-hot block with
    one slot per category recorded by ``fit``.

    Attributes:
        act: Activation label attached to continuous outputs; ``'tanh'``
            additionally switches the target range to ``[-1, 1]``.
        meta: Per-column metadata produced by ``get_metadata`` (set by ``fit``).
        output_dim: Total width of the transformed matrix (set by ``fit``).
        output_info: List of ``(width, activation)`` pairs, one per column
            (set by ``transform``; ``None`` before the first call).
    """

    def __init__(self, act='tanh'):
        self.act = act
        self.meta = None
        self.output_dim = None
        # Declared up front so the attribute exists before transform() runs.
        self.output_info = None

    @staticmethod
    def _span(info):
        """Return ``max - min`` for a continuous column, or 1 if that is 0."""
        span = info['max'] - info['min']
        # A constant column has zero range; dividing by it would yield NaN.
        # Using 1 maps every value to 0, and inverse_transform still restores
        # the original constant because it multiplies by the true range (0).
        return span if span != 0 else 1

    def fit(self, data, categorical_columns=tuple()):
        """Record column metadata and compute the encoded width.

        Args:
            data: Anything accepted by ``pd.DataFrame``.
            categorical_columns: Column labels to one-hot encode.
        """
        self.meta = self.get_metadata(data, categorical_columns)
        self.output_dim = sum(
            1 if info['type'] == CONTINUOUS else info['size']
            for info in self.meta
        )

    def transform(self, data):
        """Encode ``data`` column by column using the fitted metadata.

        Args:
            data: 2-D array-like whose columns are in the same order as the
                data passed to ``fit``.

        Returns:
            A 2-D array: one scaled column per continuous input column and
            one one-hot block per categorical input column, concatenated in
            column order.

        Raises:
            ValueError: If a categorical value was not seen during ``fit``.
        """
        data = np.asarray(data)
        n_rows = len(data)
        row_ids = np.arange(n_rows)
        blocks = []
        self.output_info = []
        for id_, info in enumerate(self.meta):
            col = data[:, id_]
            if info['type'] == CONTINUOUS:
                col = (col - info['min']) / self._span(info)
                if self.act == 'tanh':
                    # Stretch [0, 1] to [-1, 1] to match a tanh output range.
                    col = col * 2 - 1
                blocks.append(col.reshape([-1, 1]))
                self.output_info.append((1, self.act))
            else:
                # Build the value -> slot map once instead of scanning the
                # category list for every row.
                slot_of = {
                    value: slot for slot, value in enumerate(info['i2s'])
                }
                try:
                    idx = [slot_of[value] for value in col]
                except KeyError as err:
                    # Same exception type that list.index raised before.
                    raise ValueError(
                        "{!r} is not in list".format(err.args[0])
                    )
                one_hot = np.zeros([n_rows, info['size']])
                one_hot[row_ids, idx] = 1
                blocks.append(one_hot)
                self.output_info.append((info['size'], 'softmax'))
        return np.concatenate(blocks, axis=1)

    def inverse_transform(self, data):
        """Decode a matrix produced by ``transform`` (or shaped like one).

        Continuous values are clipped to the valid range before being scaled
        back; categorical blocks are decoded with ``argmax``.

        Args:
            data: 2-D array-like laid out like the output of ``transform``.

        Returns:
            A float array of shape ``(len(data), number of columns)``.
            Because the result is a float array, category values must be
            numeric.
        """
        data = np.asarray(data)
        restored = np.zeros([len(data), len(self.meta)])
        offset = 0  # first encoded column belonging to the current feature
        for id_, info in enumerate(self.meta):
            if info['type'] == CONTINUOUS:
                current = data[:, offset]
                offset += 1
                if self.act == 'tanh':
                    current = (current + 1) / 2
                current = np.clip(current, 0, 1)
                restored[:, id_] = (
                    current * (info['max'] - info['min']) + info['min']
                )
            else:
                width = info['size']
                current = data[:, offset:offset + width]
                offset += width
                idx = np.argmax(current, axis=1)
                categories = info['i2s']
                restored[:, id_] = [categories[i] for i in idx]
        return restored