rnn_train_sc.py
'''
train mdn-rnn from pre-processed data.
also save 1000 initial mu and logvar, for generative experiments (not related to training).
'''
import numpy as np
import os
import json
import tensorflow as tf
import random
import time
from vae.vae import ConvVAE, reset_graph
from rnn.rnn import HyperParams, MDNRNN
os.environ["CUDA_VISIBLE_DEVICES"]="0"
np.set_printoptions(precision=4, edgeitems=6, linewidth=100, suppress=True)
F_MODE_I_N = 0 # combine non-image feature and image feature
F_MODE_N = 1 # only non-image feature obs
F_MODE_I = 2 # only image obs
ACTION_SPACE = 10
SIZE_1 = 32 # image latent size
SIZE_2 = 20 # non-image obs feature size
THE_MODE = F_MODE_I_N
DATA_DIR = "series"
model_save_path = "tf_rnn"
if not os.path.exists(model_save_path):
    os.makedirs(model_save_path)
initial_z_save_path = "tf_initial_z"
if not os.path.exists(initial_z_save_path):
    os.makedirs(initial_z_save_path)
def get_one_hot(targets, nb_classes):
    res = np.eye(nb_classes)[np.array(targets).reshape(-1)]
    return res.reshape(list(targets.shape) + [nb_classes])
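# Illustrative example of get_one_hot (shapes only, not used by the script):
#   get_one_hot(np.array([[2, 0, 1]]), 4) has shape (1, 3, 4),
#   where class index 2 maps to the row [0., 0., 1., 0.].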
def random_batch(mode=THE_MODE):
    indices = np.random.permutation(N_data)[0:batch_size]
    nf = data_non_image_feature[indices]
    mu = data_mu[indices]
    logvar = data_logvar[indices]
    action = data_action[indices]
    s = logvar.shape
    img = mu + np.exp(logvar / 2.0) * np.random.randn(*s)
    assert img.shape[:-1] == nf.shape[:-1]
    if mode == F_MODE_I_N:
        z = np.concatenate([img, nf], axis=-1)
    elif mode == F_MODE_N:
        z = nf
    elif mode == F_MODE_I:
        z = img
    else:
        z = np.concatenate([img, nf], axis=-1)
    return z, action
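# Shapes returned by random_batch() with the defaults used below (seq_len is the
# stored episode length in the preprocessed data):
#   z:      (batch_size, seq_len, SIZE_1 + SIZE_2) when THE_MODE == F_MODE_I_N,
#           with the image part sampled from the stored VAE posterior (mu, logvar)
#   action: (batch_size, seq_len, ACTION_SPACE), one-hot encoded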
def default_hps():
    return HyperParams(num_steps=50000,
                       max_seq_len=299,  # train on sequences of 300 (so 299 after the teacher-forcing shift)
                       input_seq_width=SIZE_1 + SIZE_2 + ACTION_SPACE,  # width of our input (32 + 20 + 10 actions)
                       output_seq_width=SIZE_1 + SIZE_2,  # width of our output (32 + 20)
                       rnn_size=256,  # number of rnn cells
                       batch_size=100,  # minibatch size
                       grad_clip=1.0,
                       num_mixture=5,  # number of mixtures in MDN
                       learning_rate=0.001,
                       decay_rate=1.0,
                       min_learning_rate=0.00001,
                       use_layer_norm=0,  # set this to 1 for more stable results (less chance of NaN), but slower
                       use_recurrent_dropout=0,
                       recurrent_dropout_prob=0.90,
                       use_input_dropout=0,
                       input_dropout_prob=0.90,
                       use_output_dropout=0,
                       output_dropout_prob=0.90,
                       is_training=1)
hps_model = default_hps()
hps_sample = hps_model._replace(batch_size=1, max_seq_len=1, use_recurrent_dropout=0, is_training=0)
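# hps_sample reconfigures the same model for one-step generation (batch_size=1,
# max_seq_len=1, recurrent dropout off, is_training=0). It is not used during training
# in this script, but matches the settings presumably used when reloading the saved weights.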
raw_data = np.load(os.path.join(DATA_DIR, "series_10000.npz"))
# load preprocessed data and change data type
data_non_image_feature = raw_data["obs"]
#print(type(data_non_image_feature[0]))
data_mu = raw_data["mu"]
#print(data_mu[:1000, 0, :])
data_logvar = raw_data["logvar"]
#print(data_logvar[0].shape)
#print(raw_data["action"][15, 24])
data_action = get_one_hot(raw_data["action"], ACTION_SPACE)
max_seq_len = hps_model.max_seq_len
N_data = len(data_mu) # should be 10k
batch_size = hps_model.batch_size
# save 1000 initial mu and logvars:
#
'''
initial_non_image_feature = np.copy(data_non_image_feature[:1000, 0, :]*10000).astype(np.int).tolist()
initial_mu = np.copy(data_mu[:1000, 0, :]*10000).astype(np.int).tolist()
initial_logvar = np.copy(data_logvar[:1000, 0, :]*10000).astype(np.int).tolist()
with open(os.path.join("tf_initial_z", "initial_z.json"), 'wt') as outfile:
    json.dump([initial_non_image_feature, initial_mu, initial_logvar], outfile, sort_keys=True, indent=0, separators=(',', ': '))
'''
reset_graph()
rnn = MDNRNN(hps_model)
# train loop:
hps = hps_model
start = time.time()
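# Learning-rate schedule used in the loop below: lr = (lr0 - lr_min) * decay_rate**step + lr_min.
# With decay_rate=1.0 (the default above) the rate stays constant at 0.001; a value
# slightly below 1 (e.g. 0.99999) would anneal it toward min_learning_rate.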
for local_step in range(hps.num_steps):
    step = rnn.sess.run(rnn.global_step)
    curr_learning_rate = (hps.learning_rate - hps.min_learning_rate) * hps.decay_rate ** step + hps.min_learning_rate

    raw_z, raw_a = random_batch()
    inputs = np.concatenate((raw_z[:, :-1, :], raw_a[:, :-1, :]), axis=2)
    outputs = raw_z[:, 1:, :]  # teacher forcing (shift by one predictions)

    feed = {rnn.input_x: inputs, rnn.output_x: outputs, rnn.lr: curr_learning_rate}
    (train_cost, state, train_step, _) = rnn.sess.run([rnn.cost, rnn.final_state, rnn.global_step, rnn.train_op], feed)

    if step % 20 == 0 and step > 0:
        end = time.time()
        time_taken = end - start
        start = time.time()
        output_log = "step: %d, lr: %.6f, cost: %.4f, train_time_taken: %.4f" % (step, curr_learning_rate, train_cost, time_taken)
        print(output_log)
# save the model (don't bother with tf checkpoints json all the way ...)
rnn.save_json(os.path.join(model_save_path, "rnn.json"))
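# A minimal sketch of reloading the trained weights for sampling; this assumes MDNRNN
# also exposes a load_json() counterpart to the save_json() call above:
#
#   reset_graph()
#   sample_rnn = MDNRNN(hps_sample)
#   sample_rnn.load_json(os.path.join(model_save_path, "rnn.json"))
#   # step the model one timestep at a time (hps_sample uses batch_size=1, max_seq_len=1)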