-
Notifications
You must be signed in to change notification settings - Fork 191
/
main.py
165 lines (131 loc) · 6.65 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# #################################################################
# Deep Reinforcement Learning for Online Offloading in Wireless Powered Mobile-Edge Computing Networks
#
# This file contains the main code of DROO. It loads the training samples saved in ./data/data_#.mat, splits the samples into two parts (training and testing data constitute 80% and 20%, respectively), trains the DNN with training and validation samples, and finally tests the DNN with test data.
#
# Input: ./data/data_#.mat
# Data samples are generated according to the CD method presented in [2]. There are 30,000 samples saved in each ./data/data_#.mat, where # is the user number. Each data sample includes
# -----------------------------------------------------------------
# | wireless channel gain | input_h |
# -----------------------------------------------------------------
# | computing mode selection | output_mode |
# -----------------------------------------------------------------
# | energy broadcasting parameter | output_a |
# -----------------------------------------------------------------
# | transmit time of wireless device | output_tau |
# -----------------------------------------------------------------
# | weighted sum computation rate | output_obj |
# -----------------------------------------------------------------
#
#
# References:
# [1] Liang Huang, Suzhi Bi, and Ying-Jun Angela Zhang, "Deep Reinforcement Learning for Online Offloading in Wireless Powered Mobile-Edge Computing Networks," in IEEE Transactions on Mobile Computing, early access, 2019, DOI:10.1109/TMC.2019.2928811.
# [2] S. Bi and Y. J. Zhang, “Computation rate maximization for wireless powered mobile-edge computing with binary computation offloading,” IEEE Trans. Wireless Commun., vol. 17, no. 6, pp. 4177-4190, Jun. 2018.
#
# version 1.0 -- July 2018. Written by Liang Huang (lianghuang AT zjut.edu.cn)
# #################################################################
import scipy.io as sio # import scipy.io for .mat file I/
import numpy as np # import numpy
from memory import MemoryDNN
from optimization import bisection
import time
def plot_rate(rate_his, rolling_intv=50):
    """Plot the rolling mean of ``rate_his`` with a shaded rolling min/max band.

    Parameters
    ----------
    rate_his : sequence of numbers
        One value per time frame; frames are numbered from 1 on the x-axis.
    rolling_intv : int, optional
        Length of the rolling window (in frames) used for the mean, min and
        max curves. Windows at the start of the series are allowed to be
        shorter (``min_periods=1``).

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    import matplotlib.pyplot as plt
    import pandas as pd
    import matplotlib as mpl

    rate_array = np.asarray(rate_his)
    df = pd.DataFrame(rate_his)

    # The bundled 'seaborn' style was renamed to 'seaborn-v0_8' in
    # matplotlib 3.6 and the old name was removed in 3.8, so pick whichever
    # name this matplotlib installation actually provides.
    style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in mpl.style.available else 'seaborn'
    mpl.style.use(style_name)
    fig, ax = plt.subplots(figsize=(15,8))

    # Build the x-axis and the rolling window once and reuse them for all
    # three statistics.
    frames = np.arange(len(rate_array)) + 1
    rolling = df.rolling(rolling_intv, min_periods=1)

    plt.plot(frames, np.hstack(rolling.mean().values), 'b')
    plt.fill_between(
        frames,
        np.hstack(rolling.min()[0].values),
        np.hstack(rolling.max()[0].values),
        color = 'b',
        alpha = 0.2,
    )
    plt.ylabel('Normalized Computation Rate')
    plt.xlabel('Time Frames')
    plt.show()
def save_to_txt(rate_his, file_path):
    """Write every item of ``rate_his`` to ``file_path``, one item per line.

    Each item is rendered with ``%s`` and followed by a single space and a
    newline. The file is opened in ``'w'`` mode, so any existing content is
    overwritten.

    Parameters
    ----------
    rate_his : iterable
        Items to write; anything that can be formatted with ``%s``.
    file_path : str
        Destination path of the text file.
    """
    with open(file_path, 'w') as f:
        # Wrap each item in a 1-tuple so that an item that is itself a tuple
        # is formatted as one value instead of being unpacked as the
        # argument list of the % operator.
        f.writelines("%s \n" % (rate,) for rate in rate_his)
if __name__ == "__main__":
    # DROO main script.
    #
    # For every time frame the DNN memory proposes K candidate offloading
    # modes for the current channel realisation. Each candidate is scored
    # with bisection(), and the candidate with the largest reward is stored
    # in the memory, which is further used to train the DNN.
    #
    # Adaptive K: every Delta frames K is reset from the indices of the
    # recently selected candidates (see the update inside the loop).

    N = 10                       # number of users
    n = 30000                    # number of time frames
    K = N                        # initialize K = N
    decoder_mode = 'OP'          # the quantization mode could be 'OP' (Order-preserving) or 'KNN'
    Memory = 1024                # capacity of memory structure
    Delta = 32                   # update interval for adaptive K
    CHANNEL_SCALE = 1000000      # scaling applied to the channel gains fed to the DNN

    print('#user = %d, #channel=%d, K=%d, decoder = %s, Memory = %d, Delta = %d'%(N,n,K,decoder_mode, Memory, Delta))

    # Load data (read the .mat file once and pick both arrays from it)
    data = sio.loadmat('./data/data_%d' %N)
    channel = data['input_h']
    rate = data['output_obj']    # this rate is only used to plot figures; never used to train DROO.

    # increase h to close to 1 for better training; it is a trick widely adopted in deep learning
    channel = channel * CHANNEL_SCALE

    # generate the train and test data sample index
    # data are split as 80:20
    # training data are randomly sampled with duplication if n > total data size
    split_idx = int(.8 * len(channel))
    num_test = min(len(channel) - split_idx, n - int(.8 * n))   # test data size

    mem = MemoryDNN(net = [N, 120, 80, N],
                    learning_rate = 0.01,
                    training_interval=10,
                    batch_size=128,
                    memory_size=Memory
                    )

    start_time = time.time()

    rate_his = []          # largest reward of each frame
    rate_his_ratio = []    # largest reward normalised by the stored 'output_obj' value
    mode_his = []          # selected mode of each frame
    k_idx_his = []         # index of the selected mode within the candidate list
    K_his = []             # value of K used in each frame

    # Progress is reported roughly every 10% of the frames; max() keeps the
    # modulus non-zero when n < 10.
    progress_step = max(1, n // 10)

    for i in range(n):
        if i % progress_step == 0:
            print("%0.1f"%(i/n))

        if i > 0 and i % Delta == 0:
            # index counts from 0
            if Delta > 1:
                # NOTE(review): the slice [-Delta:-1] leaves out the most
                # recent frame; kept as in the original code because changing
                # it alters training behaviour - confirm whether
                # [-Delta:] was intended.
                max_k = max(k_idx_his[-Delta:-1]) + 1
            else:
                max_k = k_idx_his[-1] + 1
            K = min(max_k + 1, N)

        if i < n - num_test:
            # training: cycle through the first split_idx samples
            i_idx = i % split_idx
        else:
            # test: walk through the samples after split_idx
            i_idx = i - n + num_test + split_idx

        h = channel[i_idx,:]

        # the action selection must be either 'OP' or 'KNN'
        m_list = mem.decode(h, K, decoder_mode)

        # bisection() is evaluated on the un-scaled channel; its first return
        # value is used as the reward of the candidate mode
        r_list = [bisection(h/CHANNEL_SCALE, m)[0] for m in m_list]
        best_idx = np.argmax(r_list)

        # encode the mode with largest reward
        mem.encode(h, m_list[best_idx])
        # the main code for DROO training ends here

        # the following codes store some interested metrics for illustrations
        # memorize the largest reward
        rate_his.append(np.max(r_list))
        rate_his_ratio.append(rate_his[-1] / rate[i_idx][0])
        # record the index of largest reward
        k_idx_his.append(best_idx)
        # record K in case of adaptive K
        K_his.append(K)
        mode_his.append(m_list[best_idx])

    total_time = time.time() - start_time
    mem.plot_cost()
    plot_rate(rate_his_ratio)

    # Average over ALL test frames. The previous slice [-num_test:-1] dropped
    # the last frame while still dividing by num_test, biasing the result low.
    test_ratios = rate_his_ratio[-num_test:] if num_test > 0 else []
    if test_ratios:
        print("Averaged normalized computation rate:", sum(test_ratios)/len(test_ratios))
    else:
        print("Averaged normalized computation rate: n/a (no test frames)")
    print('Total time consumed:%s'%total_time)
    print('Average time per channel:%s'%(total_time/n))

    # save data into txt
    save_to_txt(k_idx_his, "k_idx_his.txt")
    save_to_txt(K_his, "K_his.txt")
    save_to_txt(mem.cost_his, "cost_his.txt")
    save_to_txt(rate_his_ratio, "rate_his_ratio.txt")
    save_to_txt(mode_his, "mode_his.txt")