forked from hanzhaoml/MDAN
-
Notifications
You must be signed in to change notification settings - Fork 0
/
main_alfalfa_DANN_PA.py
206 lines (189 loc) · 10 KB
/
main_alfalfa_DANN_PA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
import time
import argparse
import pickle
import torch
import torch.optim as optim
import torch.nn.functional as F
from scipy.sparse import coo_matrix
from model import MDANet
from utils import get_logger
from utils import data_loader
from utils import multi_data_loader
from sklearn.metrics import mean_absolute_error
import math
def _str2bool(value):
    """Parse a command-line boolean string.

    argparse's ``type=bool`` is a classic trap: any non-empty string is
    truthy, so ``--verbose False`` used to evaluate to ``True``.  Accept
    the usual spellings explicitly and reject anything else.
    """
    if isinstance(value, bool):
        return value
    lowered = value.lower()
    if lowered in ("true", "t", "yes", "y", "1"):
        return True
    if lowered in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("Expected a boolean value, got: {!r}".format(value))


parser = argparse.ArgumentParser()
parser.add_argument("-n", "--name", help="Name used to save the log file.", type=str, default="alfalfa_log")
parser.add_argument("-f", "--frac", help="Fraction of the supervised training data to be used.",
                    type=float, default=1.0)
parser.add_argument("-s", "--seed", help="Random seed.", type=int, default=42)
# BUG FIX: was type=bool, which maps every non-empty string (including the
# literal "False") to True; _str2bool parses the string properly.
parser.add_argument("-v", "--verbose", help="Verbose mode: True -- show training progress. False -- "
                                            "not show training progress.", type=_str2bool, default=True)
parser.add_argument("-m", "--model", help="Choose a model to train: [mdan]",
                    type=str, default="mdan")
parser.add_argument("-d", "--dimension", help="Number of features to be used in the experiment",
                    type=int, default=8)
parser.add_argument("-u", "--mu", help="Hyperparameter of the coefficient for the domain adversarial loss",
                    type=float, default=1e-1)
parser.add_argument("-e", "--epoch", help="Number of training epochs", type=int, default=500)
parser.add_argument("-b", "--batch_size", help="Batch size during training", type=int, default=50)
parser.add_argument("-o", "--mode", help="Mode of combination rule for MDANet: [maxmin|dynamic]", type=str, default="dynamic")
# Compile and configure all the model parameters.
args = parser.parse_args()
# NOTE(review): hard-codes GPU index 1 -- this fails on single-GPU machines;
# confirm the intended device, or make it configurable.
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
logger = get_logger(args.name)
# Seed both NumPy and PyTorch so data sampling and weight init are reproducible.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
# Load the pre-partitioned alfalfa data set: per-domain row offsets come from
# an .npz archive, features/labels from a CSV aggregated over PA, WI, KY, MS, GA.
time_start = time.time()
alfalfa = np.load("./alfalfa_DANN_PA.npz", allow_pickle=True)
# Calendar and weather covariates used as model inputs (8 features).
xColumnsToKeep = ["Julian Day", "Time Since Sown (Days)", "Total Radiation (MJ/m^2)",
                  "Total Rainfall (mm)", "Avg Air Temp (C)", "Avg Min Temp (C)", "Avg Max Temp (C)",
                  "Avg Soil Moisture (%)"]
alf_data = pd.read_csv("aggregateData_PA_WI_KY_MS_GA.csv")
# 'offset' holds the row boundaries that split the CSV into per-domain segments.
alfalfa_offset = alfalfa['offset'].flatten()
alfalfa_xx = alf_data[xColumnsToKeep]
# Keep rows 1 .. offset[2]; row 0 is dropped -- presumably a header/invalid
# row, TODO confirm against the CSV layout.
alfalfa_xx = alfalfa_xx.iloc[1:alfalfa_offset[2]]
alfalfa_yy = alf_data["Yield (tons/acre)"]
alfalfa_yy = alfalfa_yy.iloc[1:alfalfa_offset[2]]
time_end = time.time()
logger.info("Time used to process the Alfalfa data set = {} seconds.".format(time_end - time_start))
logger.info("Number of training instances = {}, number of features = {}."
            .format(len(alfalfa_xx), len(xColumnsToKeep)))
#logger.info("Number of nonzero elements = {}".format(alfalfa_xx.nnz))
logger.info("alfalfa_xx shape = {}.".format(alfalfa_xx.shape))
logger.info("alfalfa_yy shape = {}.".format(alfalfa_yy.shape))
# Partition the rows into the per-domain data sets using the stored offsets.
data_name = ["GA", "PA, WI, KY, MS"]
num_data_sets = 2
data_insts, data_labels, num_insts = [], [], []
for i in range(num_data_sets):
    data_insts.append(alfalfa_xx[alfalfa_offset[i]: alfalfa_offset[i+1]])
    data_labels.append(alfalfa_yy[alfalfa_offset[i]: alfalfa_offset[i+1]])
    logger.info("Length of the {} data set label list = {}".format(
        data_name[i],
        len(alfalfa_yy[alfalfa_offset[i]: alfalfa_offset[i + 1]])
    ))
    num_insts.append(alfalfa_offset[i+1] - alfalfa_offset[i])
    # NOTE(review): the shuffle below is effectively dead -- the two lines that
    # applied r_order are commented out -- but np.random.shuffle still consumes
    # global RNG state, which later random sampling depends on.  Do not remove
    # it without re-checking reproducibility of downstream results.
    r_order = np.arange(num_insts[i])
    np.random.shuffle(r_order)
    #data_insts[i] = data_insts[i][r_order]
    #data_labels[i] = data_labels[i][r_order]
logger.info("Data sets: {}".format(data_name))
logger.info("Number of total instances in the data sets: {}".format(num_insts))
# Unlike the original MDAN amazon experiment (fixed 2000-instance training
# split), this script uses all available rows for training.
# num_trains = int(2000 * args.frac)
# input_dim = amazon_xx.shape[1]
num_trains = len(alfalfa_xx)
#input_dim = 8
input_dim = alfalfa_xx.shape[1]
# results maps target-domain name -> R^2 of the model trained on the other domain.
results = {}
#logger.info("Training fraction = {}, number of actual training data instances = {}".format(args.frac, num_trains))
logger.info("-" * 100)
# Train an MDAN/DANN-style regressor, treating each data set in turn as the
# unlabeled target domain and the other as the labeled source domain.
if args.model == "mdan":
    # NOTE(review): "num_classes": num_trains looks suspicious for a regression
    # model -- confirm what MDANet actually does with this field.
    configs = {"input_dim": input_dim, "hidden_layers": [64, 56, 48, 40, 32, 24, 16, 8], "num_classes": num_trains,
               "num_epochs": args.epoch, "batch_size": args.batch_size, "lr": .0001, "mu": args.mu, "num_domains":
               num_data_sets - 1, "mode": args.mode, "gamma": 10.0, "verbose": args.verbose}
    num_epochs = configs["num_epochs"]
    #batch_size = configs["batch_size"]
    num_domains = configs["num_domains"]
    lr = configs["lr"]
    mu = configs["mu"]
    gamma = configs["gamma"]
    mode = configs["mode"]
    logger.info("Training with domain adaptation using PyTorch DANN-R: ")
    logger.info("Hyperparameter setting = {}.".format(configs))
    error_dicts = {}
    r2s = []
    maes = []
    for i in range(num_data_sets):
        # Build source instances from every domain except the current target i.
        source_insts = []
        source_labels = []
        source_insts_built = False  # NOTE(review): never read -- dead variable.
        for j in range(num_data_sets):
            if j != i:
                # NOTE(review): num_trains here is whatever value is left over
                # from the PREVIOUS loop iteration (len(alfalfa_xx) on the
                # first pass, 90% of the previous target's size afterwards).
                # This cross-iteration coupling looks unintended -- confirm.
                source_insts.append(data_insts[j][:num_trains].astype(np.float32))
                source_labels.append(data_labels[j][:num_trains].astype(np.float32))
                batch_size = len(data_insts[j])
            else:
                batch_size = len(data_insts[0])
        # Build target instances: hold out the last 10% of the target domain
        # as the evaluation set.
        target_idx = i
        num_trains = math.floor(data_insts[i].shape[0] * .90)
        target_insts = data_insts[i].values[num_trains:, :].astype(np.float32)
        target_labels = data_labels[i][num_trains:].astype(np.float32)
        # Train DannNet.
        mdan = MDANet(configs).to(device)
        #optimizer = optim.Adadelta(mdan.parameters(), lr=lr)
        optimizer = optim.SGD(mdan.parameters(), lr=lr)
        mdan.train()
        # Training phase.
        time_start = time.time()
        for t in range(num_epochs):
            running_loss = 0.0
            train_loader = multi_data_loader(source_insts, source_labels, batch_size)
            for xs, ys in train_loader:
                # Domain-discriminator targets: 1 = source, 0 = target.
                # NOTE(review): requires_grad=True on labels/targets is
                # unnecessary (gradients are never needed w.r.t. them).
                slabels = torch.ones(batch_size, requires_grad=True, dtype=torch.float32).to(device)
                tlabels = torch.zeros(batch_size, requires_grad=True, dtype=torch.float32).to(device)
                for j in range(num_domains):
                    xs[j] = torch.tensor(xs[j].values, requires_grad=True, dtype=torch.float32).to(device)
                    ys[j] = torch.tensor(ys[j].values, requires_grad=True, dtype=torch.float32).to(device)
                # Sample a random mini-batch of (unlabeled) target instances.
                #ridx = np.random.choice(target_insts.shape[1], batch_size)
                ridx = np.random.choice(target_insts.shape[0], batch_size)
                tinputs = target_insts[ridx, :]
                tinputs = torch.tensor(tinputs, requires_grad=True, dtype=torch.float32).to(device)
                optimizer.zero_grad()
                logprobs, sdomains, tdomains = mdan(xs, tinputs)
                # Per-domain regression loss on the source predictions.
                losses = torch.stack([F.mse_loss(logprobs[j].reshape(-1), ys[j]) for j in range(num_domains)])
                # Per-domain adversarial loss: discriminate source vs. target.
                domain_losses = torch.stack([F.mse_loss(sdomains[j], slabels) +
                                             F.mse_loss(tdomains[j], tlabels) for j in range(num_domains)])
                # Combine per-domain losses according to the training mode:
                # maxmin takes the hardest domain; dynamic uses a log-sum-exp
                # soft maximum with temperature gamma.
                if mode == "maxmin":
                    loss = torch.max(losses + mu * torch.min(domain_losses))
                elif mode == "dynamic":
                    loss = torch.log(torch.sum(torch.exp(gamma * (losses + mu * domain_losses)))) / gamma
                else:
                    raise ValueError("No support for the training mode on DANN-R: {}.".format(mode))
                running_loss += loss.item()
                loss.backward()
                optimizer.step()
            # NOTE(review): t == num_epochs is never true (t ranges
            # 0..num_epochs-1), so the final epoch is never logged here.
            if t % 1000 == 0 or t == num_epochs:
                logger.info("Iteration {}, loss = {}".format(t, running_loss))
        time_end = time.time()
        # Evaluate on the held-out 10% of the target domain.
        mdan.eval()
        target_insts = torch.tensor(target_insts, requires_grad=True, dtype=torch.float32).to(device)
        target_labels = torch.tensor(target_labels.to_numpy())
        # NOTE(review): torch.max(..., 1)[0] takes a per-row maximum over the
        # inference output's columns -- unusual for a regression head; confirm
        # the shape MDANet.inference returns.
        preds_labels = torch.max(mdan.inference(target_insts), 1)[0].cpu().squeeze_()
        # NOTE(review): exact float inequality between predictions and labels
        # will be True for almost every regression output -- this "error"
        # bitmap is probably not meaningful as written.
        error_dicts[data_name[i]] = preds_labels.detach().numpy() != target_labels.detach().numpy()
        # next 3 lines borrowed from https://www.kite.com/python/answers/how-to-calculate-r-squared-with-numpy-in-python
        correlation_matrix = np.corrcoef(target_labels.detach(), preds_labels.detach())
        correlation_xy = correlation_matrix[0, 1]
        r_squared = correlation_xy ** 2
        r2s.append(r_squared)
        mae = mean_absolute_error(target_labels.detach(), preds_labels.detach())
        maes.append(mae)
        logger.info("Prediction accuracy on {}: R2 = {}, MAE = {} time used = {} seconds.".
                    format(data_name[i], r_squared, mae, time_end - time_start))
        results[data_name[i]] = r_squared
        #print('R2 score: ', r_squared)
    # Summarize metrics across all target domains and persist the error bitmaps.
    logger.info("Prediction accuracy with single source domain adaptation using DANN-R: ")
    logger.info("R2s: ")
    logger.info(r2s)
    logger.info("MAEs: ")
    logger.info(maes)
    pickle.dump(error_dicts, open("{}-{}-{}-{}.pkl".format(args.name, args.frac, args.model, args.mode), "wb"))
    logger.info("*" * 100)
else:
    raise ValueError("No support for the following model: {}.".format(args.model))