-
Notifications
You must be signed in to change notification settings - Fork 6
/
crossvalidation_extra_tree_ensemble.m
128 lines (85 loc) · 3.43 KB
/
crossvalidation_extra_tree_ensemble.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
function [model] = crossvalidation_extra_tree_ensemble(subset,M,k,nmin,ns,flag)
% CROSSVALIDATION_EXTRA_TREE_ENSEMBLE Cross-validates an ensemble of Extra-Trees.
%
% Inputs:
%   subset = observations, one per row; the last column is used as the
%            output variable and all preceding columns as input variables
%   M      = number of trees in the ensemble
%   k      = number of random cut-directions
%   nmin   = minimum number of points per leaf
%   ns     = number of folds in the k-fold cross-validation process
%   flag   = if flag == 1, a model is also built and evaluated on the
%            whole data-set and stored in the output structure
%
% Output:
%   model  = structure containing models and performance
%     model.cross_validation.performance.Rt2_cal_pred      : ns-by-1, Rt2_fit score of each fold on its calibration rows
%     model.cross_validation.performance.Rt2_val_pred      : ns-by-1, Rt2_fit score of each fold on its validation rows
%     model.cross_validation.performance.Rt2_cal_pred_mean : mean of Rt2_cal_pred
%     model.cross_validation.performance.Rt2_val_pred_mean : mean of Rt2_val_pred
%     model.complete_model.ensemble                        : (flag == 1 only) third output of rtenslearn_c
%     model.complete_model.trajectories                    : (flag == 1 only) predictions on the training rows
%     model.complete_model.performance.Rt2                 : (flag == 1 only) Rt2_fit score of those predictions
%
% Notes:
%   - Folds are contiguous blocks of rows; the data are not shuffled.
%   - Only the first floor(size(subset,1)/ns)*ns rows are used, both for the
%     cross-validation and for the whole-data-set model; trailing rows that
%     do not fill a complete fold are discarded.
%   - Rt2_fit is defined outside this file; the original comments refer to
%     its result as R2 (NOTE(review): confirm the exact definition).

% 0) SET THE PROBLEM PARAMETERS FOR THE ENSEMBLE CROSS-VALIDATION

% Number of rows in a single fold. size(.,1) is used instead of length(.),
% because length returns the largest dimension and would therefore count
% columns whenever the matrix has more columns than rows.
l = floor(size(subset,1)/ns);
if l < 1
    error('crossvalidation_extra_tree_ensemble:tooFewObservations', ...
        'subset has %d rows, which is not enough to fill %d folds.', ...
        size(subset,1), ns);
end

% Keep only the rows that fill complete folds
nrows = l*ns;
subset = subset(1:nrows,:);

% C-ExtraTrees algorithm parameters
rtensparam = init_extra_trees();
rtensparam.nbterms = M;
rtensparam.rtparam.extratreesk = k;
rtensparam.rtparam.nmin = nmin;

% 1) INITIALIZATION OF THE OUTPUT VARIABLES

% Per-fold scores on the calibration and validation rows
Rt2_cal_pred = zeros(ns,1);
Rt2_val_pred = zeros(ns,1);

% Initialize the function output
model.cross_validation.performance = [];

% 2) MODEL CONSTRUCTION AND EVALUATION OF THE PERFORMANCES (k-fold cross-validation)

for i = 1:ns

    % Row indices of fold i (validation) and of all remaining folds
    % (calibration). The calibration rows are ordered "rows after the fold,
    % then rows before the fold"; either range is empty for the last/first
    % fold, so no special-casing is required.
    val_rows = (i-1)*l+1 : i*l;
    cal_rows = [i*l+1:nrows, 1:(i-1)*l];

    subset_tar = subset(cal_rows,:);
    subset_val = subset(val_rows,:);

    % Data-sets in the types passed to rtenslearn_c. X1, Y1, ls1 and X2 are
    % referenced by name inside the evalc strings below.
    X1  = single(subset_tar(:,1:end-1));
    Y1  = single(subset_tar(:,end));
    ls1 = int32(1:size(subset_tar,1));
    X2  = single(subset_val(:,1:end-1));

    % Ensemble building + test of the ensemble on the calibration and
    % validation data-sets. evalc is used so that anything rtenslearn_c
    % prints is captured instead of being shown in the command window.
    evalc('[finalResult_val_pred temp1 temp2 finalResult_cal_pred] = rtenslearn_c(X1,Y1,ls1,[],rtensparam,X2,0)');

    Rt2_cal_pred(i) = Rt2_fit(subset_tar(:,end),finalResult_cal_pred);
    Rt2_val_pred(i) = Rt2_fit(subset_val(:,end),finalResult_val_pred);

end

% Per-fold scores and their averages
model.cross_validation.performance.Rt2_cal_pred = Rt2_cal_pred;
model.cross_validation.performance.Rt2_val_pred = Rt2_val_pred;
model.cross_validation.performance.Rt2_cal_pred_mean = mean(Rt2_cal_pred);
model.cross_validation.performance.Rt2_val_pred_mean = mean(Rt2_val_pred);

% 3) MODEL CONSTRUCTION ON THE WHOLE DATA-SET

% Only performed on request
if flag == 1

    % Model construction on all retained rows
    X1  = single(subset(:,1:end-1));
    Y1  = single(subset(:,end));
    ls1 = int32(1:size(subset,1));

    % X2 still holds the validation inputs of the last fold; the
    % predictions computed on it (first output, temp0) are discarded.
    evalc('[temp0 temp1 ensemble finalResult_pred] = rtenslearn_c(X1,Y1,ls1,[],rtensparam,X2,0)');

    model.complete_model.ensemble     = ensemble;
    model.complete_model.trajectories = finalResult_pred;

    % Evaluate the score on the training rows
    model.complete_model.performance.Rt2 = Rt2_fit(subset(:,end),finalResult_pred);

end
% This code has been written by Stefano Galelli and Riccardo Taormina.