Skip to content

Commit

Permalink
convenience fit function
Browse files Browse the repository at this point in the history
  • Loading branch information
abrahamq committed Aug 22, 2019
1 parent d889c80 commit 68dbcf8
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 4 deletions.
37 changes: 33 additions & 4 deletions learners/ensemble_learner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import os
import pandas as pd

# import img_util as img_util

import simple_nn as nn

Expand All @@ -13,7 +12,7 @@

class EnsembleLearner():

def __init__(self, config, names, learners):
def __init__(self, config, names, learners, hidden=10):
"""
Args:
learners: list of instances of PerceptronLearners
Expand All @@ -23,6 +22,7 @@ def __init__(self, config, names, learners):
self.logger = config["logger"]
self.learners = learners
self.names = names
self.hidden = hidden
self.models = []
pass

Expand Down Expand Up @@ -59,6 +59,23 @@ def _fill_no_data_spots(self, data_list, label_list):
print(res)
return res.join(lab_df)

def fit(self, X, y):
    """Fit the ensemble's combining neural network on a feature matrix.

    Args:
        X (np.ndarray): shape [n_samples, n_features].
        y (np.ndarray): shape [n_samples], each entry -1 or +1.
    Returns:
        self (object): the fitted estimator, sklearn-style.
    """
    t_full_matrix = torch.from_numpy(X).float()
    # sklearn wants -1, 1 class labels but torch expects 0, 1
    into_zeros = np.where(y < 0, 0, 1)
    t_full_labels = torch.from_numpy(into_zeros).long()
    # BUG FIX: the network's input width is the feature count X.shape[1],
    # not the sample count X.shape[0] (train() likewise sizes the net by
    # the number of features, len(self.models)).
    self.nn_model = nn.Simple_nn(X.shape[1], self.hidden)
    nn.run_training(self.nn_model, t_full_matrix, t_full_labels)
    return self

def train(self, params, validation_keys):
"""
Runs all learners
Expand Down Expand Up @@ -116,8 +133,9 @@ def train(self, params, validation_keys):
into_zeros = np.where(self.t_labels < 0, 0, 1)
t_full_labels = torch.from_numpy(into_zeros).long()

hidden_layers = params["hidden"]
nn_model = nn.Simple_nn(len(self.models), hidden_layers)
self.hidden_layers = params["hidden"]
nn_model = nn.Simple_nn(len(self.models), self.hidden_layers)
self.nn_model = nn_model
# nn_model = nn.Simple_nn(len(self.models), hidden_layers)
nn.run_training(nn_model, t_full_matrix, t_full_labels)

Expand Down Expand Up @@ -145,6 +163,17 @@ def dump_model_to_disk(self, model, filename="ensemble_learner_default.p"):
pickle.dump(model, open(path, "wb"))
return

def predict(self, datapoint):
import math
logSoftmaxOutput = self.nn_model(datapoint)
probs = math.e**logSoftmaxOutput
p = probs.data.numpy()

predicted = np.argmax(p, axis=1) # which index is greater
# now from index to -1, 1
to_class_label = np.where(predicted == 0, -1, 1)
return to_class_label

def load_model_from_disk(self, filename="perceptron_default.p"):
path = os.path.join(self.data_folder_prefix, filename)
self.logger.debug("loading from: " + str(path))
Expand Down
36 changes: 36 additions & 0 deletions learners/svm_learner.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,3 +111,39 @@ def predict(self, datapoint):
self.logger.warn("Should have called train first!")
return
return self.clf.predict(datapoint)

def cross_validate_model(self,
data,
labels,
k=5,
params={
"T": 1000,
"print": False
}
):
"""
Rnadomly shuffles data and labels into
k many groups. Trains on k-1 and then test
on left out group.
Args:
data (np.ndarray)
labels (np.ndarray)
k (int)
How many ways to split the data
Returns:
(mean, std): tuple of floats
the mean and standard deviation
of cross validation.
"""
if not self.clf:
self.logger.error("Called cross validate"
"before training in"
"SvmLearner!")
return
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import ShuffleSplit
# make sure to shuffle the data!
cv = ShuffleSplit(n_splits=k, test_size=.10)
scores = cross_val_score(self.clf, data, labels, cv=cv)
self.scores = scores
return (scores.mean(), scores.std())

0 comments on commit 68dbcf8

Please sign in to comment.