Merge pull request #494 from tam17aki/dep_kpca
A bug fix for KPCA
yzhao062 authored Nov 18, 2023
2 parents f697c57 + e78400f commit 8477a11
Showing 2 changed files with 72 additions and 104 deletions.
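For orientation, here is a minimal usage sketch of the KPCA detector this commit fixes. It is not part of the diff; the data is synthetic and the parameter values are purely illustrative.

# Sketch only, not from this commit: synthetic data, illustrative parameters.
import numpy as np
from pyod.models.kpca import KPCA

rng = np.random.RandomState(42)
X_train = rng.randn(200, 5)                                     # mostly inliers
X_test = np.vstack([rng.randn(20, 5), 5.0 * rng.randn(5, 5)])   # plus a few outliers

clf = KPCA(contamination=0.1, kernel="rbf")
clf.fit(X_train)

train_scores = clf.decision_scores_          # outlier scores on the training set
test_scores = clf.decision_function(X_test)  # higher score means more anomalous
test_labels = clf.predict(X_test)            # 0 = inlier, 1 = outlier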
154 changes: 70 additions & 84 deletions pyod/models/kpca.py
@@ -5,7 +5,6 @@
# License: BSD 2 clause

import numpy as np
import sklearn
from sklearn.decomposition import KernelPCA
from sklearn.utils import check_array, check_random_state
from sklearn.utils.validation import check_is_fitted
@@ -18,22 +17,22 @@ class PyODKernelPCA(KernelPCA):
"""A wrapper class for KernelPCA class of scikit-learn."""

def __init__(
self,
n_components=None,
kernel="rbf",
gamma=None,
degree=3,
coef0=1,
kernel_params=None,
alpha=1.0,
fit_inverse_transform=False,
eigen_solver="auto",
tol=0,
max_iter=None,
remove_zero_eig=False,
copy_X=True,
n_jobs=None,
random_state=None,
self,
n_components=None,
kernel="rbf",
gamma=None,
degree=3,
coef0=1,
kernel_params=None,
alpha=1.0,
fit_inverse_transform=False,
eigen_solver="auto",
tol=0,
max_iter=None,
remove_zero_eig=False,
copy_X=True,
n_jobs=None,
random_state=None,
):
super().__init__(
kernel=kernel,
@@ -198,53 +197,47 @@ class KPCA(BaseDetector):
"""

def __init__(
self,
contamination=0.1,
n_components=None,
n_selected_components=None,
kernel="rbf",
gamma=None,
degree=3,
coef0=1,
kernel_params=None,
alpha=1.0,
eigen_solver="auto",
tol=0,
max_iter=None,
remove_zero_eig=False,
copy_X=True,
n_jobs=None,
sampling=False,
subset_size=20,
random_state=None,
self,
contamination=0.1,
n_components=None,
n_selected_components=None,
kernel="rbf",
gamma=None,
degree=3,
coef0=1,
kernel_params=None,
alpha=1.0,
eigen_solver="auto",
tol=0,
max_iter=None,
remove_zero_eig=False,
copy_X=True,
n_jobs=None,
sampling=False,
subset_size=20,
random_state=None,
):
super().__init__(contamination=contamination)
self.n_components = n_components
self.n_selected_components = n_selected_components
self.copy_x = copy_X
self.kernel = kernel
self.gamma = gamma
self.degree = degree
self.coef0 = coef0
self.kernel_params = kernel_params
self.alpha = alpha
self.eigen_solver = eigen_solver
self.tol = tol
self.max_iter = max_iter
self.remove_zero_eig = remove_zero_eig
self.copy_X = copy_X
self.n_jobs = n_jobs
self.sampling = sampling
self.subset_size = subset_size
self.random_state = check_random_state(random_state)
self.decision_scores_ = None
self.n_selected_components_ = None

self.kpca = PyODKernelPCA(
n_components=n_components,
kernel=kernel,
gamma=gamma,
degree=degree,
coef0=coef0,
kernel_params=kernel_params,
alpha=alpha,
fit_inverse_transform=False,
eigen_solver=eigen_solver,
tol=tol,
max_iter=max_iter,
remove_zero_eig=remove_zero_eig,
copy_X=copy_X,
n_jobs=n_jobs,
)

def _check_subset_size(self, array):
"""Check subset size."""
n_samples, _ = array.shape
@@ -283,7 +276,7 @@ def fit(self, X, y=None):
"""

# validate inputs X and y (optional)
X = check_array(X, copy=self.copy_x)
X = check_array(X, copy=self.copy_X)
self._set_n_classes(y)

# perform subsampling to reduce time complexity
@@ -298,7 +291,7 @@

# copy the attributes from the sklearn Kernel PCA object
if self.n_components is None:
n_components = X.shape[1] # use all dimensions
n_components = X.shape[0] # use all dimensions
else:
if self.n_components < 1:
raise ValueError(
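The hunk above changes the "use all dimensions" default from X.shape[1] (the feature count) to X.shape[0] (the sample count). A short sketch, mine rather than the committer's, of why the sample count is the right bound for kernel PCA; attribute names assume scikit-learn >= 1.0.

# Sketch, not part of the diff: kernel PCA decomposes the n_samples x n_samples
# kernel matrix, so the number of available components is bounded by the number
# of samples, not the number of features.
import numpy as np
from sklearn.decomposition import KernelPCA

X = np.random.RandomState(0).randn(30, 5)          # 30 samples, 5 features
kpca = KernelPCA(n_components=None, kernel="rbf").fit(X)
print(kpca.eigenvalues_.shape)                      # up to (30,), never capped at 5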
@@ -320,20 +313,29 @@
param_name="n_selected_components",
)

self.kpca.fit(X)
self.kpca = PyODKernelPCA(
n_components=self.n_components,
kernel=self.kernel,
gamma=self.gamma,
degree=self.degree,
coef0=self.coef0,
kernel_params=self.kernel_params,
alpha=self.alpha,
fit_inverse_transform=False,
eigen_solver=self.eigen_solver,
tol=self.tol,
max_iter=self.max_iter,
remove_zero_eig=self.remove_zero_eig,
copy_X=self.copy_X,
n_jobs=self.n_jobs,
random_state=self.random_state,
)
x_transformed = self.kpca.fit_transform(X)
x_transformed = x_transformed[:, : self.n_selected_components_]

centerer = self.kpca.get_centerer
kernel = self.kpca.get_kernel

if int(sklearn.__version__[0]) < 1:
eigenvalues_ = self.kpca.lambdas_
eigenvectors_ = self.kpca.alphas_
else:
eigenvalues_ = self.kpca.eigenvalues_
eigenvectors_ = self.kpca.eigenvectors_

x_transformed = eigenvectors_ * np.sqrt(eigenvalues_)
x_transformed = x_transformed[:, : self.n_selected_components_]

potential = []
for i in range(X.shape[0]):
sample = X[i, :].reshape(1, -1)
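The added lines above construct the PyODKernelPCA sub-estimator inside fit() (now also forwarding random_state) and read the training projection from fit_transform(), which makes the removed sklearn-version branch over lambdas_/alphas_ versus eigenvalues_/eigenvectors_ unnecessary. A sketch of the identity the new code relies on, using plain scikit-learn (>= 1.0) and synthetic data; it is not part of the diff.

# Sketch: for the training data, KernelPCA.fit_transform already returns the
# eigenvectors scaled by sqrt(eigenvalues), the quantity the deleted branch
# reconstructed by hand from the version-dependent attribute names.
import numpy as np
from sklearn.decomposition import KernelPCA

X = np.random.RandomState(1).randn(50, 3)
kpca = KernelPCA(n_components=5, kernel="rbf")

projected = kpca.fit_transform(X)
manual = kpca.eigenvectors_ * np.sqrt(kpca.eigenvalues_)
assert np.allclose(projected, manual)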
@@ -372,24 +374,8 @@ def decision_function(self, X):
centerer = self.kpca.get_centerer
kernel = self.kpca.get_kernel
gram_matrix = kernel(X, self.kpca.X_fit_)
centered_g = centerer.transform(gram_matrix)

if int(sklearn.__version__[0]) < 1:
eigenvalues_ = self.kpca.lambdas_
eigenvectors_ = self.kpca.alphas_
else:
eigenvalues_ = self.kpca.eigenvalues_
eigenvectors_ = self.kpca.eigenvectors_

# scale eigenvectors (properly account for null-space for dot product)
non_zeros = np.flatnonzero(eigenvalues_)
scaled_alphas = np.zeros_like(eigenvectors_)
scaled_alphas[:, non_zeros] = eigenvectors_[:, non_zeros] / np.sqrt(
eigenvalues_[non_zeros]
)

# Project with a scalar product between K and the scaled eigenvectors
x_transformed = np.dot(centered_g, scaled_alphas)
x_transformed = self.kpca.transform(X)
x_transformed = x_transformed[:, : self.n_selected_components_]

potential = []
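In decision_function, the hand-rolled projection (center the test-versus-train Gram matrix, scale the eigenvectors by 1/sqrt(eigenvalue), take the dot product) is replaced by a single KernelPCA.transform call, which performs the same computation internally. Below is a sketch of that equivalence, not part of the diff; it touches the private _get_kernel and _centerer members only to mirror the deleted lines and assumes scikit-learn >= 1.0.

# Sketch: KernelPCA.transform reproduces the projection the removed lines
# computed explicitly.
import numpy as np
from sklearn.decomposition import KernelPCA

rng = np.random.RandomState(2)
X_train, X_new = rng.randn(40, 3), rng.randn(8, 3)
kpca = KernelPCA(n_components=4, kernel="rbf").fit(X_train)

gram = kpca._get_kernel(X_new, kpca.X_fit_)   # private, shown for illustration only
centered = kpca._centerer.transform(gram)     # private, shown for illustration only

non_zero = np.flatnonzero(kpca.eigenvalues_)
scaled = np.zeros_like(kpca.eigenvectors_)
scaled[:, non_zero] = kpca.eigenvectors_[:, non_zero] / np.sqrt(kpca.eigenvalues_[non_zero])

assert np.allclose(kpca.transform(X_new), centered @ scaled)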
22 changes: 2 additions & 20 deletions pyod/test/test_kpca.py
@@ -38,8 +38,8 @@ def setUp(self):

def test_parameters(self):
assert (
hasattr(self.clf, "decision_scores_")
and self.clf.decision_scores_ is not None
hasattr(self.clf, "decision_scores_")
and self.clf.decision_scores_ is not None
)
assert hasattr(self.clf, "labels_") and self.clf.labels_ is not None
assert hasattr(self.clf, "threshold_") and self.clf.threshold_ is not None
@@ -108,24 +108,6 @@ def test_fit_predict_score(self):
with assert_raises(NotImplementedError):
self.clf.fit_predict_score(self.X_test, self.y_test, scoring="something")

def test_predict_rank(self):
pred_socres = self.clf.decision_function(self.X_test)
pred_ranks = self.clf._predict_rank(self.X_test)

# assert the order is reserved
assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=4)
assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
assert_array_less(-0.1, pred_ranks)

def test_predict_rank_normalized(self):
pred_socres = self.clf.decision_function(self.X_test)
pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

# assert the order is reserved
assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=4)
assert_array_less(pred_ranks, 1.01)
assert_array_less(-0.1, pred_ranks)

def test_model_clone(self):
clone_clf = clone(self.clf)

