Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A bug fix for KPCA #494

Merged
merged 5 commits into from
Nov 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
154 changes: 70 additions & 84 deletions pyod/models/kpca.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
# License: BSD 2 clause

import numpy as np
import sklearn
from sklearn.decomposition import KernelPCA
from sklearn.utils import check_array, check_random_state
from sklearn.utils.validation import check_is_fitted
Expand All @@ -18,22 +17,22 @@ class PyODKernelPCA(KernelPCA):
"""A wrapper class for KernelPCA class of scikit-learn."""

def __init__(
self,
n_components=None,
kernel="rbf",
gamma=None,
degree=3,
coef0=1,
kernel_params=None,
alpha=1.0,
fit_inverse_transform=False,
eigen_solver="auto",
tol=0,
max_iter=None,
remove_zero_eig=False,
copy_X=True,
n_jobs=None,
random_state=None,
self,
n_components=None,
kernel="rbf",
gamma=None,
degree=3,
coef0=1,
kernel_params=None,
alpha=1.0,
fit_inverse_transform=False,
eigen_solver="auto",
tol=0,
max_iter=None,
remove_zero_eig=False,
copy_X=True,
n_jobs=None,
random_state=None,
):
super().__init__(
kernel=kernel,
Expand Down Expand Up @@ -198,53 +197,47 @@ class KPCA(BaseDetector):
"""

def __init__(
self,
contamination=0.1,
n_components=None,
n_selected_components=None,
kernel="rbf",
gamma=None,
degree=3,
coef0=1,
kernel_params=None,
alpha=1.0,
eigen_solver="auto",
tol=0,
max_iter=None,
remove_zero_eig=False,
copy_X=True,
n_jobs=None,
sampling=False,
subset_size=20,
random_state=None,
self,
contamination=0.1,
n_components=None,
n_selected_components=None,
kernel="rbf",
gamma=None,
degree=3,
coef0=1,
kernel_params=None,
alpha=1.0,
eigen_solver="auto",
tol=0,
max_iter=None,
remove_zero_eig=False,
copy_X=True,
n_jobs=None,
sampling=False,
subset_size=20,
random_state=None,
):
super().__init__(contamination=contamination)
self.n_components = n_components
self.n_selected_components = n_selected_components
self.copy_x = copy_X
self.kernel = kernel
self.gamma = gamma
self.degree = degree
self.coef0 = coef0
self.kernel_params = kernel_params
self.alpha = alpha
self.eigen_solver = eigen_solver
self.tol = tol
self.max_iter = max_iter
self.remove_zero_eig = remove_zero_eig
self.copy_X = copy_X
self.n_jobs = n_jobs
self.sampling = sampling
self.subset_size = subset_size
self.random_state = check_random_state(random_state)
self.decision_scores_ = None
self.n_selected_components_ = None

self.kpca = PyODKernelPCA(
n_components=n_components,
kernel=kernel,
gamma=gamma,
degree=degree,
coef0=coef0,
kernel_params=kernel_params,
alpha=alpha,
fit_inverse_transform=False,
eigen_solver=eigen_solver,
tol=tol,
max_iter=max_iter,
remove_zero_eig=remove_zero_eig,
copy_X=copy_X,
n_jobs=n_jobs,
)

def _check_subset_size(self, array):
"""Check subset size."""
n_samples, _ = array.shape
Expand Down Expand Up @@ -283,7 +276,7 @@ def fit(self, X, y=None):
"""

# validate inputs X and y (optional)
X = check_array(X, copy=self.copy_x)
X = check_array(X, copy=self.copy_X)
self._set_n_classes(y)

# perform subsampling to reduce time complexity
Expand All @@ -298,7 +291,7 @@ def fit(self, X, y=None):

# copy the attributes from the sklearn Kernel PCA object
if self.n_components is None:
n_components = X.shape[1] # use all dimensions
n_components = X.shape[0] # use all components (at most n_samples in kernel space)
else:
if self.n_components < 1:
raise ValueError(
Expand All @@ -320,20 +313,29 @@ def fit(self, X, y=None):
param_name="n_selected_components",
)

self.kpca.fit(X)
self.kpca = PyODKernelPCA(
n_components=self.n_components,
kernel=self.kernel,
gamma=self.gamma,
degree=self.degree,
coef0=self.coef0,
kernel_params=self.kernel_params,
alpha=self.alpha,
fit_inverse_transform=False,
eigen_solver=self.eigen_solver,
tol=self.tol,
max_iter=self.max_iter,
remove_zero_eig=self.remove_zero_eig,
copy_X=self.copy_X,
n_jobs=self.n_jobs,
random_state=self.random_state,
)
x_transformed = self.kpca.fit_transform(X)
x_transformed = x_transformed[:, : self.n_selected_components_]

centerer = self.kpca.get_centerer
kernel = self.kpca.get_kernel

if int(sklearn.__version__[0]) < 1:
eigenvalues_ = self.kpca.lambdas_
eigenvectors_ = self.kpca.alphas_
else:
eigenvalues_ = self.kpca.eigenvalues_
eigenvectors_ = self.kpca.eigenvectors_

x_transformed = eigenvectors_ * np.sqrt(eigenvalues_)
x_transformed = x_transformed[:, : self.n_selected_components_]

potential = []
for i in range(X.shape[0]):
sample = X[i, :].reshape(1, -1)
Expand Down Expand Up @@ -372,24 +374,8 @@ def decision_function(self, X):
centerer = self.kpca.get_centerer
kernel = self.kpca.get_kernel
gram_matrix = kernel(X, self.kpca.X_fit_)
centered_g = centerer.transform(gram_matrix)

if int(sklearn.__version__[0]) < 1:
eigenvalues_ = self.kpca.lambdas_
eigenvectors_ = self.kpca.alphas_
else:
eigenvalues_ = self.kpca.eigenvalues_
eigenvectors_ = self.kpca.eigenvectors_

# scale eigenvectors (properly account for null-space for dot product)
non_zeros = np.flatnonzero(eigenvalues_)
scaled_alphas = np.zeros_like(eigenvectors_)
scaled_alphas[:, non_zeros] = eigenvectors_[:, non_zeros] / np.sqrt(
eigenvalues_[non_zeros]
)

# Project with a scalar product between K and the scaled eigenvectors
x_transformed = np.dot(centered_g, scaled_alphas)
x_transformed = self.kpca.transform(X)
x_transformed = x_transformed[:, : self.n_selected_components_]

potential = []
Expand Down
22 changes: 2 additions & 20 deletions pyod/test/test_kpca.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ def setUp(self):

def test_parameters(self):
assert (
hasattr(self.clf, "decision_scores_")
and self.clf.decision_scores_ is not None
hasattr(self.clf, "decision_scores_")
and self.clf.decision_scores_ is not None
)
assert hasattr(self.clf, "labels_") and self.clf.labels_ is not None
assert hasattr(self.clf, "threshold_") and self.clf.threshold_ is not None
Expand Down Expand Up @@ -108,24 +108,6 @@ def test_fit_predict_score(self):
with assert_raises(NotImplementedError):
self.clf.fit_predict_score(self.X_test, self.y_test, scoring="something")

def test_predict_rank(self):
pred_socres = self.clf.decision_function(self.X_test)
pred_ranks = self.clf._predict_rank(self.X_test)

# assert the order is preserved
assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=4)
assert_array_less(pred_ranks, self.X_train.shape[0] + 1)
assert_array_less(-0.1, pred_ranks)

def test_predict_rank_normalized(self):
pred_socres = self.clf.decision_function(self.X_test)
pred_ranks = self.clf._predict_rank(self.X_test, normalized=True)

# assert the order is preserved
assert_allclose(rankdata(pred_ranks), rankdata(pred_socres), atol=4)
assert_array_less(pred_ranks, 1.01)
assert_array_less(-0.1, pred_ranks)

def test_model_clone(self):
clone_clf = clone(self.clf)

Expand Down
Loading