Example #1
    def fit(self, X, y=None):
        # Validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)

        # PCA is recommended to be applied to standardized data (zero mean
        # and unit variance).
        if self.standardization:
            self.scaler_ = StandardScaler().fit(X)
            X = self.scaler_.transform(X)

        self.detector_ = sklearn_PCA(n_components=self.n_components,
                                     copy=self.copy,
                                     whiten=self.whiten,
                                     svd_solver=self.svd_solver,
                                     tol=self.tol,
                                     iterated_power=self.iterated_power,
                                     random_state=self.random_state)
        self.detector_.fit(X=X, y=y)

        # copy the attributes from the sklearn PCA object
        self.n_components_ = self.detector_.n_components_
        self.components_ = self.detector_.components_

        # validate the number of components to be used for outlier detection
        if self.n_selected_components is None:
            self.n_selected_components_ = self.n_components_
        else:
            self.n_selected_components_ = self.n_selected_components
        check_parameter(self.n_selected_components_,
                        1,
                        self.n_components_,
                        include_left=True,
                        include_right=True,
                        param_name='n_selected_components_')

        # use eigenvalues as the weights of eigenvectors
        self.w_components_ = np.ones([
            self.n_components_,
        ])
        if self.weighted:
            self.w_components_ = self.detector_.explained_variance_ratio_

        # The outlier score is the sum of the weighted distances from each
        # sample to the eigenvectors; eigenvectors with smaller eigenvalues
        # have more influence.
        # Not all eigenvectors are used: only the n_selected_components_
        # smallest are used, since they better reflect the variance change.

        self.selected_components_ = self.components_[
            -1 * self.n_selected_components_:, :]
        self.selected_w_components_ = self.w_components_[
            -1 * self.n_selected_components_:]

        self.decision_scores_ = np.sum(cdist(X, self.selected_components_) /
                                       self.selected_w_components_,
                                       axis=1).ravel()

        self._process_decision_scores()
        return self
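The scoring step above (weighted distances to the low-variance eigenvectors) can be reproduced outside the class. A minimal standalone sketch, assuming standardized data and scipy's cdist; the toy data and variable names are illustrative, not part of the original class:

import numpy as np
from scipy.spatial.distance import cdist
from sklearn.decomposition import PCA as sklearn_PCA
from sklearn.preprocessing import StandardScaler

rng = np.random.RandomState(42)
X = rng.randn(100, 5)
X[:3] += 6  # shift a few rows so they act as outliers

X_std = StandardScaler().fit_transform(X)

pca = sklearn_PCA(n_components=5, random_state=42).fit(X_std)
n_selected = 3  # keep the components with the smallest eigenvalues
components = pca.components_[-n_selected:, :]
weights = pca.explained_variance_ratio_[-n_selected:]

# same formula as decision_scores_ above: distances to the selected
# eigenvectors, divided by their weights and summed per sample
scores = np.sum(cdist(X_std, components) / weights, axis=1).ravel()
print(scores[:5])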
Example #2
File: pca.py Project: deltat99/Pyod
    def fit(self, X, y=None):
        """
        Fit the model using X as training data.

        :param X: Training data. If array or matrix,
            shape [n_samples, n_features],
            or [n_samples, n_samples] if metric='precomputed'.
        :type X: {array-like, sparse matrix, BallTree, KDTree}

        :return: self
        :rtype: object
        """
        # Validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)

        self.detector_ = sklearn_PCA(n_components=self.n_components,
                                     copy=self.copy,
                                     whiten=self.whiten,
                                     svd_solver=self.svd_solver,
                                     tol=self.tol,
                                     iterated_power=self.iterated_power,
                                     random_state=self.random_state)
        self.detector_.fit(X=X, y=y)
        # self.decision_scores_ =
        # self._process_decision_scores()
        return self
Example #3
    def fit(self, X, y=None):
        """Fit detector. y is ignored in unsupervised methods.
        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.
        y : Ignored
            Not used, present for API consistency by convention.
        Returns
        -------
        self : object
            Fitted estimator.
        """
        # flatten image-like samples of shape (n_samples, nx, ny) into
        # (n_samples, nx * ny) before fitting PCA
        nsamples, nx, ny = X.shape
        X = X.reshape((nsamples, nx * ny))
        self.detector_ = sklearn_PCA(n_components=self.n_components,
                                     copy=self.copy,
                                     whiten=self.whiten,
                                     svd_solver=self.svd_solver,
                                     tol=self.tol,
                                     iterated_power=self.iterated_power,
                                     random_state=self.random_state)
        self.detector_.fit(X=X, y=y)

        # copy the attributes from the sklearn PCA object
        self.n_components_ = self.detector_.n_components_
        self.components_ = self.detector_.components_

        # validate the number of components to be used for outlier detection
        if self.n_selected_components is None:
            self.n_selected_components_ = self.n_components_
        else:
            self.n_selected_components_ = self.n_selected_components
        self.w_components_ = np.ones([
            self.n_components_,
        ])
        if self.weighted:
            self.w_components_ = self.detector_.explained_variance_ratio_

        # The outlier score is the sum of the weighted distances from each
        # sample to the eigenvectors; eigenvectors with smaller eigenvalues
        # have more influence.
        # Not all eigenvectors are used: only the n_selected_components_
        # smallest are used, since they better reflect the variance change.

        self.selected_components_ = self.components_[
            -1 * self.n_selected_components_:, :]
        self.selected_w_components_ = self.w_components_[
            -1 * self.n_selected_components_:]

        self.decision_scores_ = np.sum(cdist(X, self.selected_components_) /
                                       self.selected_w_components_,
                                       axis=1).ravel()

        self._process_decision_scores()
        return self
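This example only differs from Example #1 in the reshape step for image-like input. A small standalone sketch of that step on toy data (names and shapes are illustrative):

import numpy as np
from sklearn.decomposition import PCA as sklearn_PCA

rng = np.random.RandomState(0)
images = rng.rand(32, 8, 8)  # 32 samples, each an 8 x 8 array

# flatten each sample before fitting PCA, exactly as in fit() above
nsamples, nx, ny = images.shape
flat = images.reshape((nsamples, nx * ny))  # shape (32, 64)

pca = sklearn_PCA(n_components=5).fit(flat)
print(pca.components_.shape)  # (5, 64)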
Example #4
def analysisPCA(cryo_data, normalize=True):
    ### Results from the custom PCA implementation on this dataset
    new_data = PCA(cryo_data, normalize=normalize)
    plotResults_2D(new_data, cryo_data.iloc[:, -1],
                   'Custom PCA Results on cryo Dataset - Normalized = ' + str(normalize))

    ### Results from the sklearn version of PCA on this dataset, for comparison
    pca = sklearn_PCA(n_components=2)
    if normalize:
        # sklearn_SS is assumed to alias sklearn's StandardScaler
        sklearn_data = sklearn_SS().fit_transform(cryo_data.iloc[:, :-1])
        sklearn_new_data = pca.fit_transform(sklearn_data)
    else:
        sklearn_new_data = pca.fit_transform(cryo_data.iloc[:, :-1])
    plotResults_2D(pd.DataFrame(sklearn_new_data), cryo_data.iloc[:, -1],
                   'Sklearn PCA Results on cryo Dataset - Normalized = ' + str(normalize))
Example #5
    def train(self, data, labels=None):
        # stacking input data
        data = np.vstack(data)
        # checking to make sure that enough components are specified
        if data.shape[1] < self.n_components:
            self.logger.warning("more components specified than features!")
            self.logger.warning("truncating n_components({}) to num_features({})"\
                                    .format(self.n_components,data.shape[1]))
            # reinstantiating the class with fewer components
            # this is done to make sure that self.io_map is accurate
            self.__init__(data.shape[1], self.random_state)

        self.pca = sklearn_PCA(self.n_components)
        self.pca.fit(data)
Example #6
    def fit(self, X):
        self.X = X  # store X in the output
        sample_ids = X.index
        feature_ids = X.columns
        X = X.to_numpy()  # as_matrix() was removed from pandas; to_numpy() is the replacement
        nuee_PCA = sklearn_PCA(n_components=self.n_components,
                               copy=self.copy,
                               whiten=self.whiten,
                               svd_solver=self.svd_solver,
                               tol=self.tol,
                               iterated_power=self.iterated_power,
                               random_state=self.random_state)
        nuee_PCA.fit(X)

        ordi_column_names = [
            'PCA%d' % (i + 1)
            for i in range(nuee_PCA.explained_variance_.shape[0])
        ]

        # prepare output
        eigenvalues = nuee_PCA.explained_variance_
        p_explained = pd.Series(eigenvalues / eigenvalues.sum(),
                                index=ordi_column_names)

        if self.scaling == 1:
            sample_scores = nuee_PCA.transform(X)
            biplot_scores = nuee_PCA.components_.T
        elif self.scaling == 2 or self.scaling == 'correlation':
            sample_scores = nuee_PCA.transform(X).dot(
                np.diag(eigenvalues**(-0.5)))
            biplot_scores = nuee_PCA.components_.dot(np.diag(
                eigenvalues**0.5)).T

        # Add PCA ordination object names to self
        self.ordiobject_type = 'PCA'
        self.method_name = 'Principal Components Analysis'
        self.ordi_fitted = nuee_PCA
        self.eigenvalues = eigenvalues
        self.proportion_explained = p_explained
        self.sample_scores = pd.DataFrame(sample_scores,
                                          index=sample_ids,
                                          columns=ordi_column_names)
        self.sample_scores.index.name = 'ID'
        self.biplot_scores = pd.DataFrame(biplot_scores,
                                          index=feature_ids,
                                          columns=ordi_column_names)
        self.biplot_scores.index.name = 'ID'
        return self
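The scaling-1 vs scaling-2 ("correlation") conventions used above can be illustrated with sklearn alone. A standalone sketch of the two conventions; the DataFrame and column names are purely illustrative:

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA as sklearn_PCA

rng = np.random.RandomState(0)
X = pd.DataFrame(rng.randn(50, 4), columns=list('abcd'))

pca = sklearn_PCA(n_components=4).fit(X.to_numpy())
eigenvalues = pca.explained_variance_

# scaling 1: raw sample scores, eigenvectors as biplot arrows
scores_1 = pca.transform(X.to_numpy())
biplot_1 = pca.components_.T

# scaling 2 (correlation biplot): scores shrunk by sqrt(eigenvalue),
# arrows stretched by sqrt(eigenvalue)
scores_2 = scores_1.dot(np.diag(eigenvalues ** -0.5))
biplot_2 = pca.components_.T.dot(np.diag(eigenvalues ** 0.5))
print(scores_2.shape, biplot_2.shape)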
Example #7
def sklearn_reduceData(data, dim):
    # Requires sklearn_PCA to be imported
    model = sklearn_PCA(dim, svd_solver="full")
    reduced = model.fit_transform(data)
    return reduced, model.components_.T, model.mean_
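A possible usage sketch for the helper above, including an approximate reconstruction from its return values (toy data, illustrative only):

import numpy as np

rng = np.random.RandomState(0)
data = rng.randn(20, 6)

reduced, components, mean = sklearn_reduceData(data, 3)  # components has shape (6, 3)

# approximate reconstruction from the reduced representation
approx = reduced.dot(components.T) + mean
print(np.abs(data - approx).max())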
Example #8
def generate_meta_features(X):
    """Get the meta-features of a datasets X

    Parameters
    ----------
    X : numpy array of shape (n_samples, n_features)
        Input array

    Returns
    -------
    meta_features : numpy array of shape (1, 200)
        Meta-feature in dimension of 200

    """
    # outliers_fraction = np.count_nonzero(y) / len(y)
    # outliers_percentage = round(outliers_fraction * 100, ndigits=4)
    X = check_array(X)

    meta_vec = []
    meta_vec_names = []

    # on the sample level
    n_samples, n_features = X.shape[0], X.shape[1]

    meta_vec.append(n_samples)
    meta_vec.append(n_features)

    meta_vec_names.append('n_samples')
    meta_vec_names.append('n_features')

    sample_mean = np.mean(X)
    sample_median = np.median(X)
    sample_var = np.var(X)
    sample_min = np.min(X)
    sample_max = np.max(X)
    sample_std = np.std(X)

    q1, q25, q75, q99 = np.percentile(X, [1, 25, 75, 99])  # percentiles are given on a 0-100 scale
    iqr = q75 - q25

    normalized_mean = sample_mean / sample_max
    normalized_median = sample_median / sample_max
    sample_range = sample_max - sample_min
    sample_gini = gini(X)
    med_abs_dev = np.median(np.absolute(X - sample_median))
    avg_abs_dev = np.mean(np.absolute(X - sample_mean))
    quant_coeff_disp = (q75 - q25) / (q75 + q25)
    coeff_var = sample_var / sample_mean

    outliers_15iqr = np.logical_or(X < (q25 - 1.5 * iqr), X >
                                   (q75 + 1.5 * iqr))
    outliers_3iqr = np.logical_or(X < (q25 - 3 * iqr), X > (q75 + 3 * iqr))
    outliers_1_99 = np.logical_or(X < q1, X > q99)
    outliers_3std = np.logical_or(X < (sample_mean - 3 * sample_std), X >
                                  (sample_mean + 3 * sample_std))

    percent_outliers_15iqr = np.sum(outliers_15iqr) / len(X)
    percent_outliers_3iqr = np.sum(outliers_3iqr) / len(X)
    percent_outliers_1_99 = np.sum(outliers_1_99) / len(X)
    percent_outliers_3std = np.sum(outliers_3std) / len(X)

    has_outliers_15iqr = np.any(outliers_15iqr).astype(int)
    has_outliers_3iqr = np.any(outliers_3iqr).astype(int)
    has_outliers_1_99 = np.any(outliers_1_99).astype(int)
    has_outliers_3std = np.any(outliers_3std).astype(int)

    meta_vec.extend([
        sample_mean,
        sample_median,
        sample_var,
        sample_min,
        sample_max,
        sample_std,
        q1,
        q25,
        q75,
        q99,
        iqr,
        normalized_mean,
        normalized_median,
        sample_range,
        sample_gini,
        med_abs_dev,
        avg_abs_dev,
        quant_coeff_disp,
        coeff_var,
        # moment_5, moment_6, moment_7, moment_8, moment_9, moment_10,
        percent_outliers_15iqr,
        percent_outliers_3iqr,
        percent_outliers_1_99,
        percent_outliers_3std,
        has_outliers_15iqr,
        has_outliers_3iqr,
        has_outliers_1_99,
        has_outliers_3std
    ])

    meta_vec_names.extend([
        'sample_mean',
        'sample_median',
        'sample_var',
        'sample_min',
        'sample_max',
        'sample_std',
        'q1',
        'q25',
        'q75',
        'q99',
        'iqr',
        'normalized_mean',
        'normalized_median',
        'sample_range',
        'sample_gini',
        'med_abs_dev',
        'avg_abs_dev',
        'quant_coeff_disp',
        'coeff_var',
        # moment_5, moment_6, moment_7, moment_8, moment_9, moment_10,
        'percent_outliers_15iqr',
        'percent_outliers_3iqr',
        'percent_outliers_1_99',
        'percent_outliers_3std',
        'has_outliers_15iqr',
        'has_outliers_3iqr',
        'has_outliers_1_99',
        'has_outliers_3std'
    ])

    ###########################################################################

    normality_k2, normality_p = normaltest(X)
    is_normal_5 = (normality_p < 0.05).astype(int)
    is_normal_1 = (normality_p < 0.01).astype(int)

    meta_vec.extend(list_process(normality_p))
    meta_vec.extend(list_process(is_normal_5))
    meta_vec.extend(list_process(is_normal_1))

    meta_vec_names.extend(list_process_name('normality_p'))
    meta_vec_names.extend(list_process_name('is_normal_5'))
    meta_vec_names.extend(list_process_name('is_normal_1'))

    moment_5 = moment(X, moment=5)
    moment_6 = moment(X, moment=6)
    moment_7 = moment(X, moment=7)
    moment_8 = moment(X, moment=8)
    moment_9 = moment(X, moment=9)
    moment_10 = moment(X, moment=10)
    meta_vec.extend(list_process(moment_5))
    meta_vec.extend(list_process(moment_6))
    meta_vec.extend(list_process(moment_7))
    meta_vec.extend(list_process(moment_8))
    meta_vec.extend(list_process(moment_9))
    meta_vec.extend(list_process(moment_10))
    meta_vec_names.extend(list_process_name('moment_5'))
    meta_vec_names.extend(list_process_name('moment_6'))
    meta_vec_names.extend(list_process_name('moment_7'))
    meta_vec_names.extend(list_process_name('moment_8'))
    meta_vec_names.extend(list_process_name('moment_9'))
    meta_vec_names.extend(list_process_name('moment_10'))

    # note: this is for each dimension == the number of dimensions
    skewness_list = skew(X).reshape(-1, 1)
    skew_values = list_process(skewness_list)
    meta_vec.extend(skew_values)
    meta_vec_names.extend(list_process_name('skewness'))

    # note: this is for each dimension == the number of dimensions
    kurtosis_list = kurtosis(X)
    kurtosis_values = list_process(kurtosis_list)
    meta_vec.extend(kurtosis_values)
    meta_vec_names.extend(list_process_name('kurtosis'))

    correlation = np.nan_to_num(pd.DataFrame(X).corr(), nan=0)
    correlation_list = flatten_diagonally(correlation)[0:int(
        (n_features * n_features - n_features) / 2)]
    correlation_values = list_process(correlation_list)
    meta_vec.extend(correlation_values)
    meta_vec_names.extend(list_process_name('correlation'))

    covariance = np.cov(X.T)
    covariance_list = flatten_diagonally(covariance)[0:int(
        (n_features * n_features - n_features) / 2)]
    covariance_values = list_process(covariance_list)
    meta_vec.extend(covariance_values)
    meta_vec_names.extend(list_process_name('covariance'))

    # sparsity
    rep_counts = []
    for i in range(n_features):
        rep_counts.append(len(np.unique(X[:, i])))
    sparsity_list = np.asarray(rep_counts) / (n_samples)
    sparsity = list_process(sparsity_list)
    meta_vec.extend(sparsity)
    meta_vec_names.extend(list_process_name('sparsity'))

    # ANOVA p value
    p_values_list = []
    all_perm = list(itertools.combinations(list(range(n_features)), 2))
    for j in all_perm:
        p_values_list.append(f_oneway(X[:, j[0]], X[:, j[1]])[1])
    anova_p_value = list_process(np.asarray(p_values_list))
    # anova_p_value = np.mean(p_values_list)
    # anova_p_value_exceed_thresh = np.mean((np.asarray(p_values_list)<0.05).astype(int))
    meta_vec.extend(anova_p_value)
    meta_vec_names.extend(list_process_name('anova_p_value'))

    # pca
    pca_transformer = sklearn_PCA(n_components=3)
    X_transform = pca_transformer.fit_transform(X)

    # scores on the first principal component, i.e. the first column of the
    # transformed data
    pca_fpc = list_process(X_transform[:, 0],
                           r_min=False,
                           r_max=False,
                           r_mean=False,
                           r_std=True,
                           r_skew=True,
                           r_kurtosis=True)
    meta_vec.extend(pca_fpc)
    meta_vec_names.extend(
        ['first_pca_std', 'first_pca_skewness', 'first_pca_kurtosis'])

    # entropy
    entropy_list = []
    for i in range(n_features):
        counts = pd.Series(X[:, i]).value_counts()
        entropy_list.append(entropy(counts) / n_samples)
    entropy_values = list_process(entropy_list)
    meta_vec.extend(entropy_values)
    meta_vec_names.extend(list_process_name('entropy'))

    ##############################Landmarkers######################################
    # HBOS
    clf = HBOS(n_bins=10)
    clf.fit(X)
    HBOS_hists = clf.hist_
    HBOS_mean = np.mean(HBOS_hists, axis=0)
    HBOS_max = np.max(HBOS_hists, axis=0)
    HBOS_min = np.min(HBOS_hists, axis=0)
    meta_vec.extend(list_process(HBOS_mean))
    meta_vec.extend(list_process(HBOS_max))
    meta_vec.extend(list_process(HBOS_min))
    meta_vec_names.extend(list_process_name('HBOS_mean'))
    meta_vec_names.extend(list_process_name('HBOS_max'))
    meta_vec_names.extend(list_process_name('HBOS_min'))

    # IForest
    n_estimators = 100
    clf = IForest(n_estimators=n_estimators)
    clf.fit(X)

    n_leaves = []
    n_depth = []
    fi_mean = []
    fi_max = []

    # doing this for each sub-trees
    for i in range(n_estimators):
        n_leaves.append(clf.estimators_[i].get_n_leaves())
        n_depth.append(clf.estimators_[i].get_depth())
        fi_mean.append(clf.estimators_[i].feature_importances_.mean())
        fi_max.append(clf.estimators_[i].feature_importances_.max())
        # print(clf.estimators_[i].tree_)

    meta_vec.extend(list_process(n_leaves))
    meta_vec.extend(list_process(n_depth))
    meta_vec.extend(list_process(fi_mean))
    meta_vec.extend(list_process(fi_max))

    meta_vec_names.extend(list_process_name('IForest_n_leaves'))
    meta_vec_names.extend(list_process_name('IForest_n_depth'))
    meta_vec_names.extend(list_process_name('IForest_fi_mean'))
    meta_vec_names.extend(list_process_name('IForest_fi_max'))

    # PCA
    clf = PCA(n_components=3)
    clf.fit(X)
    meta_vec.extend(clf.explained_variance_ratio_)
    meta_vec.extend(clf.singular_values_)
    meta_vec_names.extend(
        ['pca_expl_ratio_1', 'pca_expl_ratio_2', 'pca_expl_ratio_3'])
    meta_vec_names.extend(['pca_sv_1', 'pca_sv_2', 'pca_sv_3'])

    # LODA
    n_bins = 10
    n_random_cuts = 100

    n_hists_mean = []
    n_hists_max = []

    n_cuts_mean = []
    n_cuts_max = []

    clf = LODA(n_bins=n_bins, n_random_cuts=n_random_cuts)
    clf.fit(X)

    for i in range(n_bins):
        n_hists_mean.append(clf.histograms_[:, i].mean())
        n_hists_max.append(clf.histograms_[:, i].max())
    for i in range(n_random_cuts):
        n_cuts_mean.append(clf.histograms_[i, :].mean())
        n_cuts_max.append(clf.histograms_[i, :].max())

    meta_vec.extend(list_process(n_hists_mean))
    meta_vec.extend(list_process(n_hists_max))
    meta_vec.extend(list_process(n_cuts_mean))
    meta_vec.extend(list_process(n_cuts_max))

    meta_vec_names.extend(list_process_name('LODA_n_hists_mean'))
    meta_vec_names.extend(list_process_name('LODA_n_hists_max'))
    meta_vec_names.extend(list_process_name('LODA_n_cuts_mean'))
    meta_vec_names.extend(list_process_name('LODA_n_cuts_max'))

    return meta_vec, meta_vec_names
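A minimal usage sketch, assuming generate_meta_features and its helper dependencies (gini, list_process, list_process_name, flatten_diagonally, and the pyod detectors) are importable in the current scope; the random data is purely illustrative:

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(200, 6)

meta_vec, meta_vec_names = generate_meta_features(X)
print(len(meta_vec), len(meta_vec_names))
print(meta_vec_names[:5])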
Example #9
    def __init__(self):
        # self.classifier = sklearn_PCA(n_components='mle')
        self.classifier = sklearn_PCA(
            n_components=900)  # this is for the Gabor filter only
Example #10
    def __init__(self, n_components=4, whiten=True, name="PCA4Feature"):
        super(PCA4Feature, self).__init__(name)
        self.n_components = n_components
        self.whiten = whiten
        self.model = sklearn_PCA(n_components=self.n_components,
                                 whiten=self.whiten)
Example #11
File: pca.py Project: Mm24/pyod
    def fit(self, X, y=None):
        """Fit detector. y is ignored in unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : Ignored
            Not used, present for API consistency by convention.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        # validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)

        # PCA is recommended to be applied to standardized data (zero mean
        # and unit variance).
        if self.standardization:
            X, self.scaler_ = standardizer(X, keep_scalar=True)

        self.detector_ = sklearn_PCA(n_components=self.n_components,
                                     copy=self.copy,
                                     whiten=self.whiten,
                                     svd_solver=self.svd_solver,
                                     tol=self.tol,
                                     iterated_power=self.iterated_power,
                                     random_state=self.random_state)
        self.detector_.fit(X=X, y=y)

        # copy the attributes from the sklearn PCA object
        self.n_components_ = self.detector_.n_components_
        self.components_ = self.detector_.components_

        # validate the number of components to be used for outlier detection
        if self.n_selected_components is None:
            self.n_selected_components_ = self.n_components_
        else:
            self.n_selected_components_ = self.n_selected_components
        check_parameter(self.n_selected_components_,
                        1,
                        self.n_components_,
                        include_left=True,
                        include_right=True,
                        param_name='n_selected_components_')

        # use eigenvalues as the weights of eigenvectors
        self.w_components_ = np.ones([
            self.n_components_,
        ])
        if self.weighted:
            self.w_components_ = self.detector_.explained_variance_ratio_

        # The outlier score is the sum of the weighted distances from each
        # sample to the eigenvectors; eigenvectors with smaller eigenvalues
        # have more influence.
        # Not all eigenvectors are used: only the n_selected_components_
        # smallest are used, since they better reflect the variance change.

        self.selected_components_ = self.components_[
            -1 * self.n_selected_components_:, :]
        self.selected_w_components_ = self.w_components_[
            -1 * self.n_selected_components_:]

        self.decision_scores_ = np.sum(cdist(X, self.selected_components_) /
                                       self.selected_w_components_,
                                       axis=1).ravel()

        self._process_decision_scores()
        return self
Example #12
    def __init__(self):
        # self.classifier = sklearn_PCA(n_components='mle')
        self.classifier = sklearn_PCA(n_components=900)  # this is for the Gabor filter only
Example #13
from sklearn import datasets
from sklearn.decomposition import PCA as sklearn_PCA
import matplotlib.pyplot as plt
import numpy as np
# from mpl_toolkits.mplot3d import Axes3D

iris = datasets.load_iris()

X = iris.data
Y = iris.target
target_names = iris.target_names

np.random.seed(5)
skPCA = sklearn_PCA(n_components=2)
X_learn = skPCA.fit(X).transform(X)

plt.figure()
colours = ['navy', 'turquoise', 'darkorange']
lw = 2

for color, i, target_name in zip(colours, [0, 1, 2], target_names):
    plt.scatter(X_learn[Y == i, 0], X_learn[Y == i, 1],
                color=color, alpha=.8, lw=lw, label=target_name)

plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('PCA of IRIS dataset')
plt.show()
Example #14
    def fit(self, X, y=None):
        """Fit detector. y is optional for unsupervised methods.

        Parameters
        ----------
        X : numpy array of shape (n_samples, n_features)
            The input samples.

        y : numpy array of shape (n_samples,), optional (default=None)
            The ground truth of the input samples (labels).
        """
        # validate inputs X and y (optional)
        X = check_array(X)
        self._set_n_classes(y)

        # PCA is recommended to be applied to standardized data (zero mean
        # and unit variance).
        if self.standardization:
            X, self.scaler_ = standardizer(X, keep_scalar=True)

        self.detector_ = sklearn_PCA(n_components=self.n_components,
                                     copy=self.copy,
                                     whiten=self.whiten,
                                     svd_solver=self.svd_solver,
                                     tol=self.tol,
                                     iterated_power=self.iterated_power,
                                     random_state=self.random_state)
        self.detector_.fit(X=X, y=y)

        # copy the attributes from the sklearn PCA object
        self.n_components_ = self.detector_.n_components_
        self.components_ = self.detector_.components_

        # validate the number of components to be used for outlier detection
        if self.n_selected_components is None:
            self.n_selected_components_ = self.n_components_
        else:
            self.n_selected_components_ = self.n_selected_components
        check_parameter(self.n_selected_components_, 1, self.n_components_,
                        include_left=True, include_right=True,
                        param_name='n_selected_components_')

        # use eigenvalues as the weights of eigenvectors
        self.w_components_ = np.ones([self.n_components_, ])
        if self.weighted:
            self.w_components_ = self.detector_.explained_variance_ratio_

        # The outlier score is the sum of the weighted distances from each
        # sample to the eigenvectors; eigenvectors with smaller eigenvalues
        # have more influence.
        # Not all eigenvectors are used: only the n_selected_components_
        # smallest are used, since they better reflect the variance change.

        self.selected_components_ = self.components_[
                                    -1 * self.n_selected_components_:, :]
        self.selected_w_components_ = self.w_components_[
                                      -1 * self.n_selected_components_:]

        self.decision_scores_ = np.sum(
            cdist(X, self.selected_components_) / self.selected_w_components_,
            axis=1).ravel()

        self._process_decision_scores()
        return self