示例#1
0
 def MiniBach_SparcePCA(self,N_component):
     MiniPCA_calculator = skdecomp.MiniBatchSparsePCA(N_component,batch_size=15,normalize_components=True)
     self.MiniPCs = MiniPCA_calculator.fit(self.vector_centered)
     all_MiniPCs = self.MiniPCs.components_
     pp.save_variable(all_MiniPCs,save_folder+r'\\MiniPCed_Data.pkl')
     print('MiniBach Sparce PCA done, generating graphs')
     self.cell_graph_plot('MINIPCA',all_MiniPCs)
示例#2
0
 def MiniBatchSparsePCA(self, source):
     min_max_scaler = preprocessing.MinMaxScaler()
     data_source = min_max_scaler.fit_transform(source)
     pca = decomposition.MiniBatchSparsePCA(n_components=2)
     #没有相关参数------------------------
     result = {}
     result['data'] = pca.fit_transform(data_source)
     result['params'] = 0
     return result
示例#3
0
def gen_estimators():
    '''
    List of the different estimators, whether to center and transpose the problem, and whether the transformer uses the clustering API.
    '''
    rng = RandomState(0)
    estimators = [
        ('Eigenfaces - RandomizedPCA',
         decomposition.RandomizedPCA(n_components=n_components,
                                     whiten=True), True),
        ('Non-negative components - NMF tol=1e-4',
         decomposition.NMF(n_components=n_components,
                           init='nndsvda',
                           tol=1e-4,
                           solver='cd'), False),
        ('Non-negative components - NMF tol=1e-6',
         decomposition.NMF(
             n_components=n_components,
             init='nndsvd',
         ), False),
        ('Independent components - FastICA',
         decomposition.FastICA(n_components=n_components, whiten=True), True),
        ('Sparse comp. - MiniBatchSparsePCA',
         decomposition.MiniBatchSparsePCA(n_components=n_components,
                                          alpha=0.8,
                                          n_iter=100,
                                          batch_size=3,
                                          random_state=rng), True),
        ('MiniBatchDictionaryLearning',
         decomposition.MiniBatchDictionaryLearning(n_components=15,
                                                   alpha=0.1,
                                                   n_iter=50,
                                                   batch_size=3,
                                                   random_state=rng), True),
        ('Cluster centers - MiniBatchKMeans',
         MiniBatchKMeans(n_clusters=n_components,
                         tol=1e-3,
                         batch_size=20,
                         max_iter=50,
                         random_state=rng), True),
        ('Factor Analysis components - FA',
         decomposition.FactorAnalysis(n_components=n_components,
                                      max_iter=2), True),
    ]
    return estimators
示例#4
0
# List of the different estimators, whether to center and transpose the
# problem, and whether the transformer uses the clustering API.
estimators = [
    ('Eigenfaces - PCA using randomized SVD',
     decomposition.PCA(n_components=n_components,
                       svd_solver='randomized',
                       whiten=True), True),
    ('Non-negative components - NMF',
     decomposition.NMF(n_components=n_components, init='nndsvda',
                       tol=5e-3), False),
    ('Independent components - FastICA',
     decomposition.FastICA(n_components=n_components, whiten=True), True),
    ('Sparse comp. - MiniBatchSparsePCA',
     decomposition.MiniBatchSparsePCA(n_components=n_components,
                                      alpha=0.8,
                                      n_iter=100,
                                      batch_size=3,
                                      random_state=rng), True),
    ('MiniBatchDictionaryLearning',
     decomposition.MiniBatchDictionaryLearning(n_components=15,
                                               alpha=0.1,
                                               n_iter=50,
                                               batch_size=3,
                                               random_state=rng), True),
    ('Cluster centers - MiniBatchKMeans',
     MiniBatchKMeans(n_clusters=n_components,
                     tol=1e-3,
                     batch_size=20,
                     max_iter=50,
                     random_state=rng), True),
    ('Factor Analysis components - FA',
示例#5
0
from sklearn import decomposition
import numpy as np
import matplotlib.pyplot as plt

data = np.loadtxt("../Dataset/data.txt", delimiter=';')
target = np.loadtxt("../Dataset/target.txt", delimiter=';')
data_pre = np.loadtxt("../Dataset/data_pre.txt", delimiter=';')

#sentiment = svm.SVC(kernel="rbf", gamma=0.001, C=1000)
sentiment_bay_m = MultinomialNB()
sentiment_bay_g = GaussianNB()
sentiment_bay_b = BernoulliNB()

sentiment_dec = tree.DecisionTreeClassifier()

pca = decomposition.MiniBatchSparsePCA(n_components=20)
pca.fit_transform(data)

accuracy = np.mean(cross_validation.cross_val_score(sentiment, data, target))
print accuracy
'''
accuracy_pre = np.mean(cross_validation.cross_val_score(sentiment,data_pre,target))
accuracy_bay_m = np.mean(cross_validation.cross_val_score(sentiment_bay_m,data,target))
accuracy_bay_pre_m = np.mean(cross_validation.cross_val_score(sentiment_bay_m,data_pre,target))
accuracy_bay_g = np.mean(cross_validation.cross_val_score(sentiment_bay_g,data,target))
accuracy_bay_pre_g = np.mean(cross_validation.cross_val_score(sentiment_bay_g,data_pre,target))
accuracy_bay_b = np.mean(cross_validation.cross_val_score(sentiment_bay_b,data,target))
accuracy_bay_pre_b = np.mean(cross_validation.cross_val_score(sentiment_bay_b,data_pre,target))

print accuracy,accuracy_pre,accuracy_bay_m,accuracy_bay_pre_m,accuracy_bay_g,accuracy_bay_pre_g,accuracy_bay_b,accuracy_bay_pre_b
'''
示例#6
0
def _eval_search_params(params_builder):
    search_params = {}

    for p in params_builder['param_set']:
        search_list = p['sp_list'].strip()
        if search_list == '':
            continue

        param_name = p['sp_name']
        if param_name.lower().endswith(NON_SEARCHABLE):
            print("Warning: `%s` is not eligible for search and was "
                  "omitted!" % param_name)
            continue

        if not search_list.startswith(':'):
            safe_eval = SafeEval(load_scipy=True, load_numpy=True)
            ev = safe_eval(search_list)
            search_params[param_name] = ev
        else:
            # Have `:` before search list, asks for estimator evaluatio
            safe_eval_es = SafeEval(load_estimators=True)
            search_list = search_list[1:].strip()
            # TODO maybe add regular express check
            ev = safe_eval_es(search_list)
            preprocessings = (
                preprocessing.StandardScaler(), preprocessing.Binarizer(),
                preprocessing.MaxAbsScaler(), preprocessing.Normalizer(),
                preprocessing.MinMaxScaler(),
                preprocessing.PolynomialFeatures(),
                preprocessing.RobustScaler(), feature_selection.SelectKBest(),
                feature_selection.GenericUnivariateSelect(),
                feature_selection.SelectPercentile(),
                feature_selection.SelectFpr(), feature_selection.SelectFdr(),
                feature_selection.SelectFwe(),
                feature_selection.VarianceThreshold(),
                decomposition.FactorAnalysis(random_state=0),
                decomposition.FastICA(random_state=0),
                decomposition.IncrementalPCA(),
                decomposition.KernelPCA(random_state=0, n_jobs=N_JOBS),
                decomposition.LatentDirichletAllocation(random_state=0,
                                                        n_jobs=N_JOBS),
                decomposition.MiniBatchDictionaryLearning(random_state=0,
                                                          n_jobs=N_JOBS),
                decomposition.MiniBatchSparsePCA(random_state=0,
                                                 n_jobs=N_JOBS),
                decomposition.NMF(random_state=0),
                decomposition.PCA(random_state=0),
                decomposition.SparsePCA(random_state=0, n_jobs=N_JOBS),
                decomposition.TruncatedSVD(random_state=0),
                kernel_approximation.Nystroem(random_state=0),
                kernel_approximation.RBFSampler(random_state=0),
                kernel_approximation.AdditiveChi2Sampler(),
                kernel_approximation.SkewedChi2Sampler(random_state=0),
                cluster.FeatureAgglomeration(),
                skrebate.ReliefF(n_jobs=N_JOBS), skrebate.SURF(n_jobs=N_JOBS),
                skrebate.SURFstar(n_jobs=N_JOBS),
                skrebate.MultiSURF(n_jobs=N_JOBS),
                skrebate.MultiSURFstar(n_jobs=N_JOBS),
                imblearn.under_sampling.ClusterCentroids(random_state=0,
                                                         n_jobs=N_JOBS),
                imblearn.under_sampling.CondensedNearestNeighbour(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.EditedNearestNeighbours(random_state=0,
                                                                n_jobs=N_JOBS),
                imblearn.under_sampling.RepeatedEditedNearestNeighbours(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.InstanceHardnessThreshold(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.NearMiss(random_state=0,
                                                 n_jobs=N_JOBS),
                imblearn.under_sampling.NeighbourhoodCleaningRule(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.OneSidedSelection(random_state=0,
                                                          n_jobs=N_JOBS),
                imblearn.under_sampling.RandomUnderSampler(random_state=0),
                imblearn.under_sampling.TomekLinks(random_state=0,
                                                   n_jobs=N_JOBS),
                imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.RandomOverSampler(random_state=0),
                imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.SVMSMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.BorderlineSMOTE(random_state=0,
                                                       n_jobs=N_JOBS),
                imblearn.over_sampling.SMOTENC(categorical_features=[],
                                               random_state=0,
                                               n_jobs=N_JOBS),
                imblearn.combine.SMOTEENN(random_state=0),
                imblearn.combine.SMOTETomek(random_state=0))
            newlist = []
            for obj in ev:
                if obj is None:
                    newlist.append(None)
                elif obj == 'all_0':
                    newlist.extend(preprocessings[0:35])
                elif obj == 'sk_prep_all':  # no KernalCenter()
                    newlist.extend(preprocessings[0:7])
                elif obj == 'fs_all':
                    newlist.extend(preprocessings[7:14])
                elif obj == 'decomp_all':
                    newlist.extend(preprocessings[14:25])
                elif obj == 'k_appr_all':
                    newlist.extend(preprocessings[25:29])
                elif obj == 'reb_all':
                    newlist.extend(preprocessings[30:35])
                elif obj == 'imb_all':
                    newlist.extend(preprocessings[35:54])
                elif type(obj) is int and -1 < obj < len(preprocessings):
                    newlist.append(preprocessings[obj])
                elif hasattr(obj, 'get_params'):  # user uploaded object
                    if 'n_jobs' in obj.get_params():
                        newlist.append(obj.set_params(n_jobs=N_JOBS))
                    else:
                        newlist.append(obj)
                else:
                    sys.exit("Unsupported estimator type: %r" % (obj))

            search_params[param_name] = newlist

    return search_params
示例#7
0
 def __init__(self, source):
     min_max_scaler = preprocessing.MinMaxScaler()
     data_source = min_max_scaler.fit_transform(source)
     pca = decomposition.MiniBatchSparsePCA(n_components=2)
     self.return_data = pca.fit_transform(data_source)
    ('Eigenfaces - RandomizedPCA',
     decomposition.RandomizedPCA(n_components=n_components,
                                 whiten=True), True, False),
    ('Non-negative components - NMF',
     decomposition.NMF(n_components=n_components,
                       init='nndsvda',
                       beta=5.0,
                       tol=5e-3,
                       sparseness='components'), False, False),
    ('Independent components - FastICA',
     decomposition.FastICA(n_components=n_components, whiten=True,
                           max_iter=10), True, True),
    ('Sparse comp. - MiniBatchSparsePCA',
     decomposition.MiniBatchSparsePCA(n_components=n_components,
                                      alpha=1e-3,
                                      n_iter=100,
                                      chunk_size=3,
                                      random_state=rng), True, False),
    ('MiniBatchDictionaryLearning',
     decomposition.MiniBatchDictionaryLearning(n_atoms=15,
                                               alpha=5e-3,
                                               n_iter=50,
                                               chunk_size=3,
                                               random_state=rng), True, False),
    ('Cluster centers - MiniBatchKMeans',
     MiniBatchKMeans(k=n_components,
                     tol=1e-3,
                     chunk_size=20,
                     max_iter=50,
                     random_state=rng), True, False)
]
def get_search_params(params_builder):
    search_params = {}
    safe_eval = SafeEval(load_scipy=True, load_numpy=True)
    safe_eval_es = SafeEval(load_estimators=True)

    for p in params_builder['param_set']:
        search_p = p['search_param_selector']['search_p']
        if search_p.strip() == '':
            continue
        param_type = p['search_param_selector']['selected_param_type']

        lst = search_p.split(':')
        assert (
            len(lst) == 2
        ), "Error, make sure there is one and only one colon in search parameter input."
        literal = lst[1].strip()
        param_name = lst[0].strip()
        if param_name:
            if param_name.lower() == 'n_jobs':
                sys.exit("Parameter `%s` is invalid for search." % param_name)
            elif not param_name.endswith('-'):
                ev = safe_eval(literal)
                if param_type == 'final_estimator_p':
                    search_params['estimator__' + param_name] = ev
                else:
                    search_params['preprocessing_' + param_type[5:6] + '__' +
                                  param_name] = ev
            else:
                # only for estimator eval, add `-` to the end of param
                #TODO maybe add regular express check
                ev = safe_eval_es(literal)
                for obj in ev:
                    if 'n_jobs' in obj.get_params():
                        obj.set_params(n_jobs=N_JOBS)
                if param_type == 'final_estimator_p':
                    search_params['estimator__' + param_name[:-1]] = ev
                else:
                    search_params['preprocessing_' + param_type[5:6] + '__' +
                                  param_name[:-1]] = ev
        elif param_type != 'final_estimator_p':
            #TODO regular express check ?
            ev = safe_eval_es(literal)
            preprocessors = [
                preprocessing.StandardScaler(),
                preprocessing.Binarizer(),
                preprocessing.Imputer(),
                preprocessing.MaxAbsScaler(),
                preprocessing.Normalizer(),
                preprocessing.MinMaxScaler(),
                preprocessing.PolynomialFeatures(),
                preprocessing.RobustScaler(),
                feature_selection.SelectKBest(),
                feature_selection.GenericUnivariateSelect(),
                feature_selection.SelectPercentile(),
                feature_selection.SelectFpr(),
                feature_selection.SelectFdr(),
                feature_selection.SelectFwe(),
                feature_selection.VarianceThreshold(),
                decomposition.FactorAnalysis(random_state=0),
                decomposition.FastICA(random_state=0),
                decomposition.IncrementalPCA(),
                decomposition.KernelPCA(random_state=0, n_jobs=N_JOBS),
                decomposition.LatentDirichletAllocation(random_state=0,
                                                        n_jobs=N_JOBS),
                decomposition.MiniBatchDictionaryLearning(random_state=0,
                                                          n_jobs=N_JOBS),
                decomposition.MiniBatchSparsePCA(random_state=0,
                                                 n_jobs=N_JOBS),
                decomposition.NMF(random_state=0),
                decomposition.PCA(random_state=0),
                decomposition.SparsePCA(random_state=0, n_jobs=N_JOBS),
                decomposition.TruncatedSVD(random_state=0),
                kernel_approximation.Nystroem(random_state=0),
                kernel_approximation.RBFSampler(random_state=0),
                kernel_approximation.AdditiveChi2Sampler(),
                kernel_approximation.SkewedChi2Sampler(random_state=0),
                cluster.FeatureAgglomeration(),
                skrebate.ReliefF(n_jobs=N_JOBS),
                skrebate.SURF(n_jobs=N_JOBS),
                skrebate.SURFstar(n_jobs=N_JOBS),
                skrebate.MultiSURF(n_jobs=N_JOBS),
                skrebate.MultiSURFstar(n_jobs=N_JOBS),
                imblearn.under_sampling.ClusterCentroids(random_state=0,
                                                         n_jobs=N_JOBS),
                imblearn.under_sampling.CondensedNearestNeighbour(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.EditedNearestNeighbours(random_state=0,
                                                                n_jobs=N_JOBS),
                imblearn.under_sampling.RepeatedEditedNearestNeighbours(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.AllKNN(random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.InstanceHardnessThreshold(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.NearMiss(random_state=0,
                                                 n_jobs=N_JOBS),
                imblearn.under_sampling.NeighbourhoodCleaningRule(
                    random_state=0, n_jobs=N_JOBS),
                imblearn.under_sampling.OneSidedSelection(random_state=0,
                                                          n_jobs=N_JOBS),
                imblearn.under_sampling.RandomUnderSampler(random_state=0),
                imblearn.under_sampling.TomekLinks(random_state=0,
                                                   n_jobs=N_JOBS),
                imblearn.over_sampling.ADASYN(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.RandomOverSampler(random_state=0),
                imblearn.over_sampling.SMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.SVMSMOTE(random_state=0, n_jobs=N_JOBS),
                imblearn.over_sampling.BorderlineSMOTE(random_state=0,
                                                       n_jobs=N_JOBS),
                imblearn.over_sampling.SMOTENC(categorical_features=[],
                                               random_state=0,
                                               n_jobs=N_JOBS),
                imblearn.combine.SMOTEENN(random_state=0),
                imblearn.combine.SMOTETomek(random_state=0)
            ]
            newlist = []
            for obj in ev:
                if obj is None:
                    newlist.append(None)
                elif obj == 'all_0':
                    newlist.extend(preprocessors[0:36])
                elif obj == 'sk_prep_all':  # no KernalCenter()
                    newlist.extend(preprocessors[0:8])
                elif obj == 'fs_all':
                    newlist.extend(preprocessors[8:15])
                elif obj == 'decomp_all':
                    newlist.extend(preprocessors[15:26])
                elif obj == 'k_appr_all':
                    newlist.extend(preprocessors[26:30])
                elif obj == 'reb_all':
                    newlist.extend(preprocessors[31:36])
                elif obj == 'imb_all':
                    newlist.extend(preprocessors[36:55])
                elif type(obj) is int and -1 < obj < len(preprocessors):
                    newlist.append(preprocessors[obj])
                elif hasattr(obj, 'get_params'):  # user object
                    if 'n_jobs' in obj.get_params():
                        newlist.append(obj.set_params(n_jobs=N_JOBS))
                    else:
                        newlist.append(obj)
                else:
                    sys.exit("Unsupported preprocessor type: %r" % (obj))
            search_params['preprocessing_' + param_type[5:6]] = newlist
        else:
            sys.exit("Parameter name of the final estimator can't be skipped!")

    return search_params
def main():
    dataset = fetch_olivetti_faces(shuffle=True, random_state=rng)
    faces = dataset.data

    n_samples, n_features = faces.shape
    # global centering
    faces_centered = faces - faces.mean(axis=0)
    # local centering
    faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1)
    print("Dataset consists of %d faces" % n_samples)

    estimators = [
        ('Eigenfaces - PCA using randomized SVD',
         decomposition.PCA(n_components=n_components,
                           svd_solver='randomized',
                           whiten=True), True),
        ('Non-negative components - NMF',
         decomposition.NMF(n_components=n_components, init='nndsvda',
                           tol=5e-3), False),
        ('Independent components - FastICA',
         decomposition.FastICA(n_components=n_components, whiten=True), True),
        ('Sparse comp. - MiniBatchSparsePCA',
         decomposition.MiniBatchSparsePCA(n_components=n_components,
                                          alpha=0.8,
                                          n_iter=100,
                                          batch_size=3,
                                          random_state=rng), True),
        ('MiniBatchDictionaryLearning',
         decomposition.MiniBatchDictionaryLearning(n_components=15,
                                                   alpha=0.1,
                                                   n_iter=50,
                                                   batch_size=3,
                                                   random_state=rng), True),
        ('Cluster centers - MiniBatchKMeans',
         MiniBatchKMeans(n_clusters=n_components,
                         tol=1e-3,
                         batch_size=20,
                         max_iter=50,
                         random_state=rng), True),
        ('Factor Analysis components - FA',
         decomposition.FactorAnalysis(n_components=n_components,
                                      max_iter=2), True),
    ]

    plot_gallery("First centered Olivetti faces",
                 faces_centered[:n_components])

    for name, estimator, center in estimators:
        print("Extracting the top %d %s..." % (n_components, name))
        t0 = time()
        data = faces
        if center:
            data = faces_centered
        estimator.fit(data)
        train_time = (time() - t0)
        print("done in %0.3fs" % train_time)
        if hasattr(estimator, 'cluster_centers_'):
            components_ = estimator.cluster_centers_
        else:
            components_ = estimator.components_
        # so we skip it.
        if (hasattr(estimator, 'noise_variance_')
                and estimator.noise_variance_.ndim >
                0):  # Skip the Eigenfaces case
            plot_gallery("Pixelwise variance",
                         estimator.noise_variance_.reshape(1, -1),
                         n_col=1,
                         n_row=1)
        plot_gallery('%s - Train time %.1fs' % (name, train_time),
                     components_[:n_components])

    plt.show()
示例#11
0
def faces_decomposition():
    import logging
    from numpy.random import RandomState  #随机数生成器种子,从高斯分布或者其他等分布产生
    import matplotlib.pyplot as plt
    from time import time
    from sklearn.datasets import fetch_olivetti_faces
    from sklearn.cluster import MiniBatchKMeans
    from sklearn import decomposition

    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s')
    n_row, n_col = 2, 3
    n_components = n_row * n_col
    image_shape = (64, 64)
    rng = RandomState(0)

    #加载数据集
    dataset = fetch_olivetti_faces(shuffle=True, random_state=rng)
    faces = dataset.data

    n_samples, n_features = faces.shape

    faces_centered = faces - faces.mean(axis=0)

    faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1)

    print("dataset consits of %d faces" % n_samples)  #样本个数

    def plot_gallery(title, images, n_col=n_col, n_row=n_row):
        plt.figure(figsize=(2. * n_col, 2.26 * n_row))
        plt.suptitle(title, size=16)
        for i, comp in enumerate(images):
            plt.subplot(n_row, n_col, i + 1)
            vmax = max(comp.max(), -comp.min())
            plt.imshow(comp.reshape(image_shape),
                       cmap=plt.cm.gray,
                       interpolation='nearest',
                       vmin=-vmax,
                       vmax=vmax)
            plt.xticks(())
            plt.yticks(())
        plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.)

    estimators = [
        ('Eigenfaces - PCA using randomized SVD',
         decomposition.PCA(n_components=n_components,
                           svd_solver='randomized',
                           whiten=True), True),
        ('Non-negative components - NMF',
         decomposition.NMF(n_components=n_components, init='nndsvda',
                           tol=5e-3), False),
        ('Independent components - FastICA',
         decomposition.FastICA(n_components=n_components, whiten=True), True),
        ('Sparse comp. - MiniBatchSparsePCA',
         decomposition.MiniBatchSparsePCA(n_components=n_components,
                                          alpha=0.8,
                                          n_iter=100,
                                          batch_size=3,
                                          random_state=rng), True),
        ('MiniBatchDictionaryLearning',
         decomposition.MiniBatchDictionaryLearning(n_components=15,
                                                   alpha=0.1,
                                                   n_iter=50,
                                                   batch_size=3,
                                                   random_state=rng), True),
        ('Cluster centers - MiniBatchKMeans',
         MiniBatchKMeans(n_clusters=n_components,
                         tol=1e-3,
                         batch_size=20,
                         max_iter=50,
                         random_state=rng), True),
        ('Factor Analysis components - FA',
         decomposition.FactorAnalysis(n_components=n_components,
                                      max_iter=2), True),
    ]

    # #############################################################################
    # Plot a sample of the input data

    plot_gallery("First centered Olivetti faces",
                 faces_centered[:n_components])

    # #############################################################################
    # Do the estimation and plot it

    for name, estimator, center in estimators:
        print("Extracting the top %d %s..." % (n_components, name))
        t0 = time()
        data = faces
        if center:
            data = faces_centered
        estimator.fit(data)
        train_time = (time() - t0)
        print("done in %0.3fs" % train_time)
        if hasattr(estimator, 'cluster_centers_'):
            components_ = estimator.cluster_centers_
        else:
            components_ = estimator.components_

        # Plot an image representing the pixelwise variance provided by the
        # estimator e.g its noise_variance_ attribute. The Eigenfaces estimator,
        # via the PCA decomposition, also provides a scalar noise_variance_
        # (the mean of pixelwise variance) that cannot be displayed as an image
        # so we skip it.
        if (hasattr(estimator, 'noise_variance_')
                and estimator.noise_variance_.ndim >
                0):  # Skip the Eigenfaces case
            plot_gallery("Pixelwise variance",
                         estimator.noise_variance_.reshape(1, -1),
                         n_col=1,
                         n_row=1)
        plot_gallery('%s - Train time %.1fs' % (name, train_time),
                     components_[:n_components])

    plt.show()
def plot_faces_decomposition():
    # Display progress logs on stdout
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)s %(message)s')
    n_row, n_col = 2, 3
    n_components = n_row * n_col
    image_shape = (64, 64)
    rng = RandomState(0)

    # #############################################################################
    # Load faces data
    faces, _ = fetch_olivetti_faces(return_X_y=True, shuffle=True,
                                    random_state=rng)
    n_samples, n_features = faces.shape

    # global centering
    faces_centered = faces - faces.mean(axis=0)

    # local centering
    faces_centered -= faces_centered.mean(axis=1).reshape(n_samples, -1)

    print("Dataset consists of %d faces" % n_samples)

    def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray):
        plt.figure(figsize=(2. * n_col, 2.26 * n_row))
        plt.suptitle(title, size=16)
        for i, comp in enumerate(images):
            plt.subplot(n_row, n_col, i + 1)
            vmax = max(comp.max(), -comp.min())
            plt.imshow(comp.reshape(image_shape), cmap=cmap,
                       interpolation='nearest',
                       vmin=-vmax, vmax=vmax)
            plt.xticks(())
            plt.yticks(())
        plt.subplots_adjust(0.01, 0.05, 0.99, 0.93, 0.04, 0.)

    # #############################################################################
    # List of the different estimators, whether to center and transpose the
    # problem, and whether the transformer uses the clustering API.
    estimators = [
        ('Eigenfaces - PCA using randomized SVD',
         decomposition.PCA(n_components=n_components, svd_solver='randomized',
                           whiten=True),
         True),

        ('Non-negative components - NMF',
         decomposition.NMF(n_components=n_components, init='nndsvda', tol=5e-3),
         False),

        ('Independent components - FastICA',
         decomposition.FastICA(n_components=n_components, whiten=True),
         True),

        ('Sparse comp. - MiniBatchSparsePCA',
         decomposition.MiniBatchSparsePCA(n_components=n_components, alpha=0.8,
                                          n_iter=100, batch_size=3,
                                          random_state=rng),
         True),

        ('MiniBatchDictionaryLearning',
         decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                                   n_iter=50, batch_size=3,
                                                   random_state=rng),
         True),

        ('Cluster centers - MiniBatchKMeans',
         MiniBatchKMeans(n_clusters=n_components, tol=1e-3, batch_size=20,
                         max_iter=50, random_state=rng),
         True),

        ('Factor Analysis components - FA',
         decomposition.FactorAnalysis(n_components=n_components, max_iter=20),
         True),
    ]

    # #############################################################################
    # Plot a sample of the input data

    plot_gallery("First centered Olivetti faces", faces_centered[:n_components])

    # #############################################################################
    # Do the estimation and plot it

    for name, estimator, center in estimators:
        print("Extracting the top %d %s..." % (n_components, name))
        t0 = time()
        data = faces
        if center:
            data = faces_centered
        estimator.fit(data)
        train_time = (time() - t0)
        print("done in %0.3fs" % train_time)
        if hasattr(estimator, 'cluster_centers_'):
            components_ = estimator.cluster_centers_
        else:
            components_ = estimator.components_

        # Plot an image representing the pixelwise variance provided by the
        # estimator e.g its noise_variance_ attribute. The Eigenfaces estimator,
        # via the PCA decomposition, also provides a scalar noise_variance_
        # (the mean of pixelwise variance) that cannot be displayed as an image
        # so we skip it.
        if (hasattr(estimator, 'noise_variance_') and
                estimator.noise_variance_.ndim > 0):  # Skip the Eigenfaces case
            plot_gallery("Pixelwise variance",
                         estimator.noise_variance_.reshape(1, -1), n_col=1,
                         n_row=1)
        plot_gallery('%s - Train time %.1fs' % (name, train_time),
                     components_[:n_components])

    plt.show()

    # #############################################################################
    # Various positivity constraints applied to dictionary learning.
    estimators = [
        ('Dictionary learning',
         decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                                   n_iter=50, batch_size=3,
                                                   random_state=rng),
         True),
        ('Dictionary learning - positive dictionary',
         decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                                   n_iter=50, batch_size=3,
                                                   random_state=rng,
                                                   positive_dict=True),
         True),
        ('Dictionary learning - positive code',
         decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                                   n_iter=50, batch_size=3,
                                                   fit_algorithm='cd',
                                                   random_state=rng,
                                                   positive_code=True),
         True),
        ('Dictionary learning - positive dictionary & code',
         decomposition.MiniBatchDictionaryLearning(n_components=15, alpha=0.1,
                                                   n_iter=50, batch_size=3,
                                                   fit_algorithm='cd',
                                                   random_state=rng,
                                                   positive_dict=True,
                                                   positive_code=True),
         True),
    ]

    # #############################################################################
    # Plot a sample of the input data

    plot_gallery("First centered Olivetti faces", faces_centered[:n_components],
                 cmap=plt.cm.RdBu)

    # #############################################################################
    # Do the estimation and plot it

    for name, estimator, center in estimators:
        print("Extracting the top %d %s..." % (n_components, name))
        t0 = time()
        data = faces
        if center:
            data = faces_centered
        estimator.fit(data)
        train_time = (time() - t0)
        print("done in %0.3fs" % train_time)
        components_ = estimator.components_
        plot_gallery(name, components_[:n_components], cmap=plt.cm.RdBu)

    plt.show()
ica_estimator.fit(faces_centered)
plot_gallery("Independent components - FastICA",
             ica_estimator.components_[:n_components])

# %%
# Sparse components - MiniBatchSparsePCA
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Mini-batch sparse PCA (`MiniBatchSparsePCA`) extracts the set of sparse
# components that best reconstruct the data. This variant is faster but
# less accurate than the similar :py:mod:`sklearn.decomposition.SparsePCA`.

# %%
batch_pca_estimator = decomposition.MiniBatchSparsePCA(
    n_components=n_components,
    alpha=0.1,
    n_iter=100,
    batch_size=3,
    random_state=rng)
batch_pca_estimator.fit(faces_centered)
plot_gallery(
    "Sparse components - MiniBatchSparsePCA",
    batch_pca_estimator.components_[:n_components],
)

# %%
# Dictionary learning
# ^^^^^^^^^^^^^^^^^^^
#
# By default, :class:`MiniBatchDictionaryLearning` divides the data into
# mini-batches and optimizes in an online manner by cycling over the
# mini-batches for the specified number of iterations.