Example #1
from os import path

import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import MaxNLocator
from sklearn.decomposition import NMF
from sklearn.metrics import explained_variance_score

# SEED (random seed) and PLOT_DIR (directory for saved figures) are
# module-level constants assumed to be defined elsewhere.


def optimize_components(X, feature_names, label, abbrev):
    # model selection (optimal number of components), adapted from
    # https://scikit-learn.org/stable/auto_examples/decomposition/plot_pca_vs_fa_model_selection.html

    # prepare explained variance scorer
    def get_score(model, data, scorer=explained_variance_score):
        prediction = model.inverse_transform(model.transform(data))
        return scorer(data, prediction)

    # choose number of components by explained variance
    n_components = np.arange(1, len(feature_names) + 1)
    nmf = NMF(random_state=SEED)
    nmf_scores = []
    for n in n_components:
        nmf.n_components = n
        nmf.fit(X)
        nmf_scores.append(get_score(nmf, X))
    nmf_scores = np.array(nmf_scores)
    # smallest number of components whose reconstruction explains >= 95% of the
    # variance; np.argmax falls back to index 0 (one component) if none qualify
    n_components_nmf = n_components[np.argmax(nmf_scores >= 0.95)]
    print(label + ": best n_components by explained variance >= 0.95 = %d" %
          int(n_components_nmf))

    # create plot
    plt.figure()
    plt.plot(n_components,
             nmf_scores,
             'b',
             label='explained variance by number of components')
    plt.axvline(n_components_nmf,
                color='b',
                label='chosen number of components: %d' % n_components_nmf,
                linestyle='--')

    # format plot
    ax = plt.gca()
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    plt.xlabel('number of components')
    plt.ylabel('explained variance')
    plt.legend(loc='lower right')
    plt.title(label + ": NMF model selection")
    plt.savefig(path.join(PLOT_DIR, abbrev + "_nmf_components.png"),
                bbox_inches='tight')
    plt.show()
    plt.close()

    return n_components_nmf
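
A minimal call sketch, not part of the original snippet: it assumes the module-level SEED and PLOT_DIR constants noted above are defined, and uses randomly generated non-negative demo data with hypothetical names.

import numpy as np
from sklearn.decomposition import NMF

# hypothetical demo data: 200 samples of 8 non-negative features
rng = np.random.default_rng(0)
feature_names = ["f%d" % i for i in range(8)]
X_demo = rng.random((200, 8))

# pick the number of components, then refit NMF at that size
n_opt = optimize_components(X_demo, feature_names, label="Demo", abbrev="demo")
W = NMF(n_components=n_opt, random_state=SEED).fit_transform(X_demo)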
Example #2
from sklearn.decomposition import NMF, PCA, FastICA, KernelPCA
from sklearn.metrics import explained_variance_score


def compute_scores(X, Xs, n_components):
    # X  : non-negative feature matrix (required by NMF)
    # Xs : standardized copy of the data, used for PCA, ICA and kernel PCA
    # n_components : iterable of component counts to evaluate
    pca = PCA(svd_solver='auto')
    kpca = KernelPCA(fit_inverse_transform=True)
    ica = FastICA()
    nmf = NMF(init='nndsvda')
    pca_scores, ica_scores, nmf_scores, kpca_scores = [], [], [], []
    for n in n_components:
        pca.n_components = n
        ica.n_components = n
        nmf.n_components = n
        kpca.n_components = n
        print(n)  # progress: current number of components

        # score each model by how much variance its low-rank reconstruction
        # (inverse transform of the projection) explains
        Xpca = pca.inverse_transform(pca.fit_transform(Xs))
        pca_scores.append(explained_variance_score(Xs, Xpca))
        Xica = ica.inverse_transform(ica.fit_transform(Xs))
        ica_scores.append(explained_variance_score(Xs, Xica))
        Xkpca = kpca.inverse_transform(kpca.fit_transform(Xs))
        kpca_scores.append(explained_variance_score(Xs, Xkpca))

        # NMF requires non-negative input, so it is fit on X rather than Xs
        Xnmf = nmf.inverse_transform(nmf.fit_transform(X))
        nmf_scores.append(explained_variance_score(X, Xnmf))

    return pca_scores, ica_scores, nmf_scores, kpca_scores
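
A hedged usage sketch for compute_scores with the parameterized signature above; the scalers and the random demo data are illustrative assumptions, not part of the original example.

import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# hypothetical demo data: 300 samples of 10 features
rng = np.random.default_rng(0)
X_raw = rng.random((300, 10))
X = MinMaxScaler().fit_transform(X_raw)     # non-negative input for NMF
Xs = StandardScaler().fit_transform(X_raw)  # zero-mean input for PCA/ICA/KPCA
n_components = np.arange(1, X_raw.shape[1] + 1)

pca_scores, ica_scores, nmf_scores, kpca_scores = compute_scores(X, Xs, n_components)
print("PCA explained variance per n:", np.round(pca_scores, 3))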