Пример #1
0
    # Indices that order `kurt` from largest to smallest (argsort of the
    # negated array). `kurt` is computed above this excerpt — presumably the
    # per-component kurtosis of X_ica; confirm against the full file.
    top_kurt_ind = (-kurt).argsort()

    # Hand the two columns of X_ica with the largest `kurt` values, plus the
    # labels y, to examine_credit_cluster (defined elsewhere; presumably it
    # draws a labelled 2-D plot and writes it to `fname`).
    examine_credit_cluster(
        X_ica[:, top_kurt_ind[:2]],
        y,
        title=f"ICA transformation on {data_key} data",
        xylabel=["IC1", "IC2"],
        fname=f"output/ica_ic_cluster_{data_key}.png",
    )

    # Extra diagnostics that only run for the "fashion" dataset.
    if data_key == "fashion":
        # Plot the 25 rows of ica.components_ with the largest `kurt` values,
        # labelled 0..24 (plot_fashion_cluster is defined elsewhere;
        # presumably it renders each row as an image).
        plot_fashion_cluster(
            ica.components_[top_kurt_ind[:25], :],
            range(25),
            fname="output/ica_ic_fashion.png",
        )

        # Map the ICA projection back to the input space and plot the result
        # against the true labels.
        X_recon = ica.inverse_transform(X_ica)
        plot_fashion_cluster(X_recon,
                             y,
                             fname="output/ica_reconstructed_fashion.png")

        # Reconstruction vs. number of components: range(2, 201, 8) yields
        # 25 component counts (2, 10, ..., 194).
        sample = []
        for nc in range(2, 201, 8):
            # These are independent components; the message previously said
            # "PC", a leftover from the PCA version of this script.
            print(f"Reconstructing with {nc} IC...")
            # NOTE(review): recent scikit-learn releases replace
            # whiten=True with a string option for FastICA — confirm against
            # the version this project pins before upgrading.
            # This rebinds `ica`, replacing the model fitted above.
            ica = FastICA(n_components=nc, whiten=True, random_state=0)
            X_recon = ica.inverse_transform(ica.fit_transform(X))
Пример #2
0
    # Write the current matplotlib figure (built above this excerpt) to disk
    # and close it so later plots start from a clean figure.
    plt.savefig(f"output/pca_eigenval_{data_key}.png")
    plt.close()

    # Hand the first two columns of X_pca, plus the labels y, to
    # examine_credit_cluster (defined elsewhere; presumably it draws a
    # labelled 2-D plot and writes it to `fname`).
    examine_credit_cluster(
        X_pca[:, :2],
        y,
        title=f"PCA transformation on {data_key} data",
        xylabel=["PC1", "PC2"],
        fname=f"output/pca_pc_cluster_{data_key}.png",
    )

    # Extra diagnostics that only run for the "fashion" dataset.
    if data_key == "fashion":
        # Plot the first 25 rows of pca.components_, labelled 0..24
        # (plot_fashion_cluster is defined elsewhere; presumably it renders
        # each row as an image).
        plot_fashion_cluster(
            pca.components_[:25, :], range(25), fname="output/pca_pc_fashion.png"
        )

        # Refit PCA with a fractional n_components; per the scikit-learn
        # docs this keeps enough components to explain that share of the
        # variance. This rebinds `pca`, replacing the model used above.
        pca = PCA(n_components=0.95, whiten=True, random_state=0)
        # Project X and immediately map it back to the input space.
        X_recon = pca.inverse_transform(pca.fit_transform(X))
        # Report how many components the fitted model actually kept.
        print(f"...Keep {pca.n_components_} components for {data_key} data...")
        plot_fashion_cluster(X_recon, y, fname="output/pca_reconstructed_fashion.png")

        # Reconstruction vs. number of components: range(2, 201, 8) yields
        # 25 component counts (2, 10, ..., 194).
        sample = []
        for nc in range(2, 201, 8):
            print(f"Reconstructing with {nc} PC...")
            pca = PCA(n_components=nc, whiten=True, random_state=0)
            X_recon = pca.inverse_transform(pca.fit_transform(X))
            # Keep only the first reconstructed row for each component count.
            sample.append(X_recon[0])
Пример #3
0
    # Persist the cluster labels (k_labels_df is built above this excerpt)
    # without writing the index column.
    k_labels_df.to_csv(f"data/kmeans_labels_{data_key}.csv", index=False)
    # Plot every column of res_df in its own subplot with a dotted-line
    # marker style; res_df is built above — presumably one row per
    # n_clusters value, given the x-axis label below.
    res_df.plot(
        subplots=True,
        style=".-",
        title=f"KMeans performance vs n_clusters on {data_key} data",
    )
    plt.xlabel("n_clusters")
    # Save the figure and close it so later plots start clean.
    plt.savefig(f"output/kmeans_{data_key}.png")
    plt.close()

    # Runs for any data_key containing "credit" (substring match, so
    # variants of the credit dataset name also qualify).
    if "credit" in data_key:
        # Plot the two feature columns indexed by TOP_FEATURES[:2] against
        # the true labels. Axis names come from the columns of
        # DATA["credit"][0] at the same positions.
        examine_credit_cluster(
            X.values[:, TOP_FEATURES[:2]],
            y,
            title="True Label",
            xylabel=DATA["credit"][0].columns[TOP_FEATURES[:2]],
            fname=f"output/true_cluster_{data_key}.png",
        )
        # Same two features, but labelled by the k_labels_df column whose
        # key equals the number of distinct values in y — presumably the
        # KMeans run with n_clusters equal to the true class count.
        examine_credit_cluster(
            X.values[:, TOP_FEATURES[:2]],
            k_labels_df[y.nunique()],
            title="KMeans",
            xylabel=DATA["credit"][0].columns[TOP_FEATURES[:2]],
            fname=f"output/kmeans_cluster_{data_key}.png",
        )

    # For the "fashion" dataset, plot X labelled by the k_labels_df column
    # whose key equals the number of distinct values in y — presumably the
    # KMeans run with n_clusters equal to the true class count.
    # NOTE(review): the credit branch above counts classes with
    # y.nunique(); this one uses len(np.unique(y)) — confirm both are
    # intended (they differ only if y contains NaN or is not a Series).
    if data_key == "fashion":
        plot_fashion_cluster(X,
                             k_labels_df[len(np.unique(y))],
                             fname="output/kmeans_cluster_fashion.png")
Пример #4
0
        title=f"RCA transformation on {data_key} data",
        xylabel=["RC1", "RC2"],
        fname=f"output/rca_rc_cluster_{data_key}.png",
    )

    if data_key == "fashion":
        # Reconstruction vs. number of random components: 25 component
        # counts spread evenly from 2 up to the full feature count.
        sample = []
        for nc in np.linspace(2, X.shape[1], 25).astype(int):
            print(f"Reconstructing with {nc} RC...")
            rca = GaussianRandomProjection(n_components=nc, random_state=0)
            X_rca = rca.fit_transform(X)
            # Map the projection back to the input space with the
            # pseudo-inverse of the transposed projection matrix.
            X_recon = np.dot(X_rca, np.linalg.pinv(rca.components_.T))
            # Keep only the first reconstructed row for each component count.
            sample.append(X_recon[0])
        plot_fashion_cluster(np.array(sample),
                             range(25),
                             fname="output/rca_recon_vs_k_fashion.png")
    elif data_key == "credit":
        # For each component count, collect the mean and standard deviation
        # of the correlation between the original data and its
        # reconstruction over 100 repeated projections.
        corr_mean = []
        corr_std = []
        # The flattened original data does not change between repeats, so
        # build it once instead of on every inner iteration.
        x_flat = X.values.flatten()
        for nc in range(1, X.shape[1]):
            print(f"Reconstructing with {nc} RC...")
            corr = []
            # random_state is left unset here, so the 100 repeats are not
            # seeded identically (unlike the fashion branch above).
            for _ in range(100):
                rca = GaussianRandomProjection(n_components=nc)
                X_rca = rca.fit_transform(X)
                X_recon = np.dot(X_rca, np.linalg.pinv(rca.components_.T))
                # Pearson correlation between original and reconstruction,
                # both flattened to 1-D.
                corr.append(np.corrcoef(x_flat, X_recon.flatten())[0, 1])
            corr_mean.append(np.mean(corr))
            corr_std.append(np.std(corr))