示例#1
0
def pcoords(X, y, outpath, **kwargs):
    """
    Draw a parallel coordinates plot for ``X`` and ``y`` and save it.

    A new figure is created for the plot; every extra keyword argument is
    forwarded unchanged to the ``ParallelCoordinates`` constructor, and
    ``outpath`` is handed to the visualizer's ``poof`` method.
    """
    # Only the axes half of the (figure, axes) pair is needed
    axes = plt.subplots()[1]

    # Build the visualizer on the fresh axes, then fit it to the data
    oz = ParallelCoordinates(ax=axes, **kwargs)
    oz.fit_transform(X, y)

    # Save to disk
    oz.poof(outpath=outpath)
示例#2
0
def pcoords(X, y, outpath, **kwargs):
    """
    Fit a ``ParallelCoordinates`` visualizer on ``X`` and ``y`` using a
    freshly created axes, then pass ``outpath`` to its ``poof`` method.

    Additional keyword arguments go straight to the visualizer constructor.
    """
    # New figure and axes; the figure handle itself is not used here
    figure_and_axes = plt.subplots()
    target_axes = figure_and_axes[1]

    # Create and fit the visualizer
    pc_visualizer = ParallelCoordinates(ax=target_axes, **kwargs)
    pc_visualizer.fit_transform(X, y)

    # Save to disk
    pc_visualizer.poof(outpath=outpath)
示例#3
0
    def pcoords_time(X, y, fast=True):
        """
        Time one ``ParallelCoordinates.fit_transform`` call on ``X`` and ``y``.

        Parameters
        ----------
        X, y
            Passed unchanged to ``fit_transform``.
        fast : bool, default: True
            Forwarded to the ``ParallelCoordinates`` constructor.

        Returns
        -------
        float
            Elapsed seconds spent inside ``fit_transform``.
        """
        _, ax = plt.subplots()

        try:
            oz = ParallelCoordinates(fast=fast, ax=ax)

            # perf_counter is monotonic and high resolution, so the delta is
            # not disturbed by system clock adjustments the way time.time() is
            start = time.perf_counter()
            oz.fit_transform(X, y)
            delta = time.perf_counter() - start
        finally:
            # Release matplotlib state even when fitting raises, so repeated
            # timing runs do not accumulate open figures
            plt.cla()        # clear current axis
            plt.clf()        # clear current figure
            plt.close("all") # close all existing plots

        return delta
示例#4
0
def plot_fast_vs_slow():
    """
    Plot the iris data twice in one two-row figure, once with ``fast=False``
    and once with ``fast=True``, then save the figure to the images folder.
    """
    iris = load_iris()
    axes = plt.subplots(nrows=2, figsize=(9,9))[1]

    # Title for each drawing mode, keyed by the value of the ``fast`` flag
    titles = {
        False: "Standard Parallel Coordinates",
        True: "Fast Parallel Coordinates",
    }

    # The first axes gets the standard mode, the second the fast mode
    for ax, fast in zip(axes, (False, True)):
        visualizer = ParallelCoordinates(ax=ax, fast=fast, title=titles[fast])
        visualizer.fit_transform(iris.data, iris.target)
        visualizer.finalize()

    plt.tight_layout()
    plt.savefig("images/fast_vs_slow_parallel_coordinates.png")
示例#5
0
    def pcoords_time(X, y, fast=True):
        """
        Measure how long ``ParallelCoordinates.fit_transform`` takes.

        Parameters
        ----------
        X, y
            Data handed directly to ``fit_transform``.
        fast : bool, default: True
            Value of the ``fast`` argument given to ``ParallelCoordinates``.

        Returns
        -------
        float
            Seconds elapsed during the ``fit_transform`` call.
        """
        _, ax = plt.subplots()

        try:
            oz = ParallelCoordinates(fast=fast, ax=ax)

            # Use the monotonic high-resolution benchmark clock rather than
            # the wall clock, which may jump while the measurement is running
            start = time.perf_counter()
            oz.fit_transform(X, y)
            delta = time.perf_counter() - start
        finally:
            # Always tear down matplotlib state, including on failure
            plt.cla()        # clear current axis
            plt.clf()        # clear current figure
            plt.close("all") # close all existing plots

        return delta
示例#6
0
def plot_fast_vs_slow():
    """
    Draw standard and fast parallel coordinates of the iris data on two
    stacked axes and write the result to a PNG in the images folder.
    """
    dataset = load_iris()
    _, axes = plt.subplots(nrows=2, figsize=(9,9))

    # (fast flag, title) pairs in the order the axes are filled
    modes = (
        (False, "Standard Parallel Coordinates"),
        (True, "Fast Parallel Coordinates"),
    )

    for ax, (fast, title) in zip(axes, modes):
        viz = ParallelCoordinates(ax=ax, fast=fast, title=title)
        viz.fit_transform(dataset.data, dataset.target)
        viz.finalize()

    plt.tight_layout()
    plt.savefig("images/fast_vs_slow_parallel_coordinates.png")
示例#7
0
# Build the feature matrix and the target from the transformer
X, y = tf.create_X_y()

# Fit an IsolationForest on the single `column` feature and keep only the
# rows it labels 1 (inliers, per scikit-learn's convention)
outlier_forest = IsolationForest(contamination=0.15)
inlier_flags = outlier_forest.fit_predict(X[column].to_frame(), y)
mask = inlier_flags == 1
new_X, new_y = X[mask], y[mask]

# Standardize the retained rows before plotting
X_scaled = StandardScaler().fit_transform(new_X)

target_names = ['f', 's']

visualizer = ParallelCoordinates(
    classes=target_names,
    features=list(X.columns),
    sample=0.5,
    shuffle=True,
)
visualizer.fit_transform(X_scaled, new_y)
visualizer.show()

# %%
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

# Rebuild X and y after the cats_to_one_hot step (called with no columns)
X, y = tf.cats_to_one_hot(columns=[]).create_X_y()

# Scale first, while X is still the object returned by create_X_y ...
X_scaled = StandardScaler().fit_transform(X)

# ... and only then replace X by its underlying values
X = X.values

# Project the scaled features onto two principal components
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
示例#8
0
def pcoords():
    """
    Plot the occupancy data as parallel coordinates (``sample=0.05``,
    ``shuffle=True``) on a new figure and save it under the name
    "parallel_coordinates".
    """
    features, target = load_occupancy()
    visualizer = ParallelCoordinates(ax=newfig(), sample=0.05, shuffle=True)
    visualizer.fit_transform(features, target)
    savefig(visualizer, "parallel_coordinates")
		plt.xlabel('number of components')
		plt.ylabel('variance (%)')
		plt.title(label + ": Explained Variance by Number of Components")
		plt.savefig(path.join(PLOT_DIR, abbrev + "_pca_variance.png"), bbox_inches='tight')
		plt.show()
		plt.close()

		# save as new set of features
		pca = PCA(n_components=n_components, svd_solver='full', random_state=SEED)
		start_time = time.perf_counter()
		df = pd.DataFrame(pca.fit_transform(X))
		run_time = time.perf_counter() - start_time
		print(label + ": run time = " + str(run_time))
		df.to_pickle(path.join(PKL_DIR, abbrev + "_pca.pickle"))

		# parallel coordinates plot
		visualizer = ParallelCoordinates(sample=0.2, shuffle=True, fast=True)
		visualizer.fit_transform(df, y)
		visualizer.ax.set_xticklabels(visualizer.ax.get_xticklabels(), rotation=45, horizontalalignment='right')
		visualizer.finalize()
		plt.savefig(path.join(PLOT_DIR, abbrev + "_pca_parallel.png"), bbox_inches='tight')
		visualizer.show()
		plt.close()

		# output reconstruction error
		recon_err = get_reconstruction_error_invertable(X, df, pca)
		print(label + ": reconstruction error = " + str(recon_err))

		# distribution of eigenvalues
		print(label + ": eigenvalues?", pca.components_)
    sns.set(style="ticks")
    grid = sns.pairplot(df, hue="cluster", vars=feature_names)
    plt.subplots_adjust(top=0.96)
    grid.fig.suptitle(label + ": K-means k=" + str(best_k))
    plt.savefig(path.join(PLOT_DIR, abbrev + "_em_scatter.png"),
                bbox_inches='tight')
    plt.show()
    plt.close()

    # parallel coordinates plot
    print("# Parallel Coordinates Plot for " + label)
    visualizer = ParallelCoordinates(features=feature_names,
                                     sample=0.1,
                                     shuffle=True,
                                     fast=True)
    visualizer.fit_transform(X, y_pred)
    visualizer.ax.set_xticklabels(visualizer.ax.get_xticklabels(),
                                  rotation=45,
                                  horizontalalignment='right')
    visualizer.finalize()
    plt.savefig(path.join(PLOT_DIR, abbrev + "_em_parallel.png"),
                bbox_inches='tight')
    visualizer.show()
    plt.close()

    # compare with ground truth (classes)
    print(label + ": Homogeneity Score = " +
          str(metrics.homogeneity_score(y, y_pred)))
    print(label + ": V Measure Score = " +
          str(metrics.v_measure_score(y, y_pred)))
    print(label + ": Mutual Info Score = " +