Exemplo n.º 1
0
def plot_fast_vs_slow():
    """Draw standard and fast parallel-coordinates plots, one above the other.

    Fits a ``ParallelCoordinates`` visualizer on the data returned by
    ``load_iris()`` twice -- first with ``fast=False`` (top row), then with
    ``fast=True`` (bottom row) -- on a two-row figure, and saves the figure to
    ``images/fast_vs_slow_parallel_coordinates.png``.
    """
    iris = load_iris()
    _, panels = plt.subplots(nrows=2, figsize=(9, 9))

    # One (mode, heading) pair per subplot row, top to bottom.
    modes = (
        (False, "Standard Parallel Coordinates"),
        (True, "Fast Parallel Coordinates"),
    )
    for panel, (use_fast, heading) in zip(panels, modes):
        viz = ParallelCoordinates(ax=panel, fast=use_fast, title=heading)
        viz.fit_transform(iris.data, iris.target)
        viz.finalize()

    plt.tight_layout()
    plt.savefig("images/fast_vs_slow_parallel_coordinates.png")
Exemplo n.º 2
0
def plot_fast_vs_slow():
    """Compare the two ParallelCoordinates drawing modes in a single figure.

    The upper subplot uses ``fast=False`` and the lower one ``fast=True``;
    both are fitted on ``load_iris()`` data and targets. The finished figure
    is written to ``images/fast_vs_slow_parallel_coordinates.png``.
    """
    dataset = load_iris()

    _, (upper_ax, lower_ax) = plt.subplots(nrows=2, figsize=(9, 9))

    # Standard mode is drawn first (upper axes), fast mode second (lower axes).
    for target_ax, fast_mode in ((upper_ax, False), (lower_ax, True)):
        if fast_mode:
            caption = "Fast Parallel Coordinates"
        else:
            caption = "Standard Parallel Coordinates"

        visualizer = ParallelCoordinates(
            ax=target_ax, fast=fast_mode, title=caption
        )
        visualizer.fit_transform(dataset.data, dataset.target)
        visualizer.finalize()

    plt.tight_layout()
    plt.savefig("images/fast_vs_slow_parallel_coordinates.png")
		# NOTE(review): this is the tail of a function whose header is not visible
		# here; `label`, `abbrev`, `X`, `y` and `n_components` are presumably
		# parameters or locals defined above -- confirm against the full source.

		# Label the current pyplot figure, save it under PLOT_DIR as
		# "<abbrev>_pca_variance.png", display it, then close it.
		plt.xlabel('number of components')
		plt.ylabel('variance (%)')
		plt.title(label + ": Explained Variance by Number of Components")
		plt.savefig(path.join(PLOT_DIR, abbrev + "_pca_variance.png"), bbox_inches='tight')
		plt.show()
		plt.close()

		# Fit a PCA with `n_components` components on X and keep the transformed
		# data as a new feature set (a DataFrame).
		pca = PCA(n_components=n_components, svd_solver='full', random_state=SEED)
		# The timed region covers both pca.fit_transform and the DataFrame
		# construction wrapped around it.
		start_time = time.perf_counter()
		df = pd.DataFrame(pca.fit_transform(X))
		run_time = time.perf_counter() - start_time
		print(label + ": run time = " + str(run_time))
		# Persist the transformed features under PKL_DIR as "<abbrev>_pca.pickle".
		df.to_pickle(path.join(PKL_DIR, abbrev + "_pca.pickle"))

		# Parallel coordinates plot of the transformed features against y
		# (visualizer configured with sample=0.2, shuffle=True, fast=True).
		visualizer = ParallelCoordinates(sample=0.2, shuffle=True, fast=True)
		visualizer.fit_transform(df, y)
		# Rotate the x tick labels 45 degrees and right-align them.
		visualizer.ax.set_xticklabels(visualizer.ax.get_xticklabels(), rotation=45, horizontalalignment='right')
		visualizer.finalize()
		# The figure is written to "<abbrev>_pca_parallel.png" before
		# visualizer.show() is called, then closed afterwards.
		# NOTE(review): show() presumably finalizes/renders again -- confirm the
		# saved image matches what is displayed.
		plt.savefig(path.join(PLOT_DIR, abbrev + "_pca_parallel.png"), bbox_inches='tight')
		visualizer.show()
		plt.close()

		# Print the reconstruction error computed by
		# get_reconstruction_error_invertable (defined elsewhere; presumably it
		# inverse-transforms df with pca and compares to X -- verify).
		recon_err = get_reconstruction_error_invertable(X, df, pca)
		print(label + ": reconstruction error = " + str(recon_err))

		# NOTE(review): the message says "eigenvalues?" but the value printed is
		# pca.components_, which scikit-learn documents as the principal axes
		# (eigenvectors); the eigenvalues are exposed as pca.explained_variance_.
		# Confirm which one is intended.
		print(label + ": eigenvalues?", pca.components_)