import matplotlib.pyplot as plt

from yellowbrick.features import ParallelCoordinates


def pcoords(X, y, outpath, **kwargs):
    # Create a new figure and axes
    _, ax = plt.subplots()

    # Create the visualizer and draw the parallel coordinates plot
    visualizer = ParallelCoordinates(ax=ax, **kwargs)
    visualizer.fit_transform(X, y)

    # Save to disk
    visualizer.poof(outpath=outpath)
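# Illustrative call (not part of the original source): a minimal sketch using the
# iris data from scikit-learn; the output path and keyword arguments below are
# assumptions chosen for demonstration only.
if __name__ == "__main__":
    from sklearn.datasets import load_iris

    data = load_iris()
    pcoords(data.data, data.target, "parallel_coordinates.png", sample=0.5, shuffle=True)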
import time

import matplotlib.pyplot as plt

from yellowbrick.features import ParallelCoordinates


def pcoords_time(X, y, fast=True):
    # Draw a parallel coordinates plot and return the elapsed drawing time
    _, ax = plt.subplots()
    oz = ParallelCoordinates(fast=fast, ax=ax)

    start = time.time()
    oz.fit_transform(X, y)
    delta = time.time() - start

    plt.cla()         # clear current axis
    plt.clf()         # clear current figure
    plt.close("all")  # close all existing plots

    return delta
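# Illustrative timing driver (an assumption, not in the original script): compares
# the standard and fast drawing modes with pcoords_time on the iris data.
if __name__ == "__main__":
    from sklearn.datasets import load_iris

    data = load_iris()
    for fast in (False, True):
        delta = pcoords_time(data.data, data.target, fast=fast)
        print("fast={}: {:.3f} seconds".format(fast, delta))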
from sklearn.datasets import load_iris


def plot_fast_vs_slow():
    # Draw the standard and fast parallel coordinates plots in stacked axes
    data = load_iris()
    _, axes = plt.subplots(nrows=2, figsize=(9, 9))

    for idx, fast in enumerate((False, True)):
        title = "Fast Parallel Coordinates" if fast else "Standard Parallel Coordinates"
        oz = ParallelCoordinates(ax=axes[idx], fast=fast, title=title)
        oz.fit_transform(data.data, data.target)
        oz.finalize()

    plt.tight_layout()
    plt.savefig("images/fast_vs_slow_parallel_coordinates.png")
X, y = tf.create_X_y()

# Keep only the rows the isolation forest labels as inliers (fit_predict returns 1 for inliers)
mask = IsolationForest(contamination=0.15).fit_predict(X[column].to_frame(), y) == 1
new_X = X[mask]
new_y = y[mask]

# Standardize the remaining rows before plotting
X_scaled = StandardScaler().fit_transform(new_X)

target_names = ['f', 's']
visualizer = ParallelCoordinates(
    classes=target_names, features=list(X.columns), sample=0.5, shuffle=True
)
visualizer.fit_transform(X_scaled, new_y)
visualizer.show()

# %%
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

X, y = tf.cats_to_one_hot(columns=[]).create_X_y()
X_scaled = StandardScaler().fit_transform(X)
X = X.values

pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)
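# %%
# Hypothetical follow-up cell (an assumption, not in the original notebook): a quick
# scatter of the two PCA components to sanity-check the projection, assuming y holds
# numeric class labels.
plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y, s=10)
plt.xlabel("first principal component")
plt.ylabel("second principal component")
plt.show()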
def pcoords():
    X, y = load_occupancy()
    oz = ParallelCoordinates(sample=0.05, shuffle=True, ax=newfig())
    oz.fit_transform(X, y)
    savefig(oz, "parallel_coordinates")
plt.xlabel('number of components')
plt.ylabel('variance (%)')
plt.title(label + ": Explained Variance by Number of Components")
plt.savefig(path.join(PLOT_DIR, abbrev + "_pca_variance.png"), bbox_inches='tight')
plt.show()
plt.close()

# save the PCA projection as a new set of features
pca = PCA(n_components=n_components, svd_solver='full', random_state=SEED)
start_time = time.perf_counter()
df = pd.DataFrame(pca.fit_transform(X))
run_time = time.perf_counter() - start_time
print(label + ": run time = " + str(run_time))
df.to_pickle(path.join(PKL_DIR, abbrev + "_pca.pickle"))

# parallel coordinates plot
visualizer = ParallelCoordinates(sample=0.2, shuffle=True, fast=True)
visualizer.fit_transform(df, y)
visualizer.ax.set_xticklabels(visualizer.ax.get_xticklabels(), rotation=45, horizontalalignment='right')
visualizer.finalize()
plt.savefig(path.join(PLOT_DIR, abbrev + "_pca_parallel.png"), bbox_inches='tight')
visualizer.show()
plt.close()

# output reconstruction error
recon_err = get_reconstruction_error_invertable(X, df, pca)
print(label + ": reconstruction error = " + str(recon_err))

# distribution of eigenvalues (explained_variance_ holds the covariance eigenvalues;
# components_ are the principal axes)
print(label + ": eigenvalues = ", pca.explained_variance_)
sns.set(style="ticks")
grid = sns.pairplot(df, hue="cluster", vars=feature_names)
plt.subplots_adjust(top=0.96)
grid.fig.suptitle(label + ": K-means k=" + str(best_k))
plt.savefig(path.join(PLOT_DIR, abbrev + "_em_scatter.png"), bbox_inches='tight')
plt.show()
plt.close()

# parallel coordinates plot
print("# Parallel Coordinates Plot for " + label)
visualizer = ParallelCoordinates(features=feature_names, sample=0.1, shuffle=True, fast=True)
visualizer.fit_transform(X, y_pred)
visualizer.ax.set_xticklabels(visualizer.ax.get_xticklabels(), rotation=45, horizontalalignment='right')
visualizer.finalize()
plt.savefig(path.join(PLOT_DIR, abbrev + "_em_parallel.png"), bbox_inches='tight')
visualizer.show()
plt.close()

# compare with ground truth (classes)
print(label + ": Homogeneity Score = " + str(metrics.homogeneity_score(y, y_pred)))
print(label + ": V Measure Score = " + str(metrics.v_measure_score(y, y_pred)))
print(label + ": Mutual Info Score = " +