# Infer trajectory
####
# run topslam
from sklearn.manifold import TSNE, LocallyLinearEmbedding, SpectralEmbedding, Isomap
from sklearn.decomposition import FastICA, PCA

n_components = p["n_components"]

methods = {
    't-SNE': TSNE(n_components=n_components),
    'PCA': PCA(n_components=n_components),
    'Spectral': SpectralEmbedding(n_components=n_components, n_neighbors=p["n_neighbors"]),
    'Isomap': Isomap(n_components=n_components, n_neighbors=p["n_neighbors"]),
    'ICA': FastICA(n_components=n_components)
}

method_names = sorted(methods.keys())
method_names_selected = [
    method_names[i] for i, selected in enumerate(p["dimreds"]) if selected
]
methods = {
    method_name: method
    for method_name, method in methods.items()
    if method_name in method_names_selected
}

# dimensionality reduction
X_init, dims = run_methods(expression, methods)
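
# The snippet above assumes a parameter dict `p` defined elsewhere. A minimal,
# hypothetical example (values are illustrative, not from the source); the
# "dimreds" mask is applied over sorted(methods) order, which here is
# ICA, Isomap, PCA, Spectral, t-SNE:
p = {
    "n_components": 2,
    "n_neighbors": 10,
    "dimreds": [True, True, True, True, True],
}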
y

# We see here that there are 1,797 samples and 64 features.

# ### Unsupervised learning: Dimensionality reduction
#
# We'd like to visualize our points within the 64-dimensional parameter space,
# but it's difficult to effectively visualize points in such a high-dimensional
# space. Instead, we'll reduce the dimensions to 2 using an unsupervised method.
#
# Here we'll make use of a manifold learning algorithm called *Isomap*
# (see **In-Depth: Manifold Learning**) and transform the data to two dimensions:

# In[32]:

from sklearn.manifold import Isomap

iso = Isomap(n_components=2)
iso.fit(digits.data)
data_projected = iso.transform(digits.data)
data_projected.shape

# We see that the projected data is now two-dimensional.
# Let's plot this data to see if we can learn anything from its structure:

# In[112]:

plt.scatter(data_projected[:, 0], data_projected[:, 1], c=digits.target,
            edgecolor='none', alpha=0.5,
            cmap=plt.cm.get_cmap('nipy_spectral', 10))
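
# Optional sanity check (not in the original notebook): sklearn's
# `trustworthiness` scores how well local neighborhoods survive the
# projection, on a 0-1 scale (higher is better).
from sklearn.manifold import trustworthiness

score = trustworthiness(digits.data, data_projected, n_neighbors=5)
print("trustworthiness of the 2-D Isomap projection: %.3f" % score)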
ax.text(0.05, 0.05, str(digits.target[i]), transform=ax.transAxes, color='green')

X = digits.data
print(X.shape)  # each image is represented as a pixel array of length 64
y = digits.target
print(y.shape)  # in total: 1797 samples and 64 features

# 1. Unsupervised learning: dimensionality reduction
# Transform the data to a two-dimensional view
from sklearn.manifold import Isomap  # manifold learning algorithm

iso = Isomap(n_components=2)  # reduce the number of dimensions to 2
iso.fit(digits.data)
data_projected = iso.transform(digits.data)
print(data_projected.shape)

# Plot the data
plt.scatter(data_projected[:, 0], data_projected[:, 1], c=digits.target,
            edgecolors='none', alpha=0.5,
            cmap=plt.cm.get_cmap("Spectral", 10))
plt.colorbar(label='digit label', ticks=range(10))
plt.clim(-0.5, 9.5)

# 2. Digit classification
def embedDistanceMatrix(dmatDf, method='kpca', n_components=2, **kwargs):
    """Two-dimensional embedding of sequence distances in dmatDf,
    returning Nx2 x,y-coords: tsne, isomap, pca, mds, kpca, sklearn-tsne"""
    if isinstance(dmatDf, pd.DataFrame):
        dmat = dmatDf.values
    else:
        dmat = dmatDf

    if method == 'isomap':
        isoObj = Isomap(n_neighbors=10, n_components=n_components)
        xy = isoObj.fit_transform(dmat)
    elif method == 'mds':
        mds = MDS(n_components=n_components,
                  max_iter=3000,
                  eps=1e-9,
                  random_state=15,
                  dissimilarity="precomputed",
                  n_jobs=1)
        xy = mds.fit(dmat).embedding_
        rot = PCA(n_components=n_components)
        xy = rot.fit_transform(xy)
    elif method == 'pca':
        pcaObj = PCA(n_components=None)
        xy = pcaObj.fit_transform(dmat)[:, :n_components]
    elif method == 'kpca':
        pcaObj = KernelPCA(n_components=dmat.shape[0],
                           kernel='precomputed',
                           eigen_solver='dense')
        try:
            gram = dist2kernel(dmat)
        except Exception:
            print('Could not convert dmat to kernel for KernelPCA; '
                  'using 1 - dmat/dmat.max() instead')
            gram = 1 - dmat / dmat.max()
        xy = pcaObj.fit_transform(gram)[:, :n_components]
    elif method == 'lle':
        lle = LocallyLinearEmbedding(n_neighbors=30,
                                     n_components=n_components,
                                     method='standard')
        xy = lle.fit_transform(dmat)  # was `dist`, an undefined name
    elif method == 'sklearn-tsne':
        tsneObj = TSNE(n_components=n_components,
                       metric='precomputed',
                       random_state=0,
                       perplexity=kwargs['perplexity'])
        xy = tsneObj.fit_transform(dmat)
    else:
        print('Method unknown: %s' % method)
        return

    assert xy.shape[0] == dmatDf.shape[0]
    xyDf = pd.DataFrame(xy[:, :n_components],
                        index=dmatDf.index,
                        columns=np.arange(n_components))
    if method == 'kpca':
        # Not sure how negative eigenvalues should be handled here, but they
        # are usually small so it shouldn't make a big difference.
        xyDf.explained_variance_ = (pcaObj.lambdas_[:n_components] /
                                    pcaObj.lambdas_[pcaObj.lambdas_ > 0].sum())
    return xyDf
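
# Minimal usage sketch (assumed setup, not from the source): embed a small
# precomputed Euclidean distance matrix with the 'mds' branch.
import numpy as np
import pandas as pd
from scipy.spatial.distance import pdist, squareform

pts = np.random.RandomState(0).randn(20, 5)
dmatDf_example = pd.DataFrame(squareform(pdist(pts)))
xy = embedDistanceMatrix(dmatDf_example, method='mds')
print(xy.shape)  # (20, 2)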
    # (tail of my_Isomap, whose definition is truncated above)
    return data_n


import matplotlib.pyplot as plt
from sklearn.datasets import make_s_curve
from sklearn.manifold import Isomap


def scatter_3d(X, y):
    fig = plt.figure(figsize=(6, 5))
    ax = fig.add_subplot(111, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y, cmap=plt.cm.hot)
    ax.view_init(10, -70)
    ax.set_xlabel("$x_1$", fontsize=18)
    ax.set_ylabel("$x_2$", fontsize=18)
    ax.set_zlabel("$x_3$", fontsize=18)
    plt.show()


if __name__ == '__main__':
    X, Y = make_s_curve(n_samples=500, noise=0.1, random_state=42)
    data_1 = my_Isomap(X, 2, 10)
    data_2 = Isomap(n_neighbors=10, n_components=2).fit_transform(X)

    plt.figure(figsize=(8, 4))
    plt.subplot(121)
    plt.title("my_Isomap")
    plt.scatter(data_1[:, 0], data_1[:, 1], c=Y)
    plt.subplot(122)
    plt.title("sklearn_Isomap")
    plt.scatter(data_2[:, 0], data_2[:, 1], c=Y)
    plt.savefig("Isomap1.png")
    plt.show()
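
# The body of my_Isomap is not shown above; a minimal classical-Isomap
# reconstruction matching its call signature (X, n_components, n_neighbors)
# might look like the sketch below: kNN graph -> geodesic distances ->
# classical MDS. This is an assumption, not the original implementation.
import numpy as np
from scipy.sparse.csgraph import shortest_path
from sklearn.neighbors import kneighbors_graph


def my_Isomap_sketch(X, n_components, n_neighbors):
    # weighted kNN graph, edge weights = Euclidean distances
    knn = kneighbors_graph(X, n_neighbors, mode='distance')
    # geodesic distances = shortest paths through the graph
    D = shortest_path(knn, directed=False)
    # classical MDS on the squared geodesic distance matrix
    n = D.shape[0]
    H = np.eye(n) - np.ones((n, n)) / n   # centering matrix
    B = -0.5 * H @ (D ** 2) @ H           # double-centered Gram matrix
    eigvals, eigvecs = np.linalg.eigh(B)  # ascending eigenvalues
    idx = np.argsort(eigvals)[::-1][:n_components]
    data_n = eigvecs[:, idx] * np.sqrt(np.maximum(eigvals[idx], 0))
    return data_n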
def makeRingManifold(spikes, ep, angle, bin_size=200):
    """
    spikes : dict of hd spikes
    ep : epoch to restrict
    angle : tsd of angular direction
    bin_size : in ms
    """
    neurons = np.sort(list(spikes.keys()))
    inputs = []
    angles = []
    sizes = []

    # bin spike trains over the epoch
    bins = np.arange(ep.as_units('ms').start.iloc[0],
                     ep.as_units('ms').end.iloc[0] + bin_size, bin_size)
    spike_counts = pd.DataFrame(index=bins[0:-1] + np.diff(bins) / 2,
                                columns=neurons)
    for i in neurons:
        spks = spikes[i].as_units('ms').index.values
        spike_counts[i], _ = np.histogram(spks, bins)
    rates = np.sqrt(spike_counts / bin_size)

    # average head-direction angle within each bin
    epi = nts.IntervalSet(ep.loc[0, 'start'], ep.loc[0, 'end'])
    angle2 = angle.restrict(epi)
    newangle = pd.Series(index=np.arange(len(bins) - 1))
    tmp = angle2.groupby(
        np.digitize(angle2.as_units('ms').index.values, bins) - 1).mean()
    tmp = tmp.loc[np.arange(len(bins) - 1)]
    newangle.loc[tmp.index] = tmp
    newangle.index = pd.Index(bins[0:-1] + np.diff(bins) / 2.)

    # Gaussian-smooth the rates before embedding
    tmp = rates.rolling(window=200, win_type='gaussian', center=True,
                        min_periods=1, axis=0).mean(std=2).values
    sizes.append(len(tmp))
    inputs.append(tmp)
    angles.append(newangle)
    inputs = np.vstack(inputs)

    # 2-D Isomap embedding of the binned population activity
    imap = Isomap(n_neighbors=20, n_components=2, n_jobs=-1).fit_transform(inputs)

    # color points by head direction (hue = angle)
    H = newangle.values / (2 * np.pi)
    HSV = np.vstack((H, np.ones_like(H), np.ones_like(H))).T
    RGB = hsv_to_rgb(HSV)

    fig, ax = subplots()
    ax = subplot(111)
    ax.set_aspect(aspect=1)
    ax.scatter(imap[:, 0], imap[:, 1], c=RGB, marker='o',
               alpha=0.5, zorder=2, linewidth=0, s=40)

    # hsv color-wheel inset
    display_axes = fig.add_axes([0.2, 0.25, 0.05, 0.1], projection='polar')
    colormap = plt.get_cmap('hsv')
    norm = mpl.colors.Normalize(0.0, 2 * np.pi)
    xval = np.arange(0, 2 * np.pi, 0.01)  # was bare `pi`
    yval = np.ones_like(xval)
    display_axes.scatter(xval, yval, c=xval, s=20, cmap=colormap,
                         norm=norm, linewidths=0, alpha=0.8)
    display_axes.set_yticks([])
    display_axes.set_xticks(np.arange(0, 2 * np.pi, np.pi / 2))
    display_axes.grid(False)
    show()
    return imap, bins[0:-1] + np.diff(bins) / 2
def isomap(X=None, W=None, num_vecs=None, k=None):
    # W is accepted for interface compatibility but unused by Isomap
    embedder = Isomap(n_neighbors=k, n_components=num_vecs)
    return embedder.fit_transform(X)
fa_projected_data = FactorAnalysis(
    n_components=PROJECTED_DIMENSIONS).fit_transform(neural_data)
color_3D_projection(fa_projected_data, variable_data, 'FA; ' + Title)

# ICA
ICA_projected_data = FastICA(
    n_components=PROJECTED_DIMENSIONS).fit_transform(neural_data)
color_3D_projection(ICA_projected_data, variable_data, 'ICA; ' + Title)

# Isomap
N_NEIGHBORS = 30
Isomap_projected_data = Isomap(
    n_components=PROJECTED_DIMENSIONS,
    n_neighbors=N_NEIGHBORS).fit_transform(neural_data)
color_3D_projection(Isomap_projected_data, variable_data, 'Isomap; ' + Title)

# tSNE
PERPLEXITY = 30  # normally ranges 5-50
TSNE_projected_data = TSNE(
    n_components=PROJECTED_DIMENSIONS,
    perplexity=PERPLEXITY).fit_transform(neural_data)
color_3D_projection(TSNE_projected_data, variable_data, 'tSNE; ' + Title)

# Multidimensional scaling (the source is truncated mid-call here; the call is
# completed below on the assumption that it follows the pattern of the methods above)
MDS_projected_data = MDS(
    n_components=PROJECTED_DIMENSIONS).fit_transform(neural_data)
color_3D_projection(MDS_projected_data, variable_data, 'MDS; ' + Title)
plt.ylabel("MLLE2") # KERNEL PRINCIPAL COMPONENT ANALYSIS (KPCA) print("Performing Kernel Principal Component Analysis (KPCA) ...") plt.subplot(333) kpca = KernelPCA(n_components=2, kernel='cosine').fit_transform(X) plt.scatter(kpca[:, 0], kpca[:, 1], c=Y, cmap='viridis', s=1) plt.title('Kernel PCA') #plt.colorbar() plt.xlabel("KPCA1") plt.ylabel("KPCA2") # ISOMAP print("Performing Isomap Plotting ...") plt.subplot(334) model = Isomap(n_components=2) isomap = model.fit_transform(X) plt.scatter(isomap[:, 0], isomap[:, 1], c=Y, cmap='viridis', s=1) plt.title('Isomap') #plt.colorbar() plt.xlabel("ISO1") plt.ylabel("ISO2") # LAPLACIAN EIGENMAP print("Performing Laplacian Eigenmap (Spectral Embedding) ...") plt.subplot(335) model = SpectralEmbedding(n_components=2, n_neighbors=50) se = model.fit_transform(X) plt.scatter(se[:, 0], se[:, 1], c=Y, cmap='viridis', s=1) plt.title('Laplacian Eigenmap') #plt.colorbar()
def eval_other_methods(x, y):
    # GMM on raw data
    gmm = mixture.GaussianMixture(covariance_type='full',
                                  n_components=args.n_clusters,
                                  random_state=0)
    gmm.fit(x)
    y_pred_prob = gmm.predict_proba(x)
    y_pred = y_pred_prob.argmax(1)
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | GMM clustering on raw data")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")

    # K-Means on raw data
    y_pred = KMeans(n_clusters=args.n_clusters, random_state=0).fit_predict(x)
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | K-Means clustering on raw data")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")

    # Spectral clustering on raw data
    sc = SpectralClustering(n_clusters=args.n_clusters,
                            random_state=0,
                            affinity='nearest_neighbors')
    y_pred = sc.fit_predict(x)
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | Spectral Clustering on raw data")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")

    # manifold learning on the raw data
    if args.manifold_learner == 'UMAP':
        md = float(args.umap_min_dist)
        hle = umap.UMAP(random_state=0,
                        metric=args.umap_metric,
                        n_components=args.umap_dim,
                        n_neighbors=args.umap_neighbors,
                        min_dist=md).fit_transform(x)
    elif args.manifold_learner == 'LLE':
        from sklearn.manifold import LocallyLinearEmbedding
        hle = LocallyLinearEmbedding(
            n_components=args.umap_dim,
            n_neighbors=args.umap_neighbors).fit_transform(x)
    elif args.manifold_learner == 'tSNE':
        method = 'exact'
        hle = TSNE(n_components=args.umap_dim,
                   n_jobs=16,
                   random_state=0,
                   verbose=0).fit_transform(x)
    elif args.manifold_learner == 'isomap':
        hle = Isomap(
            n_components=args.umap_dim,
            n_neighbors=5,
        ).fit_transform(x)

    # GMM on the embedding
    gmm = mixture.GaussianMixture(covariance_type='full',
                                  n_components=args.n_clusters,
                                  random_state=0)
    gmm.fit(hle)
    y_pred_prob = gmm.predict_proba(hle)
    y_pred = y_pred_prob.argmax(1)
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | GMM clustering on " +
          str(args.manifold_learner) + " embedding")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")

    plt.scatter(*zip(*hle[:, :2]), c=y, label=y)
    plt.savefig(args.save_dir + '/' + args.dataset + '-' +
                str(args.manifold_learner) + '.png')
    plt.clf()

    # K-Means on the embedding
    y_pred = KMeans(n_clusters=args.n_clusters,
                    random_state=0).fit_predict(hle)
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | K-Means " + str(args.manifold_learner) +
          " embedding")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")

    # Spectral clustering on the embedding
    sc = SpectralClustering(n_clusters=args.n_clusters,
                            random_state=0,
                            affinity='nearest_neighbors')
    y_pred = sc.fit_predict(hle)
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | Spectral Clustering on " +
          str(args.manifold_learner) + " embedding")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")
# Set seed
np.random.seed(42)

# ------------------------------- FEATURE EXTRACTION -------------------------------
# Transform the images in the images folder into a 2D numpy array with one
# image per row and one pixel per column
data = aux.images_as_matrix()

# Extract 6 features using Principal Component Analysis
PCA_features = PCA(n_components=6).fit_transform(data)

# Extract 6 features using t-Distributed Stochastic Neighbor Embedding
TSNE_features = TSNE(n_components=6, method="exact").fit_transform(data)

# Extract 6 features using Isometric mapping with Isomap
ISOMAP_features = Isomap(n_components=6).fit_transform(data)

# Save the 18 extracted features into one feature matrix
matrix = np.concatenate((PCA_features, TSNE_features, ISOMAP_features), axis=1)
np.savez('featureextration.npz', matrix)

# ------------------------------- FEATURE SELECTION -------------------------------
def scatter_plot(features):
    """
    Another method to check the correlation between features
    """
    plt.figure()
    scatter_matrix(features, alpha=0.5, figsize=(15, 10), diagonal='kde')
    plt.savefig("scatter_plot.png")
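
# Side note on reloading: np.savez stores positional (unnamed) arrays under
# the key 'arr_0', so the saved feature matrix can be recovered later with:
loaded = np.load('featureextration.npz')
matrix = loaded['arr_0']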
def cluster_manifold_in_embedding(hl, y, n_clusters, save_dir, visualize):
    # find manifold on autoencoded embedding
    if args.manifold_learner == 'UMAP':
        md = float(args.umap_min_dist)
        hle = umap.UMAP(random_state=0,
                        metric=args.umap_metric,
                        n_components=args.umap_dim,
                        n_neighbors=args.umap_neighbors,
                        min_dist=md).fit_transform(hl)
    elif args.manifold_learner == 'LLE':
        hle = LocallyLinearEmbedding(
            n_components=args.umap_dim,
            n_neighbors=args.umap_neighbors).fit_transform(hl)
    elif args.manifold_learner == 'tSNE':
        hle = TSNE(n_components=args.umap_dim,
                   n_jobs=16,
                   random_state=0,
                   verbose=0).fit_transform(hl)
    elif args.manifold_learner == 'isomap':
        hle = Isomap(
            n_components=args.umap_dim,
            n_neighbors=5,
        ).fit_transform(hl)

    # clustering on new manifold of autoencoded embedding
    if args.cluster == 'GMM':
        gmm = mixture.GaussianMixture(covariance_type='full',
                                      n_components=n_clusters,
                                      random_state=0)
        gmm.fit(hle)
        y_pred_prob = gmm.predict_proba(hle)
        y_pred = y_pred_prob.argmax(1)
    elif args.cluster == 'KM':
        km = KMeans(init='k-means++',
                    n_clusters=n_clusters,
                    random_state=0,
                    n_init=20)
        y_pred = km.fit_predict(hle)
    elif args.cluster == 'SC':
        sc = SpectralClustering(n_clusters=n_clusters,
                                random_state=0,
                                affinity='nearest_neighbors')
        y_pred = sc.fit_predict(hle)

    y_pred = np.asarray(y_pred)
    y_pred = y_pred.reshape(len(y_pred), )
    y = np.asarray(y)
    y = y.reshape(len(y), )
    acc = np.round(cluster_acc(y, y_pred), 5)
    nmi = np.round(metrics.normalized_mutual_info_score(y, y_pred), 5)
    ari = np.round(metrics.adjusted_rand_score(y, y_pred), 5)
    print(args.dataset + " | " + args.manifold_learner +
          " on autoencoded embedding with " + args.cluster + " - N2D")
    print("======================")
    result = "{}\t{}\t{}".format(ari, nmi, acc)
    print(result)
    print("======================")

    if visualize:
        plt.scatter(*zip(*hle[:, :2]), c=y, label=y)
        plt.savefig(save_dir + '/' + args.dataset + '-n2d.png')
        plt.clf()

    return y_pred, acc, nmi, ari
    # (tail of format_plot)
    ax.yaxis.set_major_formatter(plt.NullFormatter())
    ax.set_xlabel('feature 1', color='gray')
    ax.set_ylabel('feature 2', color='gray')
    ax.set_title(title, color='gray')


# make data
X, y = make_swiss_roll(200, noise=0.5, random_state=42)
X = X[:, [0, 2]]

# visualize data
fig, ax = plt.subplots()
ax.scatter(X[:, 0], X[:, 1], color='gray', s=30)

# format the plot
format_plot(ax, 'Input Data')

model = Isomap(n_neighbors=8, n_components=1)
y_fit = model.fit_transform(X).ravel()

# visualize data
fig, ax = plt.subplots()
pts = ax.scatter(X[:, 0], X[:, 1], c=y_fit, cmap='viridis', s=30)
cb = fig.colorbar(pts, ax=ax)

# format the plot
format_plot(ax, 'Learned Latent Parameter')
cb.set_ticks([])
cb.set_label('Latent Variable', color='gray')
plt.show()
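
# Optional check (not in the source): make_swiss_roll also returns the true
# roll parameter y, so we can correlate it with the learned 1-D coordinate.
# The sign of an Isomap coordinate is arbitrary, so look at |r|.
import numpy as np
r = np.corrcoef(y, y_fit)[0, 1]
print("correlation with generating parameter: %.3f" % abs(r))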
Spearman_svd = []
Cluster = []
Cluster_svd = []
for _ in range(iters):
    Phi = random_phi(m, X.shape[0])
    Y, noise = get_observations(X, Phi, snr=snr, return_noise=True)
    pearson_dist, spearman_dist = compare_distances(X, Y, pvalues=True)
    cluster_similarity = compare_clusters(X, Y)
    Pearson.append(pearson_dist[0])
    Spearman.append(spearman_dist[0])
    Pearson_p.append(pearson_dist[1])
    Spearman_p.append(spearman_dist[1])
    Cluster.append(cluster_similarity)

    X_mds = MDS().fit_transform(X.T).T
    Y_mds = MDS().fit_transform(Y.T).T
    X_iso = Isomap().fit_transform(X.T).T
    Y_iso = Isomap().fit_transform(Y.T).T
    pearson_mds, spearman_mds = compare_distances(X_mds, Y_mds, pvalues=True)
    pearson_iso, spearman_iso = compare_distances(X_iso, Y_iso, pvalues=True)
    Pearson_MDS.append(pearson_mds[0])
    Pearson_Iso.append(pearson_iso[0])
    Pearson_MDS_p.append(pearson_mds[1])
    Pearson_Iso_p.append(pearson_iso[1])  # was pearson_mds[1], an apparent copy-paste slip

    ua, sa, vta = np.linalg.svd(X + noise, full_matrices=False)
    Vt = np.diag(sa).dot(vta)
    pearson_svd, spearman_svd = compare_distances(Vt[:m], Y, pvalues=False)
    cluster_similarity_svd = compare_clusters(Vt[:m], Y)
    Pearson_svd.append(pearson_svd)
    Spearman_svd.append(spearman_svd)
    Cluster_svd.append(cluster_similarity_svd)

print(prefix, m,
      np.average(Pearson), np.average(Pearson_p),
      np.average(Spearman), np.average(Spearman_p),
      np.average(Pearson_MDS), np.average(Pearson_MDS_p),
      np.average(Pearson_Iso), np.average(Pearson_Iso_p),
      np.average(Cluster),
      np.average(Pearson_svd), np.average(Spearman_svd),
      np.average(Cluster_svd))
import matplotlib.pyplot as plt
import numpy as np

from sklearn.datasets import fetch_olivetti_faces
from sklearn.manifold import Isomap

# Set random seed for reproducibility
np.random.seed(1000)

if __name__ == '__main__':
    # Create the dataset
    faces = fetch_olivetti_faces()

    # Train Isomap
    isomap = Isomap(n_neighbors=5, n_components=2)
    X_isomap = isomap.fit_transform(faces['data'])

    # Plot the result
    fig, ax = plt.subplots(figsize=(18, 10))
    for i in range(100):
        ax.scatter(X_isomap[i, 0], X_isomap[i, 1], marker='o', s=100)
        ax.annotate('%d' % faces['target'][i],
                    xy=(X_isomap[i, 0] + 0.5, X_isomap[i, 1] + 0.5))
    ax.set_xlabel(r'$x_0$')
    ax.set_ylabel(r'$x_1$')
    ax.grid()
    plt.show()
def isomap(x):
    embedding = Isomap(n_components=2)
    x_transformed = embedding.fit_transform(x)
    return embedding, x_transformed
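
# Minimal usage sketch (assumed data, not from the source): embed sklearn's
# digits and reuse the fitted estimator to transform new points.
from sklearn.datasets import load_digits
from sklearn.manifold import Isomap

digits = load_digits()
embedding, x2d = isomap(digits.data)
print(x2d.shape)  # (1797, 2)
x2d_new = embedding.transform(digits.data[:5])  # fitted Isomap can transform new samples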