def get_embedding(self, points):
    """Embed ``points`` with locally linear embedding (LLE).

    The number of output dimensions is read from ``self.wanted_dimensions``
    and the neighbourhood size from ``self.knn``. The dense eigen-solver is
    used and ``n_jobs=-1`` is passed through to the embedding routine.

    Returns:
        tuple: ``(embedding, errors)`` exactly as returned by
        ``locally_linear_embedding``.

    Raises:
        Whatever ``locally_linear_embedding`` raises; nothing is swallowed.
    """
    # The previous version wrapped this call in ``try/except Exception`` and
    # re-issued the *identical* call from the handler.  That only doubled the
    # work on failure and buried the first traceback, so the call is now made
    # once and any error propagates directly.
    embedding, errors = locally_linear_embedding(
        points,
        n_neighbors=self.knn,
        n_components=self.wanted_dimensions,
        eigen_solver='dense',
        n_jobs=-1,
    )
    return (embedding, errors)
def test_singular_matrix():
    """LLE with the ARPACK solver must raise ``ValueError`` on an all-ones input."""
    M = np.ones((10, 3))
    # Wrap the function itself.  The old code called the function first and
    # handed its *result* to ``ignore_warnings``, so warnings were never
    # actually suppressed while the embedding ran.
    quiet_lle = ignore_warnings(manifold.locally_linear_embedding)
    with pytest.raises(ValueError):
        # n_neighbors / n_components are passed by keyword: current
        # scikit-learn releases no longer accept them positionally.
        quiet_lle(
            M,
            n_neighbors=2,
            n_components=1,
            method='standard',
            eigen_solver='arpack',
        )
def improve_lle_alg(train_df, test_df=None, n_k=3, n_c=2, e_s='dense'):
    """Run LLE on the training features and derive a linear map ``W`` from it.

    Args:
        train_df: training set; feature columns only, no label.
        test_df: optional test set; feature columns only, no label.
        n_k: number of neighbours used by LLE (passed as ``n_neighbors``).
        n_c: number of embedding dimensions (passed as ``n_components``).
        e_s: eigenvalue solver (passed as ``eigen_solver``); defaults to
            ``'dense'``.

    Returns:
        When ``test_df`` is ``None``:
            ``(train_embedding_df, W, error)``.
        Otherwise:
            ``(train_embedding_df, test_embedding_df, W, error)``.
        ``W`` is ``inverse(train) * embedding`` and ``error`` is the second
        value returned by ``locally_linear_embedding``.
    """
    embedded, lle_error = manifold.locally_linear_embedding(
        X=train_df, n_neighbors=n_k, n_components=n_c, eigen_solver=e_s)

    # Type conversions: ndarray -> DataFrame / matrix.
    embedded_train_df = pd.DataFrame(embedded)
    embedded_matrix = np.matrix(embedded)
    feature_matrix = np.matrix(train_df.astype('float'))

    # Does an inverse exist?  The feature matrix is generally not square;
    # the inversion is delegated to numpy.matrix.getI().
    feature_matrix_inv = feature_matrix.getI()

    # W maps raw features onto the embedding coordinates.
    w_matrix = feature_matrix_inv * embedded_matrix

    if test_df is None:
        # No test set supplied: embedded training set, W and the error term.
        return embedded_train_df, w_matrix, lle_error

    # Project the test set with the same W.
    embedded_test_df = pd.DataFrame(np.matrix(test_df) * w_matrix)
    return embedded_train_df, embedded_test_df, w_matrix, lle_error
def test_locally_linear_embedding(self):
    """Compare the graph-based LLE with the reference ``locally_linear_embedding``.

    The two 1-D embeddings of five random 3-D points are compared with
    ``assert_signless_array_almost_equal``.
    """
    np.random.seed(1234)
    pts = np.random.random((5, 3))
    # Keyword arguments: current scikit-learn releases reject positional
    # n_neighbors / n_components.
    expected = locally_linear_embedding(pts, n_neighbors=3, n_components=1)[0]
    G = neighbor_graph(pts, k=3).barycenter_edge_weights(pts, copy=False)
    actual = G.locally_linear_embedding(num_dims=1)
    assert_signless_array_almost_equal(expected, actual)
def perform_lle(X, d):
    """Return the ``d``-component LLE embedding of ``X``.

    ``X`` must provide ``toarray()``; its dense form is embedded with
    12 neighbours and a fixed ``random_state`` of 32.  The reconstruction
    error is printed, only the embedding is returned.
    """
    dense_input = X.toarray()
    embedding, reconstruction_error = locally_linear_embedding(
        dense_input,
        n_neighbors=12,
        n_components=d,
        random_state=32,
    )
    print("Done. Reconstruction error: %g" % reconstruction_error)
    return embedding
def _train(self):
    """Build a preprocessing / embedding pipeline and store its ``predict``.

    Reads ``self._train_features`` and ``self._train_outputs`` and assigns
    ``self._model``.
    """
    x = self._train_features
    # NOTE(review): ``y`` is read but never passed to the pipeline below,
    # although both selectors use supervised score functions -- confirm
    # whether ``fit_transform(x, y)`` was intended.
    y = self._train_outputs
    pipe = pipeline.Pipeline([
        # Drop a fixed set of column positions.
        ('drop', transformers.ColumnDropper(
            columns=(0, 3, 5, 14, 26, 35, 40, 65, 72, 95, 99, 104, 124)
        )),
        ('scale', preprocessing.StandardScaler(
            with_mean=True,
            with_std=True
        )),
        ('select', feature_selection.SelectPercentile(
            percentile=59,#59,
            score_func=feature_selection.mutual_info_classif
        )),
        # NOTE(review): this step reuses the name 'select'; Pipeline step
        # names are presumably expected to be unique -- confirm and rename.
        ('select', feature_selection.SelectKBest(
            k=101,
            score_func=feature_selection.f_classif
        )),
        # NOTE(review): this *calls* locally_linear_embedding on the raw
        # ``x`` while the list is being built, so the step holds the call's
        # return value rather than an estimator object -- looks like
        # manifold.LocallyLinearEmbedding(...) was intended; confirm.
        ('estim', manifold.locally_linear_embedding(
            x,
            n_neighbors=6,
            n_components=101,
            eigen_solver='auto',
            method='standard'
        )),
    ])
    pipe.fit_transform(x)
    # NOTE(review): the last step is an embedding, not a predictor; verify
    # that ``pipe.predict`` is available for this pipeline.
    self._model = pipe.predict
def main_LLE2():
    """Embed the loaded images in 3-D with LLE, dump the results and plot them.

    Steps:
      1. Load images and colours via ``load_images`` and stack them with
         ``matrix_build``.
      2. Compute a 3-component LLE embedding with ``k = 40`` neighbours.
      3. Compute the projection matrix
         ``P = (Y^T X (X^T X)^-1)^T`` and write its three columns to
         'projection vectors 1/2/3' (one ``repr`` per line).
      4. Write the embedded coordinates to 'scatter matrix in 3D' as
         comma-separated ``repr`` values and show a 3-D scatter plot.
    """
    k = 40
    images, colors = load_images()
    matrix_images = matrix_build(images)

    # Keyword arguments: recent scikit-learn releases reject positional
    # n_neighbors / n_components.
    newmatrix, squared_error = locally_linear_embedding(
        matrix_images, n_neighbors=k, n_components=3, eigen_solver='auto')
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('squared_error=' + repr(squared_error))
    print(newmatrix.shape)

    M1 = np.dot(newmatrix.T, matrix_images)
    M2 = np.linalg.inv(np.dot(matrix_images.T, matrix_images))
    P_matrix = (np.dot(M1, M2)).T
    print(P_matrix.shape)

    xx = []
    yy = []
    zz = []
    # ``with`` guarantees the four output files are flushed and closed,
    # which the original bare open() calls never did.
    with open('projection vectors 1', 'w') as f3, \
            open('projection vectors 2', 'w') as f4, \
            open('projection vectors 3', 'w') as f5, \
            open('scatter matrix in 3D', 'w') as f6:
        for row in P_matrix:
            f3.write(repr(row[0]) + '\n')
            f4.write(repr(row[1]) + '\n')
            f5.write(repr(row[2]) + '\n')
        for row in newmatrix:
            xx.append(row[0])
            yy.append(row[1])
            zz.append(row[2])
            f6.write(repr(row[0]) + ',' + repr(row[1]) + ',' + repr(row[2]) + '\n')

    fig = plt.figure()
    ax = Axes3D(fig)
    ax.scatter(xx, yy, zz, c=colors)
    plt.show()
def lle(input, finaldim):
    """Embed ``input`` into ``finaldim`` dimensions with LLE (12 neighbours).

    Args:
        input: a ``numpy.ndarray``, or an object offering ``toarray()``
            (e.g. a SciPy sparse matrix), which is densified first.
        finaldim: number of embedding components.

    Returns:
        tuple: ``([], embedding)`` -- the first element is always an empty
        list, kept so existing callers that unpack two values keep working.
    """
    from sklearn.manifold import locally_linear_embedding
    import numpy

    if not isinstance(input, numpy.ndarray):
        # toarray() gives a plain ndarray.  The previous todense() produced a
        # numpy.matrix, which recent scikit-learn releases refuse as input.
        input = input.toarray()
    X_r, _ = locally_linear_embedding(input, n_neighbors=12,
                                      n_components=finaldim)
    return [], X_r
def network_to_es_df(network, labels):
    """Embed ``network`` in 2-D with LLE and return coordinates and labels.

    Returns:
        tuple: ``(embedded_df, class_df)`` where ``embedded_df`` has the
        columns ``'x'`` and ``'y'`` (first and second embedding component)
        and ``class_df`` has the single column ``'class'`` holding ``labels``.
    """
    coords = manifold.locally_linear_embedding(
        X=network, n_neighbors=5, n_components=2)[0]

    embedded_df = pd.DataFrame({'x': coords[:, 0], 'y': coords[:, 1]})

    class_df = pd.DataFrame()
    class_df['class'] = labels
    return embedded_df, class_df
def dim_down(self, method='tsne', ndim=2, rand_seed=6):
    """
    Reduce ``self.data`` to ``ndim`` dimensions with the selected method.

    :param method: selected method of dimension reduction; one of
        'tsne', 'lle', 'mds', 'se', 'ica', 'pca' (case-insensitive).
    :param ndim: number of retained dimensions.
    :param rand_seed: seed used by the random number generator.
    :return: embedding space with N cells * d feature.
    :raises ValueError: if ``method`` is not one of the supported names.
        (Previously an unknown name fell through every branch and failed
        with an ``UnboundLocalError`` on ``return V``.)
    """
    X = self.data
    key = method.lower()

    # http://scikit-learn.org/stable/modules/manifold.html
    if key == 'tsne':
        print(
            "Dimension reduction with t-stochastic neighbor embedding(tSNE).\n"
        )
        V = manifold.TSNE(n_components=ndim, random_state=rand_seed,
                          init='pca').fit_transform(X)
    elif key == 'lle':
        print("Dimension reduction with locally_linear_embedding(LLE).\n")
        # Only the embedding is used; the reconstruction error is dropped.
        V, _ = manifold.locally_linear_embedding(X, n_neighbors=20,
                                                 n_components=ndim,
                                                 random_state=rand_seed,
                                                 method='modified')
    elif key == 'mds':
        print("Dimension reduction with Multidimensional scaling(MDS).\n")
        V = manifold.MDS(n_components=ndim, random_state=rand_seed,
                         max_iter=100, n_init=1).fit_transform(X)
    elif key == 'se':
        print("Dimension reduction with Spectral Embedding(SE).\n")
        V = manifold.SpectralEmbedding(
            n_components=ndim, random_state=rand_seed).fit_transform(X)
    # http://scikit-learn.org/stable/modules/decomposition.html
    elif key == 'ica':
        print(
            "Matrix decomposition with Independent component analysis(FastICA).\n"
        )
        V = decomposition.FastICA(n_components=ndim,
                                  random_state=rand_seed).fit_transform(X)
    elif key == 'pca':
        print(
            "Matrix decomposition with Principal component analysis(PCA).\n"
        )
        V = decomposition.PCA(n_components=ndim,
                              random_state=rand_seed).fit_transform(X)
    else:
        raise ValueError(
            "Unknown dimension reduction method %r; expected one of "
            "'tsne', 'lle', 'mds', 'se', 'ica', 'pca'." % (method,))
    return V
def test_singular_matrix():
    """LLE with the ARPACK solver must raise ``ValueError`` on an all-ones input."""
    M = np.ones((10, 3))
    # Decorate the callable, then invoke it.  Passing the *result* of the
    # call to ``ignore_warnings`` (as before) meant the embedding ran with
    # warnings enabled and the wrapper itself was never exercised.
    f = ignore_warnings(manifold.locally_linear_embedding)
    with pytest.raises(ValueError):
        f(
            M,
            n_neighbors=2,
            n_components=1,
            method="standard",
            eigen_solver="arpack",
        )
def preform_lle_on_dynamic_connectivity(input_path, output_path, brain_areas,
                                        pattern):
    """
    Computes the dynamic connectivity of brain areas with performing
    a locally linear embedding returning its matrix.

    For every subject and every time point the pairwise phase-difference
    matrix (dFC) is built, saved under ``<output_path>/dFC``, embedded into
    two components with LLE, and the flattened embedding is stored in the
    result array.  The LLE error of each step is written as JSON to
    ``<output_path>/LLE_error_<subject>_<time>``.

    :param input_path: path to input dir
    :type input_path: str
    :param output_path: path to output directory
    :type output_path: str
    :param brain_areas: number of brain areas
    :type brain_areas: int
    :param pattern: pattern of input files
    :type pattern: str
    :return: LLE matrix, LLE matrix shape
    :rtype: np.ndarray, tuple
    """
    paths = return_paths_list(input_path, output_path, pattern=pattern)
    n_subjects = len(paths)
    # The first file determines the number of time points for all subjects.
    array = np.genfromtxt(paths[0], delimiter=',')
    t_phases = array.shape[0]
    lle_components = np.full((n_subjects, t_phases, (brain_areas * 2)),
                             fill_value=0).astype(np.float64)
    # Loop-invariant: the dFC output directory path.
    dfc_output = os.path.join(output_path, 'dFC')
    for n in tqdm(range(0, n_subjects)):
        phases = convert_to_phases(paths[n], output_path, brain_areas,
                                   t_phases, n)
        for t in range(0, t_phases):
            # Vectorised replacement of the former double Python loop over
            # (i, z): same element-wise formula, evaluated for all pairs at
            # once.  Differences above pi are folded back as 2*pi - |d|.
            phase_t = np.asarray(phases)[:brain_areas, t]
            gap = np.absolute(phase_t[:, None] - phase_t[None, :])
            dFC = np.cos(np.where(gap > np.pi, 2 * np.pi - gap,
                                  gap)).astype(np.float64)
            create_dir(dfc_output)
            np.savez(
                os.path.join(dfc_output, 'subject_{}_time_{}'.format(n, t)),
                dFC)
            lle, err = manifold.locally_linear_embedding(dFC,
                                                         n_neighbors=12,
                                                         n_components=2)
            with open(
                    os.path.join(output_path,
                                 'LLE_error_{}_{}'.format(n, t)),
                    'w') as output:
                json.dump(err, output)
            lle_components[n, t, :] = np.squeeze(lle.flatten())
    # save the LLE matrix into a .npz file
    np.savez(os.path.join(output_path, 'components_matrix'), lle_components)
    return lle_components, lle_components.shape
def local_linear_embedding(data, n_components=2, n_neighbors=5):
    """Return the modified-LLE embedding of ``data``.

    Only the embedding is returned; the error value produced alongside it
    by ``locally_linear_embedding`` is discarded.
    """
    from sklearn.manifold import locally_linear_embedding

    embedding, _unused_error = locally_linear_embedding(
        data,
        n_neighbors=n_neighbors,
        n_components=n_components,
        method='modified',
    )
    return embedding
def test_locally_linear_embedding(self):
    """The ModelFrame accessor must reproduce scikit-learn's LLE on iris."""
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)

    result = df.manifold.locally_linear_embedding(3, 3)
    # Keyword arguments: recent scikit-learn releases reject positional
    # n_neighbors / n_components.
    expected = manifold.locally_linear_embedding(
        iris.data, n_neighbors=3, n_components=3)

    # Both calls yield an (embedding, error) pair.
    self.assertEqual(len(result), 2)
    self.assertIsInstance(result[0], pdml.ModelFrame)
    tm.assert_index_equal(result[0].index, df.index)
    tm.assert_numpy_array_equal(result[0].values, expected[0])
    self.assertEqual(result[1], expected[1])
def test_locally_linear_embedding(self):
    """The ModelFrame accessor must reproduce scikit-learn's LLE on iris."""
    iris = datasets.load_iris()
    df = pdml.ModelFrame(iris)

    result = df.manifold.locally_linear_embedding(3, 3)
    # Keyword arguments: recent scikit-learn releases reject positional
    # n_neighbors / n_components.
    expected = manifold.locally_linear_embedding(
        iris.data, n_neighbors=3, n_components=3)

    # Both calls yield an (embedding, error) pair.
    self.assertEqual(len(result), 2)
    # assertIsInstance reports the offending type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(result[0], pdml.ModelFrame)
    self.assert_index_equal(result[0].index, df.index)
    self.assert_numpy_array_equal(result[0].values, expected[0])
    self.assertEqual(result[1], expected[1])
def plot_lle(pidata, nneigh=14):
    """Scatter-plot the 2-D standard-LLE embedding of ``pidata``.

    ``pidata`` is converted with ``np.array`` for the embedding and is also
    indexed with ``[['PI4']]``; the base-10 logarithm of that column colours
    the points.  The reconstruction error is printed.  Nothing is returned.
    """
    points = np.array(pidata)
    embedding, recon_error = locally_linear_embedding(
        points,
        n_neighbors=nneigh,
        n_components=2,  # 2 dimension
        eigen_solver='dense',
        method='standard',
    )
    print("Done. Reconstruction error: %g" % recon_error)

    plt.figure()
    point_colours = np.log10(np.array(pidata[['PI4']]))
    plt.scatter(embedding[:, 0], embedding[:, 1], s=5, c=point_colours)
    plt.title("Local linear embedding")
def sampleBoundary(im, boundary, point, samples_x, samples_y):
    """Sample image patches along the boundary segment nearest to ``point``.

    The ``samples_y`` boundary points closest to ``point`` are selected with
    a nearest-neighbour query, ordered along the boundary by a 1-component
    LLE, and processed in runs of 10: a line is fitted to each run and
    ``bilinearInterpolation`` samples a patch along it.  The image, the
    runs and the fitted lines are drawn on a new matplotlib figure.

    Known limitation (from the original author): the neighbour query can
    pick up points from a second boundary if two boundaries are very close.

    Returns:
        uint8 array of shape ``(samples_y, samples_x * 2 + 1, 3)``.
    """
    # Connected boundary segment of size ``samples_y`` via k-NN.
    knn = NearestNeighbors(n_neighbors=samples_y,
                           algorithm='ball_tree').fit(boundary)
    _, ind = knn.kneighbors([point])

    # Unravel the boundary manifold with LLE, pair each 1-D coordinate with
    # its boundary index and sort by the coordinate.
    coords, _ = manifold.locally_linear_embedding(boundary[ind[0]],
                                                  n_neighbors=2,
                                                  n_components=1)
    paired = np.concatenate((coords, ind.transpose()), axis=1)
    ordered = paired[paired[:, 0].argsort()]
    ordered_idx = ordered[:, 1].astype(int)

    # Some plotting.
    plt.figure()
    plt.imshow(im)

    samples = np.zeros((samples_y, samples_x * 2 + 1, 3), 'uint8')

    # Walk over boundary runs of size 10.
    for start in range(0, samples_y - 9, 10):
        stop = start + 10
        segment = boundary[ordered_idx[start:stop]]
        seg_x = segment[:, 0]
        seg_y = segment[:, 1]
        plt.scatter(seg_x, seg_y, marker='.', s=5)

        # Fit a line to the run's point coordinates.
        slope, intercept, _, _, _ = stats.linregress(seg_x, seg_y)
        x_start = min(seg_x)
        x_end = max(seg_x)
        y_start = slope * x_start + intercept
        y_end = slope * x_end + intercept
        plt.plot([x_start, x_end], [y_start, y_end], marker='.')

        # Sample the patch along the fitted line.
        samples[start:stop, :] = bilinearInterpolation(
            slope, intercept, im, x_start, x_end, samples_x, 10)

    return samples
def compute(self):
    """Run LLE on the matrix from port "X" and publish the results.

    Optional ports supply the LLE parameters, with these fallbacks:
    n_neighbors=10, n_components=2, reg=0.001, eigen_solver="auto",
    tol=1e-06, max_iter=100, random_state=None.  ``method``,
    ``hessian_tol`` and ``modified_tol`` come from attributes of ``self``.

    Results set: "proj_matrix" (a deep copy of the input whose ``values``
    are replaced by the embedding) and "squared_error".
    """
    matrix = self.getInputFromPort("X")
    optional = self.forceGetInputFromPort

    lle_options = dict(
        X=matrix.values,
        n_neighbors=optional("n_neighbors", 10),
        n_components=optional("n_components", 2),
        reg=optional("reg", 0.001),
        eigen_solver=optional("eigen_solver", "auto"),
        tol=optional("tol", 1e-06),
        max_iter=optional("max_iter", 100),
        method=self.method,
        hessian_tol=self.hessian_tol,
        modified_tol=self.modified_tol,
        random_state=optional("random_state", None),
    )
    Y, squared_error = manifold.locally_linear_embedding(**lle_options)

    # Keep the input's metadata; only the numeric payload is swapped.
    proj_matrix = copy.deepcopy(matrix)
    proj_matrix.values = Y

    self.setResult("proj_matrix", proj_matrix)
    self.setResult("squared_error", squared_error)
def compute(self):
    """Run LLE on the matrix from port 'X' and publish the results.

    Optional ports supply the LLE parameters, with these fallbacks:
    n_neighbors=10, n_components=2, reg=0.001, eigen_solver='auto',
    tol=1e-06, max_iter=100, random_state=None.  ``method``,
    ``hessian_tol`` and ``modified_tol`` come from attributes of ``self``.

    Results set: 'proj_matrix' (a deep copy of the input whose ``values``
    are replaced by the embedding) and 'squared_error'.
    """
    matrix = self.getInputFromPort('X')
    read_port = self.forceGetInputFromPort

    embedding_and_error = manifold.locally_linear_embedding(
        X=matrix.values,
        n_neighbors=read_port('n_neighbors', 10),
        n_components=read_port('n_components', 2),
        reg=read_port('reg', 0.001),
        eigen_solver=read_port('eigen_solver', 'auto'),
        tol=read_port('tol', 1e-06),
        max_iter=read_port('max_iter', 100),
        method=self.method,
        hessian_tol=self.hessian_tol,
        modified_tol=self.modified_tol,
        random_state=read_port('random_state', None),
    )
    Y, squared_error = embedding_and_error

    # Keep the input's metadata; only the numeric payload is swapped.
    proj_matrix = copy.deepcopy(matrix)
    proj_matrix.values = Y

    self.setResult('proj_matrix', proj_matrix)
    self.setResult('squared_error', squared_error)
def SwissRollTest01():
    """Embed a 1500-point swiss roll in 2-D with LLE and plot both views.

    The upper subplot shows the original 3-D data (or an x/z projection if
    a 3-D axes cannot be created), the lower one the 2-D embedding.  The
    shape of the embedding is printed.
    """
    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import Axes3D

    # Locally Linear Embedding of the swiss roll
    from sklearn import manifold, datasets

    # Generate the data set: X is a 1500 x 3 matrix (locations), color holds
    # one value per sample (the label, used as colour).
    # ``datasets.make_swiss_roll`` replaces the removed
    # ``datasets.samples_generator`` module path.
    X, color = datasets.make_swiss_roll(n_samples=1500)

    # X_r is a 1500 x 2 matrix, err is a real number.
    X_r, err = manifold.locally_linear_embedding(X, n_neighbors=12,
                                                 n_components=2)
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print(X_r.shape)

    fig = plt.figure()
    try:
        ax = fig.add_subplot(211, projection='3d')
        ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=plt.cm.Spectral)
    except Exception:
        # Fallback when the 3-D projection is unavailable.  ``Exception``
        # instead of a bare ``except`` so Ctrl-C is not swallowed.
        ax = fig.add_subplot(211)
        ax.scatter(X[:, 0], X[:, 2], c=color, cmap=plt.cm.Spectral)
        print("")

    ax.set_title("Original data")
    ax = fig.add_subplot(212)
    # This subplot shows the projected data.
    ax.scatter(X_r[:, 0], X_r[:, 1], c=color, cmap=plt.cm.Spectral)
    plt.axis('tight')
    plt.xticks([]), plt.yticks([])
    plt.title('Projected Data')
    plt.show()
def main_LLE2():
    """Embed the loaded images in 3-D with LLE, dump the results and plot them.

    Steps:
      1. Load images and colours via ``load_images`` and stack them with
         ``matrix_build``.
      2. Compute a 3-component LLE embedding with ``k = 40`` neighbours.
      3. Compute the projection matrix
         ``P = (Y^T X (X^T X)^-1)^T`` and write its three columns to
         'projection vectors 1/2/3' (one ``repr`` per line).
      4. Write the embedded coordinates to 'scatter matrix in 3D' as
         comma-separated ``repr`` values and show a 3-D scatter plot.
    """
    k = 40
    images, colors = load_images()
    matrix_images = matrix_build(images)

    # Keyword arguments: recent scikit-learn releases reject positional
    # n_neighbors / n_components.
    newmatrix, squared_error = locally_linear_embedding(
        matrix_images, n_neighbors=k, n_components=3, eigen_solver='auto')
    # Single-argument print(...) behaves identically on Python 2 and 3.
    print('squared_error=' + repr(squared_error))
    print(newmatrix.shape)

    M1 = np.dot(newmatrix.T, matrix_images)
    M2 = np.linalg.inv(np.dot(matrix_images.T, matrix_images))
    P_matrix = (np.dot(M1, M2)).T
    print(P_matrix.shape)

    xx = []
    yy = []
    zz = []
    # ``with`` guarantees the four output files are flushed and closed,
    # which the original bare open() calls never did.
    with open('projection vectors 1', 'w') as f3, \
            open('projection vectors 2', 'w') as f4, \
            open('projection vectors 3', 'w') as f5, \
            open('scatter matrix in 3D', 'w') as f6:
        for row in P_matrix:
            f3.write(repr(row[0]) + '\n')
            f4.write(repr(row[1]) + '\n')
            f5.write(repr(row[2]) + '\n')
        for row in newmatrix:
            xx.append(row[0])
            yy.append(row[1])
            zz.append(row[2])
            f6.write(
                repr(row[0]) + ',' + repr(row[1]) + ',' + repr(row[2]) + '\n')

    fig = plt.figure()
    ax = Axes3D(fig)
    ax.scatter(xx, yy, zz, c=colors)
    plt.show()
c=sr_color, s=50, alpha=0.8) ax.set_title("Swiss Roll in Ambient Space") ax.view_init(azim=-66, elev=12) _ = ax.text2D(0.8, 0.05, s="n_samples=1500", transform=ax.transAxes) # %% # Computing the LLE and t-SNE embeddings, we find that LLE seems to unroll the # Swiss Roll pretty effectively. t-SNE on the other hand, is able # to preserve the general structure of the data, but, poorly represents the # continous nature of our original data. Instead, it seems to unnecessarily # clump sections of points together. sr_lle, sr_err = manifold.locally_linear_embedding(sr_points, n_neighbors=12, n_components=2) sr_tsne = manifold.TSNE(n_components=2, learning_rate="auto", perplexity=40, init="pca", random_state=0).fit_transform(sr_points) fig, axs = plt.subplots(figsize=(8, 8), nrows=2) axs[0].scatter(sr_lle[:, 0], sr_lle[:, 1], c=sr_color) axs[0].set_title("LLE Embedding of Swiss Roll") axs[1].scatter(sr_tsne[:, 0], sr_tsne[:, 1], c=sr_color) _ = axs[1].set_title("t-SNE Embedding of Swiss Roll") # %%
# Load the STL files and add the vectors
your_mesh = mesh.Mesh.from_file('Left_Thalamus.stl')

# Convert from groups of vertices (for each triangle) to list of vertices.
# ``vectors`` is indexed as [triangle, point, coordinate]; a row-major
# reshape puts point p of triangle t at row ``t * points + p``.  The former
# hand-written index ``index_t + index_p + index_t * 2`` equals
# ``3 * index_t + index_p`` and was therefore only right for exactly three
# points per face.  float64 matches the dtype of the old np.zeros buffer.
tri, points, dim = np.shape(your_mesh.vectors)
data_mesh = np.asarray(your_mesh.vectors,
                       dtype=np.float64).reshape(tri * points, dim)

# Isomap
#X_r = manifold.Isomap(n_neighbors=10,n_components=2).fit_transform(data_mesh)

# LLE
X_r, err = manifold.locally_linear_embedding(data_mesh, n_neighbors=37,
                                             n_components=2)

# T-sne
#tsne = manifold.TSNE(n_components=2, init='pca', random_state=0 )
#X_r = tsne.fit_transform(data_mesh)

# Plot original vertices, coloured by their first coordinate
figure = pyplot.figure()
ax = figure.add_subplot(211, projection='3d')
ax.scatter(data_mesh[:, 0], data_mesh[:, 1], data_mesh[:, 2],
           c=data_mesh[:, 0], cmap=pyplot.cm.Spectral)
#pyplot.show()
ax.set_title("Original data")
# Load the PCA-reduced train / test features.
train_df = pd.read_csv(data_root+"train_feature_pca500.csv")
test_df = pd.read_csv(data_root+"test_feature_pca500.csv")

# Stack the ids of both sets as an (n, 1) column.  ``Series.reshape`` no
# longer exists in pandas, so the underlying ndarray is reshaped instead.
photo_ids = np.vstack((train_df['photo_id'].values.reshape(-1, 1),
                       test_df['photo_id'].values.reshape(-1, 1)))

train_df.drop('photo_id', axis=1, inplace=True)
test_df.drop('photo_id', axis=1, inplace=True)

# Embed train and test together so both share one embedding space.
X_std = np.vstack((train_df, test_df))
n_train = len(train_df)

from sklearn import manifold
X_lle, err = manifold.locally_linear_embedding(X_std, n_neighbors=10,
                                               n_components=300)

X_all = pd.DataFrame(X_lle)
# A DataFrame column must be one-dimensional: flatten the (n, 1) id array.
X_all['photo_id'] = photo_ids.ravel()

# Split back into the original train / test partitions and persist them.
X_train = X_all[:n_train]
X_test = X_all[n_train:]
X_train.to_csv('train_feature_lle.csv', index=False)
X_test.to_csv('test_feature_lle.csv', index=False)
def lle(data):
    """Return the 2-component LLE embedding of ``data`` using 5 neighbours.

    The error value returned alongside the embedding is discarded.
    """
    embedding, _error = manifold.locally_linear_embedding(
        data,
        n_neighbors=5,
        n_components=2,
    )
    return embedding
# License: BSD, (C) INRIA 2011

# Single-argument print(...) behaves identically on Python 2 and 3.
print(__doc__)

import pylab as pl

# This import is needed to modify the way figure behaves
from mpl_toolkits.mplot3d import Axes3D

#----------------------------------------------------------------------
# Locally linear embedding of the swiss roll

from sklearn import manifold, datasets

# ``datasets.make_swiss_roll`` replaces the removed
# ``datasets.samples_generator`` module path.
X, color = datasets.make_swiss_roll(n_samples=1500)

print("Computing LLE embedding")
# ``out_dim`` was renamed to ``n_components`` in scikit-learn.
X_r, err = manifold.locally_linear_embedding(X, n_neighbors=12,
                                             n_components=2)
print("Done. Reconstruction error: %g" % err)

#----------------------------------------------------------------------
# Plot result

fig = pl.figure()
try:
    # compatibility matplotlib < 1.0
    ax = fig.add_subplot(211, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=pl.cm.Spectral)
except Exception:
    # Fall back to a 2-D x/z projection; ``Exception`` rather than a bare
    # ``except`` so KeyboardInterrupt / SystemExit still propagate.
    ax = fig.add_subplot(211)
    ax.scatter(X[:, 0], X[:, 2], c=color, cmap=pl.cm.Spectral)

ax.set_title("Original data")
# Single-argument print(...) behaves identically on Python 2 and 3.
print(__doc__)

import pylab as pl

# This import is needed to modify the way figure behaves
from mpl_toolkits.mplot3d import Axes3D

#----------------------------------------------------------------------
# Locally linear embedding of the swiss roll

from sklearn import manifold, datasets

# ``datasets.make_swiss_roll`` replaces the removed
# ``datasets.samples_generator`` module path.
X, color = datasets.make_swiss_roll(n_samples=1500)

print("Computing LLE embedding")
# ``out_dim`` was renamed to ``n_components`` in scikit-learn.
X_r, err = manifold.locally_linear_embedding(X, n_neighbors=12,
                                             n_components=2)
print("Done. Reconstruction error: %g" % err)

#----------------------------------------------------------------------
# Plot result

fig = pl.figure()
try:
    # compatibility matplotlib < 1.0
    ax = fig.add_subplot(211, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=pl.cm.Spectral)
except Exception:
    # Fall back to a 2-D x/z projection; ``Exception`` rather than a bare
    # ``except`` so KeyboardInterrupt / SystemExit still propagate.
    ax = fig.add_subplot(211)
    ax.scatter(X[:, 0], X[:, 2], c=color, cmap=pl.cm.Spectral)

ax.set_title("Original data")
# PCA DR method
n_components = 4
pca = PCA(n_components, whiten=False)
param = pca.fit(dataset_matrix)
dataset_pca = pca.fit_transform(dataset_matrix)
print(pca.explained_variance_ratio_)
print(np.sum(pca.explained_variance_ratio_))
# Components first, then reshape each component into a rows x cols image.
outdata_inter = dataset_pca.T
outdata = np.reshape(outdata_inter, (n_components, rows, cols))

# LLE DR method
n_neighbors = 5
n_components = 4
# Keyword arguments: recent scikit-learn releases reject positional
# n_neighbors / n_components.
dataset_lle, err = manifold.locally_linear_embedding(dataset_matrix,
                                                     n_neighbors=n_neighbors,
                                                     n_components=n_components,
                                                     eigen_solver='auto',
                                                     method='standard')
print("Reconstruction error: %g" % err)
# The embedding is (samples, components) just like the PCA output, so it
# needs the same transpose before being reshaped to (components, rows,
# cols); without it the components are interleaved across the images.
outdata_inter = dataset_lle.T
outdata = np.reshape(outdata_inter, (n_components, rows, cols))

# ISOMAP Method
from sklearn.manifold import Isomap
n_neighbors = 5
n_components = 4
dataset_isomap = manifold.Isomap(
    n_neighbors=n_neighbors,
    n_components=n_components).fit_transform(dataset_matrix)
# Same layout fix as for LLE above.
outdata_inter = dataset_isomap.T
outdata = np.reshape(outdata_inter, (n_components, rows, cols))
# angles = np.linspace(0,2*np.pi,100) angles = np.linspace(0, 2 * np.pi, 50) # angles = [5.3] newData = True if newData: print("Integrating data") # data = sim.states(duration=500) data = sim.states(duration=2400, split=0.01) # max modified 850 data = data[1000:] data = sim.interpolateCurve()[1000:] print("Computing LLE embedding of data") manifoldData, err = manifold.locally_linear_embedding( data, n_neighbors=12, n_components=2, method='standard' ) # weird results, can see that it is rossler but it doesn't look as excpected # Modified provides a smoother manifold, the return map still sucks # Hessians manifold is weird.... and the return maps suck print("Done. Reconstruction error: %g" % err) print("Storing data") sim.storeData("TestRossler") np.savetxt("ManifoldRossler.txt", manifoldData) else: print("Loading data") sim.loadData("TestRossler") data = sim.getData() manifoldData = np.loadtxt("ManifoldRossler.txt") i = 0
print(__doc__)

import pylab as pl

# This import is needed to modify the way figure behaves
from mpl_toolkits.mplot3d import Axes3D
Axes3D

#----------------------------------------------------------------------
# Locally linear embedding of the swiss roll

from sklearn import manifold, datasets

# ``datasets.make_swiss_roll`` replaces the removed
# ``datasets.samples_generator`` module path.
X, color = datasets.make_swiss_roll(n_samples=1500)

print("Computing LLE embedding")
X_r, err = manifold.locally_linear_embedding(X, n_neighbors=12,
                                             n_components=2)
print("Done. Reconstruction error: %g" % err)

#----------------------------------------------------------------------
# Plot result

fig = pl.figure()
try:
    # compatibility matplotlib < 1.0
    ax = fig.add_subplot(211, projection='3d')
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=pl.cm.Spectral)
except Exception:
    # Fall back to a 2-D x/z projection; ``Exception`` rather than a bare
    # ``except`` so KeyboardInterrupt / SystemExit still propagate.
    ax = fig.add_subplot(211)
    ax.scatter(X[:, 0], X[:, 2], c=color, cmap=pl.cm.Spectral)

ax.set_title("Original data")
# Keep only the section points whose third coordinate is below 0.1.
poincareSection2 = np.array(
    [point for point in poincareSection if (point[2] < 0.1)])

# Split the section indices: third coordinate above 0.1 -> extreme event,
# everything else -> ``index``.
# NOTE(review): a point with point[2] == 0.1 lands in ``index`` but is not
# part of ``poincareSection2`` -- confirm the boundaries are meant to differ.
extremeEvents = []
index = []
for i, point in enumerate(poincareSection):
    if point[2] > 0.1:
        extremeEvents.append(i)
    else:
        index.append(i)
extremeEvents = np.array(extremeEvents)
print('Number of extreme events = %g' % len(extremeEvents))

print("Computing LLE embedding of return map")
manifoldOfPoincareSection, err = manifold.locally_linear_embedding(
    poincareSection2, n_neighbors=8, n_components=2,
    method='modified')  # return map then manifold
print("Done. Reconstruction error: %g" % err)

## Fix colors for Plot
# NOTE(review): the doubled ``col = col =`` is redundant but harmless.
col = col = cm.get_cmap('plasma')
# Colour every retained point by its Euclidean norm, normalised to the
# observed min/max.
normdata = [np.linalg.norm(point) for point in poincareSection2]
normalize = colors.Normalize(vmin=min(normdata), vmax=max(normdata))
dataColors = [col(normalize(value)) for value in normdata]
# Element-by-element copy of ``dataColors`` over the retained points.
poincColor = [dataColors[i] for i in range(0, len(poincareSection2))]

print('Plotting data')
fig = plt.figure()
ax = fig.add_subplot(221)
ax.set_title('Oscillator 1')
# Python 2 script: LLE feature reduction, SMOTE oversampling, then an SVM.
# NOTE(review): relies on ``sklearn.cross_validation`` and the
# ``unbalanced_dataset`` package; both look like legacy APIs -- confirm the
# target environment before porting.
from sklearn import manifold,datasets
import pandas as pd
from sklearn import svm
from sklearn import cross_validation
from sklearn.metrics import confusion_matrix
from sklearn.cross_validation import cross_val_score
from unbalanced_dataset import SMOTE
import numpy as np

# Load the data and encode the 'Defective' column as 1 ('Y') / 0 ('N').
train=pd.read_csv('./cmv.csv')
train['Defective']=train['Defective'].map({'Y':1,'N':0})
print type(train.values)
train=train.values
print train[0:1]

# Embed every column except the last one into 4 components.
X_r,err=manifold.locally_linear_embedding(train[:,0:-1],n_neighbors=12,n_components=4)
print("Done. Reconstruction error: %g" % err)
data=X_r
# The last column is used as the label.
label=train[:,-1]
#print label

# 70/30 train/test split with a fixed seed.
x_train,x_test,y_train,y_test=cross_validation.train_test_split(data,label,test_size=0.3,random_state=0)

verbose = False
# Ratio of label-0 to label-1 training samples, handed to SMOTE.
ratio = float(np.count_nonzero(y_train==0)) / float(np.count_nonzero(y_train==1))
smote = SMOTE(ratio=ratio, verbose=verbose, kind='regular')
smox, smoy = smote.fit_transform(x_train, y_train)
# Class counts after resampling.
print np.count_nonzero(smoy==1)
print np.count_nonzero(smoy==0)

# Train on the resampled data, predict on the untouched test split.
clf=svm.SVC(C=10000,gamma=0.0078125)
#print y_train.astype(int)
clf.fit(smox,smoy)
y_pred=clf.predict(x_test)
print y_test
if not DBexists: logging.info("Starting to build index into DB file %s" % outputf) index.fit() logging.info("Index fitted!!") logging.info("Output database: {}".format(outputf)) else: logging.info("Index loaded from DB file %s" % outputf) sparse_word_centroids = wordCentroids(db=index, vect=vectorizer) # Tal vez pueda cargar la matrix dipersa de word_centroids en ram y hacer NMF. logging.info("Fitting Isomap for sparse coding ...") X_s = Dict(sorted({w: v for w, v in sparse_word_centroids if not v is None}.items(), key=lambda t: len(t[0]))) word_embeddings, err = locally_linear_embedding(vstack(list(X_s.values())), method='ltsa', n_neighbors=5, n_components=args.dim, n_jobs=-1) #word_embeddings = factorizer.fit_transform(csr_matrix(vstack(list(X_s.values())))) logging.info("Recosntruction error %f ..." % err) logging.info("DB Vocabulary size %d ..." % index.vocab_size) logging.info("Vectorizer vocabulary size %d ..." % len(vectorizer.vocabulary_.keys())) logging.info("Shape of resulting embedding matrix:") #logging.info("({} {})".format(factorizer.embedding_.shape[0], factorizer.embedding_.shape[1])) logging.info("Writing word vectors into file %s ..." % args.output) write = partial(indexing.write_given_embedding, fname=args.output) with open(args.output, "w") as f: f.write("{} {}\n".format(len(X_s.keys()), word_embeddings.shape[1]) ) Parallel(#backend='threading', n_jobs=20
# Boolean masks selecting the samples labelled 2 and 3.
N2=(Y==2)
N3=(Y==3)

# Two-component projections of X with several methods.
pca=PCA(copy=True, n_components=2, whiten=False)
pcaX=pca.fit_transform(X)
lda = LinearDiscriminantAnalysis(n_components=2)
ldaX = lda.fit(X, Y).transform(X)
kpca = KernelPCA(kernel="rbf",n_components=2)
kpcaX = kpca.fit_transform(X)
isomap=Isomap(n_neighbors=15,n_components=2)
isomapX=isomap.fit_transform(X)
# NOTE(review): ``iieX`` holds the LLE embedding; the name looks like a typo
# for ``lleX`` but may be referenced further down -- confirm before renaming.
iieX,err=locally_linear_embedding(X,n_neighbors=12,n_components=2)
print("Done. Reconstruction error: %g" % err)

# Sparse neighbour-distance matrix: for every sample, the distances to its
# neighbours in columns 1..14 of the k-NN result are written into the row of
# the neighbour in column 0.
distX=np.zeros([len(Y),len(Y)])
nbrs = NearestNeighbors(n_neighbors=15, algorithm='ball_tree').fit(X)
distances, indices = nbrs.kneighbors(X)
for i in range(0,len(Y)):
    distX[indices[i,0],indices[i,1:15]]=distances[i,1:15]
print(distX)
# NOTE(review): ``distX`` contains distances and is not symmetrised;
# verify that this is the input spectral_embedding is meant to receive.
leX=spectral_embedding(distX,n_components=2)

# 3-D scatter of the first three columns of X, one colour per mask.
plt.figure()
ax=plt.subplot(projection='3d')
ax.scatter(X[N1, 0], X[N1, 1], X[N1, 2], c='b')
ax.scatter(X[N2, 0], X[N2, 1], X[N2, 2], c='g')
# Show the fitted PCA axes.
components_ = pca.components_
print(components_)

# First PCA component of the three consecutive 3000-sample groups, drawn on
# a common horizontal line (blue / yellow / red).
data1, data2, data3 = data_pca[0:3000], data_pca[3000:6000], data_pca[6000:9000]
fig = plt.figure()
for _chunk, _colour in ((data1, "b"), (data2, "y"), (data3, "r")):
    plt.scatter(_chunk[:, 0], np.zeros((3000, )), s=1, c=_colour, marker="1")
plt.show()

from sklearn.manifold import locally_linear_embedding

# Same picture for a 1-component LLE, once per neighbourhood size.
for neis in [5, 10, 20, 30, 40, 50]:
    (data_lle, _) = locally_linear_embedding(data, n_neighbors=neis,
                                             n_components=1)
    data1, data2, data3 = data_lle[0:3000], data_lle[3000:6000], data_lle[6000:9000]
    fig = plt.figure()
    for _chunk, _colour in ((data1, "b"), (data2, "y"), (data3, "r")):
        plt.scatter(_chunk[:, 0], np.zeros((3000, )), s=1, c=_colour,
                    marker="1")
    plt.show()
#print(np.all(np.isfinite(K_train)))
# Initialise an SVM and fit data using random walk Kernel.
clf = svm.SVC(kernel='precomputed', C=1)
clf.fit(K_train, y_train)

# Predict and test.
y_pred = clf.predict(K_test)

# Calculate accuracy of classification.
acc = accuracy_score(y_test, y_pred)
print("Accuracy:", str(round(acc * 100, 2)))

# Second experiment: classify on a 3-component LLE of the kernel rows.
#
# Fixes relative to the previous version:
#   * ``locally_linear_embedding`` returns an (embedding, error) tuple, which
#     was passed to ``fit`` / ``predict`` unchanged.
#   * Train and test were embedded independently, giving two unrelated
#     coordinate systems.  The estimator is now fitted on the training rows
#     and the test rows are mapped with ``transform``; both matrices have one
#     column per training sample, as the precomputed-kernel calls above
#     already require.
#   * The embedded features are no longer a kernel matrix, so a fresh SVC
#     with the default kernel is used instead of the 'precomputed' one.
lle = manifold.LocallyLinearEmbedding(n_neighbors=5, n_components=3)
K_train = lle.fit_transform(K_train)
K_test = lle.transform(K_test)

clf = svm.SVC(C=1)
clf.fit(
    K_train,
    y_train,
)

# Predict and test.
y_pred = clf.predict(K_test)

# Calculate accuracy of classification.
acc = accuracy_score(y_test, y_pred)
import pylab as pl

# This import is needed to modify the way figure behaves
from mpl_toolkits.mplot3d import Axes3D

Axes3D

# ----------------------------------------------------------------------
# Locally linear embedding of the swiss roll

from sklearn import manifold, datasets

# ``datasets.make_swiss_roll`` replaces the removed
# ``datasets.samples_generator`` module path.
X, color = datasets.make_swiss_roll(n_samples=1500)

# Single-argument print(...) behaves identically on Python 2 and 3.
print("Computing LLE embedding")
X_r, err = manifold.locally_linear_embedding(X, n_neighbors=12,
                                             n_components=2)
print("Done. Reconstruction error: %g" % err)

# ----------------------------------------------------------------------
# Plot result

fig = pl.figure()
try:
    # compatibility matplotlib < 1.0
    ax = fig.add_subplot(211, projection="3d")
    ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=color, cmap=pl.cm.Spectral)
except Exception:
    # Fall back to a 2-D x/z projection; ``Exception`` rather than a bare
    # ``except`` so KeyboardInterrupt / SystemExit still propagate.
    ax = fig.add_subplot(211)
    ax.scatter(X[:, 0], X[:, 2], c=color, cmap=pl.cm.Spectral)

ax.set_title("Original data")
def def_lenses_dimred(df, fs, get_PCA, get_isomap, get_LLE, get_MDS,
                      get_spectral_embedding, get_SVD):
    """Build min-max scaled 2-D "lens" columns from several projections.

    The feature columns ``fs`` of ``df`` are min-max scaled once; every
    enabled method then projects that matrix to two components, and each
    component is min-max scaled again and stored as an ``(n, 1)`` array.

    Args:
        df: DataFrame holding the data.
        fs: column selection applied as ``df[fs]``.
        get_PCA, get_isomap, get_LLE, get_MDS, get_spectral_embedding,
        get_SVD: flags enabling the respective projection.

    Returns:
        dict: maps ``'lens_<method>_0'`` / ``'lens_<method>_1'`` to the
        scaled component, with ``<method>`` in ``pca``, ``isomap``, ``LLE``,
        ``MDS``, ``spectral_embedding``, ``SVD``; entries appear in that
        order for the enabled methods.
    """
    scaler = MinMaxScaler()
    mapper = km.KeplerMapper()
    # ``.values`` replaces DataFrame.as_matrix(), which was removed from
    # pandas; it returns the same ndarray.
    df_minmax = MinMaxScaler().fit_transform(df[fs].values)

    lenses_dimred = {}

    def _add_lens(method_name, embedding):
        # Store both components of a 2-D embedding, each rescaled to [0, 1].
        for axis in (0, 1):
            key = 'lens_%s_%d' % (method_name, axis)
            lenses_dimred[key] = scaler.fit_transform(
                embedding[:, axis].reshape(-1, 1))

    # PCA (run through the KeplerMapper projection machinery)
    if get_PCA:
        _add_lens('pca', mapper.fit_transform(df_minmax,
                                              projection=PCA(n_components=2),
                                              scaler=None))

    # Isomap
    if get_isomap:
        _add_lens('isomap',
                  manifold.Isomap(n_components=2,
                                  n_neighbors=3).fit_transform(df_minmax))

    # Locally linear embedding (element 0 of the result is the embedding)
    if get_LLE:
        _add_lens('LLE',
                  manifold.locally_linear_embedding(df_minmax,
                                                    n_neighbors=3,
                                                    n_components=2,
                                                    random_state=0)[0])

    # Multi-dimensional scaling
    if get_MDS:
        _add_lens('MDS', manifold.MDS(n_components=2).fit_transform(df_minmax))

    # Spectral embedding
    if get_spectral_embedding:
        _add_lens('spectral_embedding',
                  manifold.SpectralEmbedding(
                      n_components=2, n_neighbors=1,
                      random_state=0).fit_transform(df_minmax))

    # truncated SVD
    if get_SVD:
        _add_lens('SVD',
                  TruncatedSVD(n_components=2,
                               random_state=42).fit_transform(df_minmax))

    return lenses_dimred