def construct_A(X, k, binary=False):
    nbrs = NearestNeighbors(n_neighbors=1 + k).fit(X)
    if binary:
        return nbrs.kneighbors_graph(X)
    else:
        return nbrs.kneighbors_graph(X, mode='distance')
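# Usage sketch for construct_A (not from the original source; assumes X is a
# dense (n_samples, n_features) array and NearestNeighbors is imported as above):
import numpy as np

X_demo = np.random.RandomState(0).rand(20, 3)
A_dist = construct_A(X_demo, k=5)               # sparse graph weighted by distances
A_conn = construct_A(X_demo, k=5, binary=True)  # sparse 0/1 connectivity graph
print(A_dist.shape, A_conn.nnz)                 # (20, 20), 20 * (5 + 1) stored entries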
def _compute_neighbors(self):
    V, dim = self.data_frame.shape
    neighbors = NearestNeighbors(n_neighbors=self.num_neighbors,
                                 algorithm='auto').fit(self.data_frame)
    _, indices = neighbors.kneighbors(self.data_frame)
    self._adjacency_graph = neighbors.kneighbors_graph(self.data_frame, mode='connectivity')
    self._knn_graph = neighbors.kneighbors_graph(self.data_frame, mode='distance')
    self._neighbors = indices
def kNN_graph(self, k, metric, mutual=False):
    # self.latex = []
    nn = NearestNeighbors(n_neighbors=k, algorithm="brute", metric=metric,
                          n_jobs=-1).fit(self.X)
    UAM = nn.kneighbors_graph(self.X).toarray()  # unweighted adjacency matrix
    m = UAM.shape[0]
    self.W = np.zeros((m, m))  # (weighted) adjacency matrix
    self.D = np.zeros((m, m))  # degree matrix
    if mutual == False:
        if self.full_calculated:
            indices = np.where(UAM == 1)
            self.W[indices] = self.full_W[indices]
            self.D[np.diag_indices(m)] = np.sum(self.W, 1)
        else:
            for i in range(m):
                for j in range(m):
                    if UAM[i, j] == 1:
                        sim = self.s(self.X[i], self.X[j], self.d)
                        self.W[i, j] = sim
                        self.D[i, i] += sim
    else:
        if self.full_calculated:
            indices = np.where(np.logical_and(UAM == 1, UAM.T == 1).astype(int) == 1)
            self.W[indices] = self.full_W[indices]
            self.D[np.diag_indices(m)] = np.sum(self.W != 0, 1)
        else:
            for i in range(m):
                for j in range(m):
                    if UAM[i, j] == 1 and UAM[j, i] == 1:
                        sim = self.s(self.X[i], self.X[j], self.d)
                        self.W[i, j] = sim
                        self.D[i, i] += sim
    self.W = np.nan_to_num(self.W)
    self.graph = "kNN graph, k = " + str(k) + ", mutual: " + str(mutual)
def test_connectivity_propagation():
    """
    Check that connectivity in the ward tree is propagated correctly during
    merging.
    """
    from sklearn.neighbors import NearestNeighbors

    X = np.array(
        [
            (0.014, 0.120),
            (0.014, 0.099),
            (0.014, 0.097),
            (0.017, 0.153),
            (0.017, 0.153),
            (0.018, 0.153),
            (0.018, 0.153),
            (0.018, 0.153),
            (0.018, 0.153),
            (0.018, 0.153),
            (0.018, 0.153),
            (0.018, 0.153),
            (0.018, 0.152),
            (0.018, 0.149),
            (0.018, 0.144),
        ]
    )
    nn = NearestNeighbors(n_neighbors=10).fit(X)
    connectivity = nn.kneighbors_graph(X)
    ward = Ward(n_clusters=4, connectivity=connectivity)
    # If changes are not propagated correctly, fit crashes with an IndexError
    ward.fit(X)
def _sparse_neighbor_graph(X, k, binary=False):
    '''Construct a sparse adjacency matrix from a matrix of points (one per row).
    Non-zeros are either binary (connectivity) or distance values, depending on
    the binary arg. Doesn't include self-edges.'''
    knn = NearestNeighbors(n_neighbors=k).fit(X)
    mode = 'connectivity' if binary else 'distance'
    try:
        adj = knn.kneighbors_graph(None, mode=mode)
    except IndexError:
        # XXX: we must be running an old (<0.16) version of sklearn
        # We have to hack around an old bug:
        if binary:
            adj = knn.kneighbors_graph(X, k + 1, mode=mode)
            adj.setdiag(0)
        else:
            adj = knn.kneighbors_graph(X, k, mode=mode)
    return Graph.from_adj_matrix(adj)
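# Illustration of the two kneighbors_graph modes used above (a sketch, not from
# the original source; the Graph class itself belongs to the surrounding library):
import numpy as np
from sklearn.neighbors import NearestNeighbors

pts = np.random.RandomState(1).rand(10, 2)
knn = NearestNeighbors(n_neighbors=3).fit(pts)
# Passing X=None queries the training set itself and excludes self-edges,
# which is why _sparse_neighbor_graph prefers it on sklearn >= 0.16.
conn = knn.kneighbors_graph(None, mode='connectivity')
dist = knn.kneighbors_graph(None, mode='distance')
print(conn.nnz, dist.nnz)  # 30 edges each: 10 points * 3 neighbors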
def diffusionKernel(X, eps, knn, D=None):
    nbrs = NearestNeighbors(n_neighbors=knn, algorithm='ball_tree').fit(X)
    D = nbrs.kneighbors_graph(X, mode='distance')
    term = D.multiply(D) / -eps
    G = np.exp(term.toarray())
    # exp(0) == 1 marks the non-neighbor entries; zero them out, then restore self-loops
    G[np.where(G == 1)] = 0
    G = G + np.eye(G.shape[0])
    deg = np.sum(G, axis=0)
    P = G / deg
    return P, D
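# Usage sketch for diffusionKernel (not from the original source; assumes numpy
# and NearestNeighbors are imported as above):
import numpy as np

X_demo = np.random.RandomState(2).rand(30, 4)
P, D = diffusionKernel(X_demo, eps=0.5, knn=5)
# Columns of P sum to 1 because G is divided by its column sums
print(P.shape, np.allclose(P.sum(axis=0), 1.0))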
def test_lle_with_sklearn():
    N = 10
    X, color = datasets.samples_generator.make_s_curve(N, random_state=0)
    n_components = 2
    n_neighbors = 3
    knn = NearestNeighbors(n_neighbors=n_neighbors + 1).fit(X)
    G = geom.Geometry()
    G.set_data_matrix(X)
    G.set_adjacency_matrix(knn.kneighbors_graph(X, mode='distance'))
    sk_Y_lle = manifold.LocallyLinearEmbedding(n_neighbors, n_components,
                                               method='standard').fit_transform(X)
    (mm_Y_lle, err) = lle.locally_linear_embedding(G, n_components)
    assert(_check_with_col_sign_flipping(sk_Y_lle, mm_Y_lle, 0.05))
def test_isomap_with_sklearn():
    N = 10
    X, color = datasets.samples_generator.make_s_curve(N, random_state=0)
    n_components = 2
    n_neighbors = 3
    knn = NearestNeighbors(n_neighbors=n_neighbors + 1).fit(X)
    # Assign the geometry matrix to get the same answer, since sklearn uses
    # k-neighbors instead of radius-neighbors
    g = geom.Geometry(X)
    g.set_adjacency_matrix(knn.kneighbors_graph(X, mode='distance'))
    # test Isomap with sklearn
    sk_Y_iso = manifold.Isomap(n_neighbors, n_components,
                               eigen_solver='arpack').fit_transform(X)
    mm_Y_iso = iso.isomap(g, n_components)
    assert(_check_with_col_sign_flipping(sk_Y_iso, mm_Y_iso, 0.05))
def getNeighborStatistics(self, data, samples, pcntl):
    neigh = NearestNeighbors(n_neighbors=samples)
    neigh.fit(data)
    A = neigh.kneighbors_graph(data, mode='distance')
    b = A.nonzero()
    c = np.log10(np.array(A[b[0], b[1]]))
    mean = c[0].mean()
    std = c[0].std()
    pc = np.percentile(c[0], pcntl)
    n, bins, patches = plt.hist(c[0], 50)
    plt.show()
    mx = bins[n.argmax()]
    ret = {'mean': np.power(10, mean), 'std': np.power(10, std),
           'pcntl': np.power(10, pc), 'max': np.power(10, mx)}
    return ret
def test_ltsa_with_sklearn():
    from sklearn import manifold
    from sklearn import datasets
    from sklearn.neighbors import NearestNeighbors

    N = 10
    X, color = datasets.samples_generator.make_s_curve(N, random_state=0)
    n_components = 2
    n_neighbors = 3
    knn = NearestNeighbors(n_neighbors=n_neighbors + 1).fit(X)
    Geometry = geom.Geometry(X)
    Geometry.assign_distance_matrix(knn.kneighbors_graph(X, mode='distance'))
    sk_Y_ltsa = manifold.LocallyLinearEmbedding(n_neighbors, n_components,
                                                method='ltsa',
                                                eigen_solver='arpack').fit_transform(X)
    (mm_Y_ltsa, err) = ltsa.ltsa(Geometry, n_components, eigen_solver='arpack')
    assert(_check_with_col_sign_flipping(sk_Y_ltsa, mm_Y_ltsa, 0.05))
def getCollectionStatistics(self, samples):
    neigh = NearestNeighbors(n_neighbors=samples)
    neigh.fit(self.coordinates)
    A = neigh.kneighbors_graph(self.coordinates, mode='distance')
    b = A.nonzero()
    c = np.log10(np.array(A[b[0], b[1]]))
    mean = c[0].mean()
    std = c[0].std()
    pc = np.percentile(c[0], 50)
    n, bins, patches = plt.hist(c[0], 80)
    plt.show()
    mx = bins[n.argmax()]
    self.collection_stats = {'mean': np.power(10, mean), 'std': np.power(10, std),
                             'pcntl': np.power(10, pc), 'max': np.power(10, mx)}
    return self.collection_stats
def fit(self, X):
    '''Obtain the top-k eigensystem of the graph Laplacian

    The eigen solver adopts shift-invert mode as described in
    http://docs.scipy.org/doc/scipy/reference/tutorial/arpack.html
    '''
    nbrs = NearestNeighbors(n_neighbors=self.n_nbrs).fit(X)
    # NOTE W is densified here, which can exhaust memory for large datasets
    W = nbrs.kneighbors_graph(X).toarray()
    W_sym = np.maximum(W, W.T)
    L = csr_matrix(csgraph.laplacian(W_sym, normed=True))
    [Sigma, U] = eigsh(L, self.n_clusters + 1, sigma=0, which='LM')
    # remove the trivial (smallest) eigenvalue & vector
    self.Sigma, self.U = Sigma[1:], U[:, 1:]
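# Standalone sketch of the same shift-invert Laplacian eigensolve (assumed
# setup, not from the original class; the demo names are illustrative):
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse import csgraph
from scipy.sparse.linalg import eigsh
from sklearn.neighbors import NearestNeighbors

X_demo = np.random.RandomState(3).rand(50, 2)
W = NearestNeighbors(n_neighbors=8).fit(X_demo).kneighbors_graph(X_demo).toarray()
W_sym = np.maximum(W, W.T)                   # symmetrize the kNN graph
L = csr_matrix(csgraph.laplacian(W_sym, normed=True))
Sigma, U = eigsh(L, 4, sigma=0, which='LM')  # smallest eigenpairs via shift-invert
print(Sigma)                                 # first value ~0 for a connected graph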
def netview(matrix, k, mst, algorithm, tree):
    nbrs = NearestNeighbors(n_neighbors=k + 1, algorithm=algorithm).fit(matrix)
    adj_knn = nbrs.kneighbors_graph(matrix).toarray()
    np.fill_diagonal(adj_knn, 0)
    adj_mknn = (adj_knn == adj_knn.T) * adj_knn

    if tree:
        adj = mst + adj_mknn
    else:
        adj = adj_mknn

    adjacency = np.tril(adj)
    mst_edges = np.argwhere(adjacency < 1)
    adjacency[adjacency > 0] = 1.0
    edges = np.argwhere(adjacency != 0)
    weights = matrix[edges[:, 0], edges[:, 1]]
    return [k, edges, weights, adjacency, mst_edges]
def test_isomap_with_sklearn():
    try:
        from sklearn import manifold
        from sklearn import datasets
        from sklearn.neighbors import NearestNeighbors

        N = 10
        X, color = datasets.samples_generator.make_s_curve(N, random_state=0)
        n_components = 2
        n_neighbors = 3
        knn = NearestNeighbors(n_neighbors=n_neighbors + 1).fit(X)
        # Assign the geometry matrix to get the same answer, since sklearn uses
        # k-neighbors instead of radius-neighbors
        Geometry = geom.Geometry(X)
        Geometry.assign_distance_matrix(knn.kneighbors_graph(X, mode='distance'))
        # test Isomap with sklearn
        sk_Y_iso = manifold.Isomap(n_neighbors, n_components,
                                   eigen_solver='arpack').fit_transform(X)
        mm_Y_iso = iso.isomap(Geometry, n_components)
        assert(_check_with_col_sign_flipping(sk_Y_iso, mm_Y_iso, 0.05))
    except ImportError:
        return True
def entropy_batch_mixing(latent_space, batches):
    def entropy(hist_data):
        n_batches = len(np.unique(hist_data))
        if n_batches > 2:
            raise ValueError("Should be only two clusters for this metric")
        frequency = np.mean(hist_data == 1)
        if frequency == 0 or frequency == 1:
            return 0
        return -frequency * np.log(frequency) - (1 - frequency) * np.log(1 - frequency)

    nne = NearestNeighbors(n_neighbors=51, n_jobs=8)
    nne.fit(latent_space)
    kmatrix = nne.kneighbors_graph(latent_space) - scipy.sparse.identity(latent_space.shape[0])

    score = 0
    for t in range(50):
        indices = np.random.choice(np.arange(latent_space.shape[0]), size=100)
        score += np.mean([entropy(batches[kmatrix[indices].nonzero()[1]
                                          [kmatrix[indices].nonzero()[0] == i]])
                          for i in range(100)])
    return score / 50.
def test_precomputed_nearest_neighbors_filtering():
    # Test precomputed graph filtering when containing too many neighbors
    X, y = make_blobs(n_samples=200, random_state=0,
                      centers=[[1, 1], [-1, -1]], cluster_std=0.01)
    n_neighbors = 2
    results = []
    for additional_neighbors in [0, 10]:
        nn = NearestNeighbors(n_neighbors=n_neighbors + additional_neighbors).fit(X)
        graph = nn.kneighbors_graph(X, mode='connectivity')
        labels = SpectralClustering(random_state=0, n_clusters=2,
                                    affinity='precomputed_nearest_neighbors',
                                    n_neighbors=n_neighbors).fit(graph).labels_
        results.append(labels)
    assert_array_equal(results[0], results[1])
def search(self, collection, topicNum=100):
    topicId = []
    topicArray = []
    print('start collect')
    for item in collection.find():
        topicId.append(item['url'])
        topics = [0] * int(topicNum)
        if item.get('topics') is not None:
            for tuple in item['topics']:
                topics[tuple[0]] = tuple[1]
        topicArray.append(topics)
    print('start nns')
    nbrs = NearestNeighbors(n_neighbors=2, algorithm='ball_tree').fit(topicArray)
    print('judge')
    nnset = [[i for i, doc in enumerate(vector) if doc == 1]
             for vector in nbrs.kneighbors_graph(topicArray).toarray()]
    print('update')
    for i, recs in enumerate(nnset):
        print(i, ':', recs)
        collection.update({'url': topicId[i]},
                          {"$set": {"rec": [topicId[key] for key in recs]}})
def get_knn_graph(data_file, data_format, k, d, N, alg):
    if data_format == "binary":
        a = np.fromfile(data_file, dtype=float).reshape((N, d))
    elif data_format == "libsvm":
        x, labels = load_svmlight_file(data_file)
        del labels
        a = x.todense()
        del x
    else:
        print("wrong data format!")
        return 0
    k_plus_1 = k + 1
    t_start = time.time()
    nbrs = NearestNeighbors(n_neighbors=k_plus_1, algorithm=alg, leaf_size=1).fit(a)
    t_tree = time.time()
    knn_graph = nbrs.kneighbors_graph(a)
    t_graph = time.time() - t_tree
    t = time.time() - t_start
    print('overall time = ' + str(t) + " seconds")
    return knn_graph
def order_border(border):
    '''
    https://stackoverflow.com/questions/37742358/sorting-points-to-form-a-continuous-line
    '''
    n_points = border.shape[0]
    clf = NearestNeighbors(n_neighbors=2).fit(border)
    G = clf.kneighbors_graph()
    T = nx.from_scipy_sparse_matrix(G)
    paths = [list(nx.dfs_preorder_nodes(T, i)) for i in range(n_points)]

    min_idx, min_dist = 0, np.inf
    for idx, path in enumerate(paths):
        ordered = border[path]  # ordered nodes
        # diff along axis 0 so the cost sums squared steps between consecutive points
        cost = np.sum(np.diff(ordered, axis=0) ** 2)
        if cost < min_dist:
            min_idx, min_dist = idx, cost
    opt_order = paths[min_idx]
    return border[opt_order][:-1]
def run_swiss():
    # training data
    data, t = load_swiss_data(train_batch_size)
    data = normalize(data)
    nbrs = NearestNeighbors(n_neighbors=k, algorithm='auto').fit(data)
    nbr_graph = nbrs.kneighbors_graph(data).toarray()
    global nbr_graph_tensor
    nbr_graph_tensor = torch.tensor(nbr_graph)
    data = torch.from_numpy(data).float()
    net = Net()
    # loss function
    loss_func = nn.L1Loss()
    # optimizer
    opti = torch.optim.Adam(net.parameters(), weight_decay=1e-3)
    # train net
    train_swiss_dmvu(epoch, data, net, loss_func, opti, t)
    data, t = load_swiss_data(test_batch_size)
    data = normalize_2(data)
    data = torch.from_numpy(data).float()
    test_swiss_dmvu(data, net, t)
def distancetree_metric(centroid_path):
    # calculate the distance over the centroids only, based on the minimum
    # spanning tree
    centroid_list = joblib.load(log_path + "{}/{}/{}/{}".format(
        chose_dataset, chose_model, model_layer, centroid_path))
    centroids = list(centroid_list.values())
    neigh = NearestNeighbors(n_neighbors=len(centroids))
    neigh.fit(centroids)
    A = neigh.kneighbors_graph(centroids, mode='distance')
    X = csr_matrix(A)
    Tcsr = minimum_spanning_tree(X)
    distance = Tcsr.toarray().sum()
    a = Tcsr.toarray()
    b = np.reshape(a, (-1,))
    b = np.where(b == 0, np.inf, b)
    minimum_dist = b.min()
    return minimum_dist, distance
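# Usage sketch for the MST step above (not from the original source; the
# centroids here are synthetic stand-ins for the loaded centroid_list values):
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import minimum_spanning_tree
from sklearn.neighbors import NearestNeighbors

centroids = np.random.RandomState(4).rand(6, 3)
neigh = NearestNeighbors(n_neighbors=len(centroids)).fit(centroids)
A = neigh.kneighbors_graph(centroids, mode='distance')  # full pairwise distances
Tcsr = minimum_spanning_tree(csr_matrix(A))
print(Tcsr.toarray().sum())  # total MST length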
def fit(self, x): """Fit model to data. Args: x(BaseDataset): Dataset to fit. """ x_np, _ = x.numpy() # Determine neighborhood parameters x_np, _ = x.numpy() if x_np.shape[1] > 100: print('Computing PCA before knn search...') x_np = PCA(n_components=100).fit_transform(x_np) nbrs = NearestNeighbors(n_neighbors=self.n_neighbors, algorithm='auto').fit(x_np) self.knn_graph = nbrs.kneighbors_graph() super().fit(x)
def top_3(self, keywords):
    """
    Unnormalised vectors are used to calculate knn. KNN is calculated with
    sklearn.

    :return: knn sparse graph matrix
    """
    kws_len = len(keywords)
    vecs = np.zeros((kws_len, self.vec_len), dtype=float)
    for i, kw in enumerate(keywords):
        word = self.nlp(kw)
        vec = np.array(word.vector)
        vecs[i] = vec
    nbrs = NearestNeighbors(n_neighbors=self.k + 1, algorithm='ball_tree').fit(vecs)
    graph = nbrs.kneighbors_graph(vecs).toarray()
    return graph
def get_connectivity(self, x):
    if self.degree == 0:
        a_net = self.dist2_mat(x)
        a_net = (a_net < self.comm_radius2).astype(float)
    else:
        neigh = NearestNeighbors(n_neighbors=self.degree)
        neigh.fit(x[:, 2:4])
        a_net = np.array(neigh.kneighbors_graph(mode='connectivity').todense())

    if self.mean_pooling:
        # Normalize the adjacency matrix by the number of neighbors -
        # results in mean pooling, instead of sum pooling
        n_neighbors = np.reshape(np.sum(a_net, axis=1),
                                 (self.n_agents, 1))  # TODO or axis=0? Is the mean in the correct direction?
        n_neighbors[n_neighbors == 0] = 1
        a_net = a_net / n_neighbors

    return a_net
def passl_local_graph_partial(site, loc_param_indices, params):
    X = site.buff[loc_param_indices[0]]
    (K, rbf_sigma, local_graph_index, n_cluster, centers_index,
     point_cluster_index, inter_graph_index, member_id_index) = params
    nins = NN(K + 1, None, metric='euclidean').fit(X)
    W = nins.kneighbors_graph(nins._fit_X, K + 1, mode='distance')
    # W.data = W.data**2
    W.data = np.exp(-W.data**2 / rbf_sigma)
    W[np.diag_indices(W.shape[0])] = 0
    site.buff[local_graph_index] = W
    kins = KM(n_cluster)
    point_cluster = kins.fit_predict(X)
    site.buff[point_cluster_index] = point_cluster
    site.buff[centers_index] = kins.cluster_centers_
    # print(kins.cluster_centers_)
    site.buff[inter_graph_index] = {}
    member_id = []
    for i in range(n_cluster):
        member_id.append(np.where(point_cluster == i)[0])
        # print(member_id[-1])
    site.buff[member_id_index] = member_id
def calculate_adjacency_matrix(self, X):
    n_samples = X.shape[1]
    adjacency_matrix = np.zeros((n_samples, n_samples))
    knn = KNN(n_neighbors=self.n_neighbors, algorithm='kd_tree', n_jobs=self.n_jobs)
    knn.fit(X=self.X.T)
    # https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.NearestNeighbors.html#sklearn.neighbors.NearestNeighbors.kneighbors_graph
    # the following function gives an n_samples*n_samples matrix, and puts 0
    # where points are not connected directly in the KNN graph
    connectivity_matrix = knn.kneighbors_graph(
        X=X.T, n_neighbors=self.n_neighbors + 1,
        mode='connectivity')  # +1 because the point itself is also counted
    connectivity_matrix = connectivity_matrix.toarray()
    for point_index in range(connectivity_matrix.shape[0]):
        for point_index_2 in range(connectivity_matrix.shape[1]):
            if connectivity_matrix[point_index, point_index_2] == 1:
                x1 = X[:, point_index]
                x2 = X[:, point_index_2]
                adjacency_matrix[point_index, point_index_2] = math.exp(
                    -(LA.norm(x1 - x2))**2)
    return adjacency_matrix
def similarity_regression(X, y, n_neighbors=None):
    """
    Calculates similarity based on labels, using
        X (data)
        y (labels)

    This considers X by using knn first and then a distance metric - in this
    setting we will use the rbf kernel for similarity. Then, if X is "far" in
    the knn sense, we will set the similarity to 0.

    We can determine "distance" based on clusters: that is, if we build a
    cluster around this obs, which other observations are closest.
    """
    from sklearn.neighbors import NearestNeighbors
    if n_neighbors is None:
        n_neighbors = max(int(X.shape[0] * 0.05) + 1, 2)

    # use NearestNeighbors to determine closest obs
    y_ = np.array(y).reshape(-1, 1)
    nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='auto').fit(y_)
    return np.multiply(nbrs.kneighbors_graph(y_).toarray(), rbf_kernel(X, gamma=1))
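# Usage sketch for similarity_regression (not from the original source; assumes
# rbf_kernel is imported from sklearn.metrics.pairwise, as the function expects):
import numpy as np
from sklearn.metrics.pairwise import rbf_kernel

X_demo = np.random.RandomState(5).rand(40, 3)
y_demo = X_demo.sum(axis=1) + 0.1 * np.random.RandomState(5).randn(40)
S = similarity_regression(X_demo, y_demo)
# S[i, j] is nonzero only if y_j is among the label-space neighbors of y_i
print(S.shape, (S > 0).sum())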
def locality_preserving_loss(local_rep, target_rep, locality_preserving_k=5):
    # norm2 = lambda u, v: ((u-v)**2).sum()
    nbrs = NearestNeighbors(n_neighbors=locality_preserving_k + 1,
                            algorithm='ball_tree',
                            metric="euclidean",
                            # metric="pyfunc",
                            # metric_params={"func": norm2}
                            )
    nbrs = nbrs.fit(target_rep)
    alpha = nbrs.kneighbors_graph(target_rep, mode='distance')
    # g = g.eliminate_zeros()
    sigma = 10
    alpha.data = np.exp(-np.power(alpha.data, 2) / (sigma**2))
    alphaT = torch.tensor(alpha.toarray(), device=local_rep.device)

    # dists = scidist.squareform(scidist.cdist(local_rep, norm2))
    dists = torch.cdist(local_rep, local_rep, p=2)
    dists = dists.pow(2)
    losses = torch.mul(dists, alphaT)
    return torch.sum(losses) / local_rep.shape[0]  # , alpha, alphaT, dists, losses
def k_nearest_network(phase_space, k=5):
    """
    --------------------------------------------
    Convert a phase space into a k nearest neighbor network, a directed one
    --------------------------------------------
    phase_space:  Array. The phase space representation in numpy array format
    k:            Number. The k nearest neighbors will be connected
    --------------------------------------------
    Return a graph object, using igraph representation
    --------------------------------------------
    Usage example:

    import numpy as np
    import imp
    from ts2cn.ts import phase_space as phs

    filename = 'lorenz.dat'
    file = open('ts2cn/thirdy_parties/minfo/data/' + filename, 'r')
    ts = file.read().split()
    ts = [float(i) for i in ts]

    rc = phs.reconstruct_ps(ts, max_dim=20, dims_step=5,
                            false_nn_threshold=0.2, noise_perc=2)
    graph = phs.k_nearest_network(rc, k=5)
    """
    from scipy.spatial.distance import pdist, squareform
    from igraph import Graph
    from igraph import ADJ_UNDIRECTED, ADJ_DIRECTED
    from sklearn.neighbors import NearestNeighbors

    # TODO allow other algorithms
    # k+1 is passed because each node is considered the nearest neighbor of itself
    nbrs = NearestNeighbors(n_neighbors=k + 1, algorithm='kd_tree').fit(phase_space)
    adj_mat = nbrs.kneighbors_graph(phase_space, mode='connectivity').toarray()
    diag = range(len(adj_mat))
    adj_mat[diag, diag] = 0
    return Graph.Adjacency(adj_mat.tolist(), mode=ADJ_DIRECTED)
def SNN(x, k=3, verbose=True, metric='minkowski'):
    '''
    x: n x m matrix, n is #sample, m is #feature
    '''
    n, m = x.shape

    # Find a ranklist of neighbors for each sample
    timestamp = timer()
    if verbose:
        print('Create KNN matrix...')
    knn = NearestNeighbors(n_neighbors=n, metric=metric)
    knn.fit(x)
    A = knn.kneighbors_graph(x, mode='distance')
    A = A.toarray()
    A_rank = A.copy()  # copy so the distance matrix is not overwritten in place
    for i in range(n):
        A_rank[i, :] = np.argsort(A[i, :])
    A_rank = np.array(A_rank, dtype='int')
    A_knn = A_rank[:, :k]
    if verbose:
        print("Time elapsed:\t", timer() - timestamp)

    # Create weighted edges between samples
    timestamp = timer()
    if verbose:
        print('Generate edges...')
    edge = []
    for i in range(n):
        for j in range(i + 1, n):
            shared = set(A_knn[i, :]).intersection(set(A_knn[j, :]))
            shared = np.array(list(shared))
            if len(shared) > 0:
                # When i and j have shared knn
                strength = k - (match1d(shared, A_knn[i, :]) +
                                match1d(shared, A_knn[j, :]) + 2) / 2
                strength = max(strength)
                if strength > 0:
                    edge = edge + [i + 1, j + 1, strength]
    edge = np.array(edge).reshape(-1, 3)
    if verbose:
        print("Time elapsed:\t", timer() - timestamp)
    return edge
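# Usage sketch for SNN (not from the original source; assumes the module also
# defines the match1d helper and `from timeit import default_timer as timer`):
import numpy as np

x_demo = np.random.RandomState(6).rand(15, 4)
edges = SNN(x_demo, k=4, verbose=True)
# Each row is (i, j, strength) with 1-based sample indices
print(edges.shape)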
def order_points(points):
    """
    https://stackoverflow.com/questions/37742358/sorting-points-to-form-a-continuous-line
    """
    clf = NearestNeighbors(n_neighbors=2).fit(points)  # calc nearest neighbour
    G = clf.kneighbors_graph()  # create sparse matrix
    T = nx.from_scipy_sparse_matrix(G)  # construct graph from sparse matrix

    # order paths
    paths = [list(nx.dfs_preorder_nodes(T, i)) for i in range(len(points))]

    mindist = np.inf
    minidx = 0
    for i in range(len(points)):
        p = paths[i]  # order of nodes
        ordered = points[p]  # ordered nodes
        # find cost of that order by the sum of euclidean distances
        # between points (i) and (i+1)
        cost = (((ordered[:-1] - ordered[1:])**2).sum(1)).sum()
        if cost < mindist:
            mindist = cost
            minidx = i
    return paths[minidx]
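# Usage sketch for order_points (not from the original source; assumes networkx
# is imported as nx in the module): shuffle points on a curve, then recover a
# continuous ordering.
import numpy as np

theta = np.linspace(0, np.pi, 25)
curve = np.c_[np.cos(theta), np.sin(theta)]
shuffled = curve[np.random.RandomState(7).permutation(25)]
order = order_points(shuffled)
reordered = shuffled[order]  # approximately follows the original curve
print(reordered[:3])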
def find_KNN_distance_matrix(self, X, n_neighbors):
    # X: column-wise samples
    # returns KNN_distance_matrix: row-wise --> shape: (n_samples, n_samples),
    #         zero where points are not neighbors
    # returns neighbors_indices: row-wise --> shape: (n_samples, n_neighbors)
    knn = KNN(n_neighbors=n_neighbors + 1, algorithm='kd_tree',
              n_jobs=-1)  # +1 because the point itself is also counted
    knn.fit(X=X.T)
    # https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.NearestNeighbors.html#sklearn.neighbors.NearestNeighbors.kneighbors_graph
    # the following function gives an n_samples*n_samples matrix, and puts 0 for
    # the diagonal and also where points are not connected directly in the KNN
    # graph; if K=n_samples, only the diagonal is zero.
    Euclidean_distance_matrix = knn.kneighbors_graph(
        X=X.T, n_neighbors=n_neighbors + 1, mode='distance')  # gives Euclidean distances
    KNN_distance_matrix = Euclidean_distance_matrix.toarray()
    neighbors_indices = np.zeros((KNN_distance_matrix.shape[0], n_neighbors))
    for sample_index in range(KNN_distance_matrix.shape[0]):
        neighbors_indices[sample_index, :] = np.ravel(
            np.asarray(np.where(KNN_distance_matrix[sample_index, :] != 0)))
    neighbors_indices = neighbors_indices.astype(int)
    return KNN_distance_matrix, neighbors_indices
def knn_distance_matrix(X, n_neighbors=10, nn_radius='halfk', leaf_size=30):
    knn = NearestNeighbors(n_neighbors=n_neighbors, algorithm='auto',
                           metric='sqeuclidean', leaf_size=leaf_size, n_jobs=-1)
    knn.fit(X)
    W = knn.kneighbors_graph(n_neighbors=n_neighbors, mode='distance')

    if nn_radius == 'halfk':
        nn_radius = n_neighbors // 2
    distances, _ = knn.kneighbors(n_neighbors=nn_radius)
    half_k_neighbors_distance = np.sqrt(distances[:, -1].squeeze())

    # normalization based on each point's "neighborhood radius"
    for i in range(W.shape[0]):
        W[i, :] /= half_k_neighbors_distance[i]
    W = W.tocsc()
    for j in range(W.shape[0]):
        W[:, j] /= half_k_neighbors_distance[j]

    return W
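# Usage sketch for knn_distance_matrix (not from the original source): the
# returned matrix is doubly normalized by each point's half-k neighbor radius.
import numpy as np

X_demo = np.random.RandomState(8).rand(100, 5)
W = knn_distance_matrix(X_demo, n_neighbors=10)
print(W.shape, W.nnz)  # (100, 100) with 100 * 10 stored distances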
def generateHeidiMatrixResults_noorder(inputData, k=20):
    knn = k
    row = inputData.shape[0]
    heidi_matrix = np.zeros(shape=(row, row), dtype=np.uint64)
    max_count = int(math.pow(2, inputData.shape[1] - 1))
    allsubspaces = range(1, max_count)
    # sort subspaces by the number of set bits (i.e. subspace dimensionality)
    f = lambda a: sorted(a, key=lambda x: sum(int(d) for d in bin(x)[2:]))
    allsubspaces = f(allsubspaces)
    # print(allsubspaces)
    frmt = str(inputData.shape[1] - 1) + 'b'
    factor = 1
    bit_subspace = {}
    count = 0
    # print('knn:', knn)
    for i in allsubspaces:
        bin_value = str(format(i, frmt))
        bin_value = bin_value[::-1]
        subspace_col = [index for index, value in enumerate(bin_value) if value == '1']
        filtered_data = inputData.iloc[:, subspace_col + [-1]]  # NEED TO CHANGE IF COL IS A LIST
        filtered_data['classLabel_orig'] = filtered_data['classLabel'].values
        sorted_data = filtered_data
        subspace = sorted_data.iloc[:, :-2]
        np_subspace = subspace.values  # NEED TO CHANGE IF COL IS A LIST
        # print(np_subspace.shape)
        nbrs = NearestNeighbors(n_neighbors=knn, algorithm='ball_tree').fit(np_subspace)
        temp = nbrs.kneighbors_graph(np_subspace).toarray()
        temp = temp.astype(np.uint64)
        heidi_matrix = heidi_matrix + temp * factor
        factor = factor * 2
        subspace_col_name = [inputData.columns[j] for j in subspace_col]
        # print(i, subspace_col_name)
        bit_subspace[count] = subspace_col_name
        count += 1
    return heidi_matrix, bit_subspace, sorted_data
def make_graph_knn(coords, layers, sim_indices, k):
    nbrs = NearestNeighbors(algorithm='kd_tree').fit(coords)
    nbrs_sm = nbrs.kneighbors_graph(coords, k)
    nbrs_sm.setdiag(0)  # remove self-loop edges
    nbrs_sm.eliminate_zeros()
    nbrs_sm = nbrs_sm + nbrs_sm.T
    pairs_sel = np.array(nbrs_sm.nonzero()).T
    # selected index pair list that we label as connected
    first, second = pairs_sel[:, 0], pairs_sel[:, 1]
    data_sel = np.ones(pairs_sel.shape[0])

    # prepare the input and output matrices (already need to store sparse)
    r_shape = (coords.shape[0], pairs_sel.shape[0])
    eye_edges = np.arange(pairs_sel.shape[0])
    R_i = csr_matrix((data_sel, (pairs_sel[:, 1], eye_edges)), r_shape, dtype=np.uint8)
    R_o = csr_matrix((data_sel, (pairs_sel[:, 0], eye_edges)), r_shape, dtype=np.uint8)

    # now make truth graph y (i.e. both hits are sim-matched)
    y = (np.isin(pairs_sel, sim_indices).astype(np.int8).sum(axis=-1) == 2)
    return R_i, R_o, y
class Density:
    def __init__(self, X, n_neighbors=10, theta=1):
        self.neigh = NearestNeighbors(p=1)
        self.neigh.fit(X)
        self.Pij = -self.neigh.kneighbors_graph(X, n_neighbors, mode='distance') / (2 * theta**2)
        self.Pij.data[:] = np.exp(self.Pij.data)
        self.Wij = self.Pij.sum(0)
        counts = np.bincount(self.Pij.indices, minlength=self.Pij.shape[0])
        counts[np.where(counts == 0)[0]] = 1
        # print(counts)
        self.Gra = np.array(self.Wij / counts).reshape(-1)

    def pick(self, i):
        indices = self.Pij.getrow(i).indices
        temp = self.Gra[i]
        self.Gra[indices] = self.Gra[indices] - temp
        return temp

    def getDensity(self):
        return self.Gra
def get_heidi_input_subspace_noorder(df, bin_value, factor=1, classLabelname='classLabel'):
    # bin_value = [True, False, True, False]
    # factor = 1
    # classLabelname = 'classLabel'
    row = df.shape[0]
    heidi_matrix = np.zeros(shape=(row, row), dtype=np.uint64)
    subspace_col = [i for i, x in enumerate(bin_value) if x]
    filtered_data = df.iloc[:, subspace_col]  # NEED TO CHANGE IF COL IS A LIST
    filtered_data[classLabelname] = df[classLabelname].values
    filtered_data['classLabel_orig'] = filtered_data[classLabelname].values
    sorted_data = filtered_data
    subspace = sorted_data.iloc[:, :-2]
    np_subspace = subspace.values
    nbrs = NearestNeighbors(n_neighbors=knn,  # knn is expected to be defined at module level
                            algorithm='ball_tree').fit(np_subspace)
    temp = nbrs.kneighbors_graph(np_subspace).toarray()
    temp = temp.astype(np.uint64)
    heidi_matrix = heidi_matrix + temp * factor
    factor = factor * 2
    subspace_col_name = [df.columns[j] for j in subspace_col]
    output = '.'
    img, bit_subspace = generateHeidiMatrixResults_noorder_helper(
        heidi_matrix, bs,  # bs is expected to be defined at module level
        output, sorted_data, 'legend_heidi')
    return output + '/consolidated_img.png'
def order_coords(coords, idx_start=0):
    clf = NearestNeighbors(n_neighbors=2).fit(coords)
    G = clf.kneighbors_graph()

    # New sorting
    from scipy.sparse.csgraph import shortest_path
    dist_matrix, predecessors = shortest_path(csgraph=G, directed=False,
                                              return_predecessors=True)
    from tsp_solver.greedy import solve_tsp
    path = solve_tsp(dist_matrix, endpoints=(0, len(coords) - 1))
    sorted_coords = coords[path[::1]]
    organized_coords = sorted_coords

    ### old sorting below
    # T = nx.from_scipy_sparse_matrix(G)
    # order = list(nx.dfs_preorder_nodes(T, 0))
    # organized_coords = coords[order]  # SORT BY ORDER

    return organized_coords
def create_network(self):
    data_path = 'all_stocks_5yr.csv'
    data = pd.read_csv(data_path)
    Name = data['Name']
    companies = list(set(Name))
    time_series = []
    valid_companies = []
    for index, company in enumerate(companies):
        all_time_series = data.loc[data['Name'] == company]
        ts_open = np.array(all_time_series['open'])
        ts_open = ts_open[~np.isnan(ts_open)]
        size = ts_open.shape[0]
        if size > 1100:
            valid_companies.append(company)
            ts_open.resize(1259)
            time_series.append(ts_open)
    time_series = np.array(time_series)
    nbrs = NearestNeighbors(n_neighbors=self.k, algorithm='ball_tree',
                            metric=self.mydist).fit(time_series)
    knn_graph = nbrs.kneighbors_graph(time_series).toarray()
    np.fill_diagonal(knn_graph, 0)
    g = igraph.Graph.Adjacency(knn_graph.tolist(), mode="undirected")
    print('Network created.....')
    g.write_pajek(self.output)
def find_geodesic_distance_matrix(self):
    # ----- find k-nearest neighbor graph (distance matrix):
    if self.n_neighbors is None:
        n_samples = self.X.shape[1]
        self.n_neighbors = n_samples
    knn = KNN(n_neighbors=self.n_neighbors + 1, algorithm='kd_tree',
              n_jobs=self.n_jobs)  # +1 because the point itself is also counted
    knn.fit(X=self.X.T)
    # https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.NearestNeighbors.html#sklearn.neighbors.NearestNeighbors.kneighbors_graph
    # the following function gives an n_samples*n_samples matrix, and puts 0 for
    # the diagonal and also where points are not connected directly in the KNN
    # graph; if K=n_samples, only the diagonal is zero.
    Euclidean_distance_matrix = knn.kneighbors_graph(
        X=self.X.T, n_neighbors=self.n_neighbors, mode='distance')  # gives Euclidean distances
    # Euclidean_distance_matrix = Euclidean_distance_matrix.toarray()
    # ----- find geodesic distance graph:
    # https://scikit-learn.org/stable/modules/generated/sklearn.utils.graph_shortest_path.graph_shortest_path.html
    self.geodesic_dist_matrix = graph_shortest_path(
        dist_matrix=Euclidean_distance_matrix, method="auto", directed=False)
def getHeidiImageForSubspace(self, subspace, outputpath):
    row = self.inputData.shape[0]
    heidi_matrix = np.zeros(shape=(row, row), dtype=np.uint64)
    subspace_col = [i for i, x in enumerate(subspace) if x]
    filtered_data = self.inputData.iloc[:, subspace_col]
    np_subspace = filtered_data.values
    nbrs = NearestNeighbors(n_neighbors=knn, algorithm='ball_tree').fit(np_subspace)
    temp = nbrs.kneighbors_graph(np_subspace).toarray()
    temp = temp.astype(np.uint64)
    heidi_matrix = temp
    arr = np.zeros((heidi_matrix.shape[0], heidi_matrix.shape[1], 3))
    for i in range(heidi_matrix.shape[0]):
        for j in range(heidi_matrix.shape[1]):
            if heidi_matrix[i][j] == 1:
                arr[i][j] = self.subspaceColors[tuple(subspace_col)]
            else:
                arr[i][j] = [255, 255, 255]
    tmp = arr.astype(np.uint8)
    img = Image.fromarray(tmp)
    img.save(outputpath)
    return
def get_knn_graph(X, k):
    '''
    parameters
    ----------
    X : 2-D array
        input data matrix
    k : int
        the number of nearest neighbors

    Notes
    ----------
    knn graph whose element ij is the distance between xi and xj
    if xj is in the knn of xi

    return
    ----------
    knn : csr_matrix (shape = len(X) * len(X))
        pairwise distance matrix of samples
    '''
    neigh = NearestNeighbors(n_neighbors=k)
    neigh.fit(X)
    return neigh.kneighbors_graph(mode='distance')
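# Usage sketch for the get_knn_graph just above (not from the original source):
# calling kneighbors_graph without X queries the fitted data and excludes
# self-edges.
import numpy as np

X_demo = np.random.RandomState(9).rand(30, 3)
knn_g = get_knn_graph(X_demo, k=4)
print(knn_g.shape, knn_g.nnz)  # (30, 30) with 30 * 4 stored distances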
def _affinity_mat(self, X):
    r'''
    Computes the affinity matrix based on the selected kernel type.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        The data matrix from which we will compute the affinity matrix.

    Returns
    -------
    sims : array-like, shape (n_samples, n_samples)
        The resulting affinity kernel.
    '''
    sims = None

    # If gamma is None, then compute default gamma value for this view
    gamma = self.gamma
    if self.gamma is None:
        distances = cdist(X, X)
        gamma = 1 / (2 * np.median(distances) ** 2)

    # Produce the affinity matrix based on the selected kernel type
    if self.affinity == 'rbf':
        sims = rbf_kernel(X, gamma=gamma)
    elif self.affinity == 'nearest_neighbors':
        neighbor = NearestNeighbors(n_neighbors=self.n_neighbors)
        neighbor.fit(X)
        sims = neighbor.kneighbors_graph(X).toarray()
    else:
        sims = polynomial_kernel(X, gamma=gamma)

    return sims
def get_kneighbors_graph(
    points: NDArray[(Any, Any), Number],
    n_farthest_samples: Union[int, float] = 0.3,
    n_random_samples: Union[int, float] = 0.1,
    dmax: int = 500,
    n_neighbors: int = 5,
    n_jobs: Optional[int] = None,
) -> spmatrix:
    """
    Get a graph generated by KNN on given points.

    Args:
        points: array containing point coordinates.
        n_farthest_samples: number of points to keep using farthest point
            sampling. If a float is given, it represents the proportion of
            points used instead.
        n_random_samples: number of points to keep using random sampling.
            If a float is given, it represents the proportion of points used
            instead.
        dmax: maximum distance in pixels between two adjacent nodes.
        n_neighbors: number of neighbors to use for the KNN algorithm.
        n_jobs: number of parallel jobs to run for neighbors search.
            None means 1.

    Returns:
        Sparse distance matrix representing the graph.
    """
    idxs = random_farthest_point_sampling(
        points,
        n_farthest_samples=n_farthest_samples,
        n_random_samples=n_random_samples,
    )
    X = points[idxs]
    knn = NearestNeighbors(n_neighbors=n_neighbors, n_jobs=n_jobs).fit(X)
    A = knn.kneighbors_graph(mode="distance")
    # drop edges longer than dmax, then symmetrize
    Abool = A.astype(bool) - (A > dmax)
    A = A.multiply(Abool)
    return A.maximum(A.T)
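# Usage sketch for get_kneighbors_graph (not from the original source; it relies
# on the module's random_farthest_point_sampling, so the call is illustrative):
import numpy as np

pts = np.random.RandomState(10).rand(500, 2) * 1000  # pixel-like coordinates
A = get_kneighbors_graph(pts, n_farthest_samples=0.3, n_random_samples=0.1,
                         dmax=200, n_neighbors=5)
print(A.shape)  # symmetric sparse graph over the sampled subset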
from scipy.spatial.distance import cosine

names = np.load("C:\\Users\\Will\\Desktop\\ml-1m\\DataSet\\NameMatrix.npy")
valuematrix = np.load("C:\\Users\\Will\\Desktop\\ml-1m\\DataSet\\ArrangedMatrix.npy")
# df = pd.DataFrame(valuematrix, columns=np.array(names).tolist())

nbrs = NearestNeighbors(n_neighbors=5, algorithm='ball_tree').fit(valuematrix)
distances, indices = nbrs.kneighbors(valuematrix)
# print(indices)
# print(distances)
temp = nbrs.kneighbors_graph(valuematrix).toarray()
print(temp)

# df = df.drop('0', 1)
# # df = df.drop(df.head(1).index)
# # print(df)
#
# data_ibs = pd.DataFrame(index=df.columns, columns=df.columns)
#
# ###################################### Let the fun begin #####################
# # Here we'll find the Cosine Similarity between items
# # loop through columns
# for i in range(0, len(data_ibs.columns)):
def construct_A(self, X, k=1, binary=False):
    # might generate a sparse matrix
    nbrs = NearestNeighbors(n_neighbors=1 + k).fit(X)
    if binary:
        return nbrs.kneighbors_graph(X)
    else:
        return nbrs.kneighbors_graph(X, mode='distance')
from sklearn.neighbors import NearestNeighbors
import numpy as np

X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
nbrs = NearestNeighbors(n_neighbors=2, algorithm='ball_tree').fit(X)
test = np.array([[0, 0], [0.1, 0.9]])
distances, indices = nbrs.kneighbors(test)
print(indices)
print(distances)
print(nbrs.kneighbors_graph(X).toarray())

from sklearn.neighbors import KDTree

kdt = KDTree(X, leaf_size=30, metric='euclidean')
print(kdt.query(X, k=2, return_distance=False))
print(kdt.valid_metrics)
from sklearn.neighbors import NearestNeighbors
import numpy as np

X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
nbrs = NearestNeighbors(n_neighbors=2, algorithm='ball_tree').fit(X)
distances, indices = nbrs.kneighbors(X)
print(nbrs.kneighbors_graph(X).toarray())
# initialize data reading
cfg_dnn.init_data_reading_test(train_data_spec)

# get the function for feature extraction
log('> ... getting the feat-extraction function')
extract_func = model.build_extract_feat_function(-1)

output_mat = None  # store the features for all the data in memory
log('> ... generating features from the specified layer')
while not cfg_dnn.test_sets.is_finish():  # loop over the data
    cfg_dnn.test_sets.load_next_partition(cfg_dnn.test_xy)
    batch_num = int(math.ceil(cfg_dnn.test_sets.cur_frame_num / batch_size))
    for batch_index in range(batch_num):  # loop over mini-batches
        start_index = batch_index * batch_size
        # the residue may be smaller than a mini-batch
        end_index = min((batch_index + 1) * batch_size, cfg_dnn.test_sets.cur_frame_num)
        output = extract_func(cfg_dnn.test_x.get_value()[start_index:end_index])
        if output_mat is None:
            output_mat = output
        else:
            output_mat = np.concatenate((output_mat, output))  # this is not efficient

log('> ... fitting a KNN cluster')
knn = KNN(n_neighbors=3)
knn.fit(output_mat)

log('> ... computing the graph of class')
results = knn.kneighbors_graph(output_mat)
print(results)
# results.toarray()
# print(results.toarray())
XTE, YTE = get_data(fullTestFile)
x_new_tr = sparse.lil_matrix(sparse.csr_matrix(XTR)[:, list(range(upcStart - 1, nextStart - 1))])
x_new_te = sparse.lil_matrix(sparse.csr_matrix(XTE)[:, list(range(upcStart - 1, nextStart - 1))])
x_new_stack_T = vstack([x_new_tr, x_new_te]).T

## see boundary elements
print(sparse.csr_matrix(x_new_stack_T)[0])
##
# x_new = sparse.lil_matrix(sparse.csr_matrix(XD)[:, list(range(47, 115))])
# x_new_T = x_new.T
##
from sklearn.neighbors import NearestNeighbors
from sklearn.utils.graph_shortest_path import graph_shortest_path
import networkx as nx
import pickle

feat = 'upc'
k = 3
# k = (n_neighbors - 1) (the first neighbour is 'v' itself)
nbrs = NearestNeighbors(n_neighbors=k + 1, metric='cosine', algorithm='brute').fit(x_new_stack_T)
# distances, indices = nbrs.kneighbors(x_new_T)  # not directly needed, for now

# sparse matrix (68x68) with the nearest k neighbours for each of the 68 pts
knnmatrix = nbrs.kneighbors_graph(x_new_stack_T, mode='distance')
knnmatrix.data[np.where(knnmatrix.data < 0)] = 0
# shortest-path edge weight from (v_i to v_j)
# (doc: https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/graph_shortest_path.pyx)
sp = graph_shortest_path(knnmatrix, directed=False)
G = nx.Graph(knnmatrix)
# shortest-path dict from each v_i to v_j; do len(array) to find path length
spl = nx.shortest_path(G, weight='weight')
## spl = nx.shortest_path(G)  # without weight (just connections - 1/0)

pickle.dump(knnmatrix, open('knn_' + feat + '_k_' + str(k) + '.pickle.dump', 'wb'))  # used to smooth out features
pickle.dump(sp, open('sp_all_' + feat + '_k_' + str(k) + '.pickle.dump', 'wb'))
# np.savetxt('sp_all_' + feat + '_k_' + str(k) + '.np.save', sp)
pickle.dump(spl, open('spl_all_' + feat + '_k_' + str(k) + '.pickle.dump', 'wb'))
##
knnmatrix_all = pickle.load(open('knn_' + feat + '_k_' + str(k) + '.pickle.dump', 'rb'))
sp_all = pickle.load(open('sp_all_' + feat + '_k_' + str(k) + '.pickle.dump', 'rb'))
# sp_all = np.loadtxt('sp_all_' + feat + '_k_' + str(k) + '.np.save')
spl_all = pickle.load(open('spl_all_' + feat + '_k_' + str(k) + '.pickle.dump', 'rb'))
#
def wishbone(data, s, k=15, l=15, num_graphs=1, num_waypoints=250,
             verbose=True, metric='euclidean', voting_scheme='exponential',
             branch=True, flock_waypoints=2, band_sample=False,
             partial_order=[], search_connected_components=True):
    if verbose:
        print('Building lNN graph...')

    # Construct nearest neighbors graph
    start = time.process_time()
    nbrs = NearestNeighbors(n_neighbors=l + 1, metric=metric).fit(data)
    lnn = nbrs.kneighbors_graph(data, mode='distance')
    lnn = np.transpose(lnn)
    print('lNN computed in: %.2f seconds' % (time.process_time() - start))

    # set up return structure
    trajectory = []
    waypoints = []
    branches = []
    bas = []

    # generate klNN graphs and iteratively refine a trajectory in each
    for graph_iter in range(num_graphs):
        if k != l:
            klnn = _spdists_klnn(lnn, k, verbose)
        else:
            klnn = lnn
        # Make the graph undirected
        klnn = _spdists_undirected(klnn)
        klnn.setdiag(0)
        klnn.eliminate_zeros()

        # run trajectory landmarks
        traj, dist, iter_l, paths_l2l = _trajectory_landmarks(
            klnn, data, [s], num_waypoints, partial_order, verbose, metric,
            flock_waypoints, band_sample, branch)
        if branch:
            if verbose:
                print('Determining branch point and branch associations...')
            RNK, bp, diffdists, Y = _splittobranches(traj, traj[0], data,
                                                     iter_l, dist, paths_l2l)

        # calculate weighted trajectory
        W_full = _weighting_scheme(voting_scheme, dist)
        if branch:
            W = _muteCrossBranchVoting(W_full, RNK, RNK[s], iter_l, Y)
        else:
            W = W_full

        # save initial solution - start point's shortest path distances
        t = traj[0, :]
        t = [t, np.sum(np.multiply(traj, W), axis=0)]

        # iteratively realign trajectory (because landmarks moved)
        converged, user_break, realign_iter = False, False, 1
        if verbose:
            print('Running iterations...')
        while converged == False and user_break == False:
            realign_iter = realign_iter + 1
            print('Iteration: %d' % realign_iter)
            np.copyto(traj, dist)
            traj = _realign_trajectory(t, dist, iter_l, traj, 0, len(dist), realign_iter)
            if branch:
                RNK, bp, diffdists, Y = _splittobranches(traj, traj[0], data,
                                                         iter_l, dist, paths_l2l)
                W = _muteCrossBranchVoting(W_full, RNK, RNK[s], iter_l, Y)
            # calculate weighted trajectory
            t.append(np.sum(np.multiply(traj, W), axis=0))
            # check for convergence
            fpoint_corr = stats.pearsonr(np.transpose(t[realign_iter]),
                                         np.transpose(t[realign_iter - 1]))[0]
            if verbose:
                print('Correlation with previous iteration: %.4f' % fpoint_corr)
            converged = fpoint_corr > 0.9999
            if (realign_iter % 16) == 0:
                # break after too many realignments - something is wrong
                user_break = True
                print('\nWarning: Force exit after ' + str(realign_iter) + ' iterations')
        print(str(realign_iter - 1) + ' realignment iterations')

        # save final trajectory for this graph
        iter_traj = t[realign_iter][:]
        # Normalize the iter_trajectory
        iter_traj = (iter_traj - iter_traj.min()) / (iter_traj.max() - iter_traj.min())
        trajectory.append(iter_traj)
        waypoints.append(iter_l)
        if branch:
            # Recalculate branches post reassignments
            RNK, bp, diffdists, Y = _splittobranches(traj, traj[0], data,
                                                     iter_l, dist, paths_l2l)
            branches.append(RNK)
            bas.append(Y)
        else:
            branches = trajectory  # branch

    return dict(zip(['Trajectory', 'Waypoints', 'Branches', 'BAS'],
                    [trajectory[0], waypoints[0], branches[0], bas[0]]))
def get_nearest_neighbor_graph(k, X):
    neigh = NearestNeighbors(n_neighbors=k)
    neigh.fit(X)
    A = neigh.kneighbors_graph(X)
    distances, indices = neigh.kneighbors(X)
    return distances, indices, A
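# Usage sketch for get_nearest_neighbor_graph (not from the original source):
import numpy as np

X_demo = np.array([[0., 0.], [0., 1.], [1., 0.], [5., 5.]])
distances, indices, A = get_nearest_neighbor_graph(2, X_demo)
print(indices)      # each row: the point itself, then its nearest neighbor
print(A.toarray())  # binary connectivity graph including self-edges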
#! /usr/bin/python3
# For the simple task of finding the nearest neighbors between two sets of data,
# the unsupervised algorithms within sklearn.neighbors can be used:
from sklearn.neighbors import NearestNeighbors
import numpy as np

#              0         1         2        3       4       5
X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
nbrs = NearestNeighbors(n_neighbors=2, algorithm='auto').fit(X)
distances, indices = nbrs.kneighbors(X)
print(indices)
print(distances)

nearestConnectionMatrix = nbrs.kneighbors_graph(X).toarray()
print(nearestConnectionMatrix)

# use KD-tree or Ball-tree
from sklearn.neighbors import KDTree

kdt = KDTree(X, leaf_size=30, metric='euclidean')
result = kdt.query(X, k=2, return_distance=False)
print(result)
def _check_with_col_sign_flipping(A, B, tol=0.0):
    """ Check arrays A and B are equal with possible sign flipping on
    each column """
    sign = True
    for column_idx in range(A.shape[1]):
        sign = sign and ((((A[:, column_idx] -
                            B[:, column_idx]) ** 2).mean() <= tol ** 2) or
                         (((A[:, column_idx] +
                            B[:, column_idx]) ** 2).mean() <= tol ** 2))
        if not sign:
            return False
    return True

N = 10
X, color = datasets.samples_generator.make_s_curve(N, random_state=0)
n_components = 2
n_neighbors = 3
knn = NearestNeighbors(n_neighbors=n_neighbors + 1).fit(X)
# Assign the geometry matrix to get the same answer, since sklearn uses
# k-neighbors instead of radius-neighbors
Geometry = geom.Geometry(X)
Geometry.assign_distance_matrix(knn.kneighbors_graph(X, mode='distance'))

from sklearn import manifold

# test LTSA with sklearn
sk_Y_ltsa = manifold.LocallyLinearEmbedding(n_neighbors, n_components,
                                            method='ltsa',
                                            eigen_solver='arpack').fit_transform(X)
import Mmani.embedding.ltsa as ltsa
(mm_Y_ltsa, err) = ltsa.ltsa(Geometry, n_components, eigen_solver='arpack')
assert(_check_with_col_sign_flipping(sk_Y_ltsa, mm_Y_ltsa, 0.05))

# test LLE with sklearn
sk_Y_lle = manifold.LocallyLinearEmbedding(n_neighbors, n_components,
                                           method='standard').fit_transform(X)
import Mmani.embedding.locally_linear_ as lle
(mm_Y_lle, err) = lle.locally_linear_embedding(Geometry, n_components)
assert(_check_with_col_sign_flipping(sk_Y_lle, mm_Y_lle, 0.05))