def test_meld():
    # MELD operator
    # Numerical accuracy
    np.random.seed(42)

    def norm(x):
        x = x.copy()
        x = x - np.min(x)
        x = x / np.max(x)
        return x

    D = np.random.normal(0, 2, (1000, 2))
    RES = np.random.binomial(1, norm(D[:, 0]), 1000)
    G = gt.Graph(D, knn=20, decay=10, use_pygsp=True)

    meld_op = meld.MELD()
    B = meld_op.fit_transform(G, RES)

    if version.parse(np.__version__) < version.parse('1.17'):
        np.testing.assert_allclose(np.sum(B), 532.0001992193013)
    else:
        np.testing.assert_allclose(np.sum(B), 519.0001572740623)

    meld_op = meld.MELD()
    B = meld_op.fit_transform(
        gt.Graph(D, knn=20, decay=10, use_pygsp=False), RES)

    if version.parse(np.__version__) < version.parse('1.17'):
        np.testing.assert_allclose(np.sum(B), 532.0001992193013)
    else:
        np.testing.assert_allclose(np.sum(B), 519.0001572740623)

    # lap_type TypeError
    lap_type = 'hello world'
    assert_raise_message(
        TypeError,
        "lap_type must be 'combinatorial'"
        " or 'normalized'. Got: '{}'".format(lap_type),
        meld.MELD(lap_type=lap_type).fit,
        G=G)

    # RES wrong shape
    RES = np.ones([2, G.N + 100])
    assert_raise_message(
        ValueError,
        "Input data ({}) and input graph ({}) "
        "are not of the same size".format(RES.shape, G.N),
        meld_op.fit_transform,
        RES=RES,
        G=G)

    # lap reconversion warning
    assert_warns_message(
        RuntimeWarning,
        "Changing lap_type may require recomputing the Laplacian",
        meld_op.fit,
        G=gt.Graph(D, knn=20, decay=10, use_pygsp=True,
                   lap_type='normalized'))
def fit(self, X):
    if not len(X.shape) == 3:
        raise ValueError("Expected X to be a tensor with three dimensions."
                         " Got shape {}".format(X.shape))
    if self.normalize:
        X = utils.normalize(X)
    tasklogger.log_start("multislice kernel")
    K = kernel.multislice_kernel(X,
                                 intraslice_knn=self.intraslice_knn,
                                 interslice_knn=self.interslice_knn,
                                 decay=self.decay,
                                 n_pca=self.n_pca,
                                 distance=self.knn_dist,
                                 n_jobs=self.n_jobs)
    tasklogger.log_complete("multislice kernel")
    tasklogger.log_start("graph and diffusion operator")
    n_landmark = self.n_landmark if self.n_landmark < K.shape[0] else None
    self.graph = graphtools.Graph(K,
                                  precomputed="affinity",
                                  n_landmark=n_landmark,
                                  n_svd=self.n_svd,
                                  n_jobs=self.n_jobs,
                                  verbose=self.verbose,
                                  random_state=self.random_state,
                                  **(self.kwargs))
    # accessing diff_op forces the landmark operator to build
    self.diff_op
    tasklogger.log_complete("graph and diffusion operator")
    result = super().fit(self.graph)
    return result
def fit_kNN(self, data, **kwargs):
    self.graph_knn = gt.Graph(data,
                              n_pca=100,
                              kernel_symm=None,
                              use_pygsp=True,
                              random_state=self.seed,
                              **kwargs)
def run_meld(X_red_dim, sample_labels, conditions, k=15):
    '''
    Run MELD

    - X_red_dim: c x d matrix of dimensionality reduction to use for graph construction
    - sample_labels: assignment of cells to samples
    - conditions: vector of condition names
    '''
    ## Make graph
    graph = gt.Graph(X_red_dim, knn=int(k))

    ## Make MELD object
    meld_op = meld.MELD()
    meld_op.graph = graph

    ## Compute density
    meld_fit = meld_op.transform(sample_labels=np.array(sample_labels))

    ## Mean density per replicate
    mean_density = pd.DataFrame(
        np.zeros(shape=(meld_fit.shape[0], len(conditions))),
        index=meld_fit.index,
        columns=conditions,
    )
    for c in conditions:
        c_mean = meld_fit.loc[:, [c in x for x in meld_fit.columns]].mean(1)
        mean_density[c] = c_mean

    ## From density to likelihood per condition
    likelihoods = meld.utils.normalize_densities(mean_density)
    likelihoods.columns = [col.split("_")[0] for col in likelihoods.columns]
    return likelihoods
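# Hedged usage sketch (illustrative, not from the original source): exercises
# run_meld on synthetic data. The demo name and data are invented; it assumes
# sample names embed the condition as a prefix (e.g. "ctrl_rep1"), since
# run_meld matches conditions by substring and renames columns by splitting
# on "_".
def _demo_run_meld():
    import numpy as np
    X_red_dim = np.random.normal(size=(300, 20))  # 300 cells x 20 dims
    sample_labels = np.random.choice(
        ["ctrl_rep1", "ctrl_rep2", "stim_rep1", "stim_rep2"], size=300)
    likelihoods = run_meld(X_red_dim, sample_labels, ["ctrl", "stim"])
    print(likelihoods.head())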
def diffusionCoordinates(X, decay, knn, n_pca, random_state=None):
    # diffusion maps with normalized Laplacian
    # n_pca = 0 corresponds to NO pca
    G = graphtools.Graph(
        X,
        knn=knn,
        decay=decay,
        n_pca=n_pca,
        use_pygsp=True,
        thresh=0,
        random_state=random_state,
    )
    n_samples = X.shape[0]
    W = G.W.tocoo()
    # W / (DD^T): this is the anisotropic kernel
    W.data = W.data / (G.dw[W.row] * G.dw[W.col])
    nsqrtD = sparse.dia_matrix((np.array(np.sum(W, 0)) ** (-0.5), [0]), W.shape)
    L = sparse.eye(n_samples) - nsqrtD.dot(W).dot(nsqrtD)
    U, S, _ = randSVD(L, random_state=random_state)
    # sort smallest to largest
    S_idx = np.argsort(S)
    U, S = U[:, S_idx], S[S_idx]
    # trim trivial information
    U, S = U[:, 1:], S[1:]
    return U, S
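# Hedged usage sketch (illustrative, not from the original source): a
# diffusion-map embedding of Gaussian blobs. Assumes sklearn is available and
# that `randSVD`, used above, is defined elsewhere in this module.
def _demo_diffusionCoordinates():
    import numpy as np
    from sklearn.datasets import make_blobs
    X, _ = make_blobs(n_samples=300, n_features=20, centers=3, random_state=42)
    U, S = diffusionCoordinates(X, decay=40, knn=10, n_pca=10, random_state=42)
    print(U[:, :2].shape, S[:2])  # two smallest non-trivial eigenvectors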
def test_simple():
    tree_data, tree_clusters = phate.tree.gen_dla(n_branch=3)
    phate_operator = phate.PHATE(knn=15, t=100, verbose=False)
    tree_phate = phate_operator.fit_transform(tree_data)
    assert tree_phate.shape == (tree_data.shape[0], 2)
    clusters = phate.cluster.kmeans(phate_operator, n_clusters="auto")
    assert np.issubdtype(clusters.dtype, np.signedinteger)
    assert len(np.unique(clusters)) >= 2
    assert len(clusters.shape) == 1
    assert len(clusters) == tree_data.shape[0]
    clusters = phate.cluster.kmeans(phate_operator, n_clusters=3)
    assert np.issubdtype(clusters.dtype, np.signedinteger)
    assert len(np.unique(clusters)) == 3
    assert len(clusters.shape) == 1
    assert len(clusters) == tree_data.shape[0]
    phate_operator.fit(phate_operator.graph)
    G = graphtools.Graph(
        phate_operator.graph.kernel,
        precomputed="affinity",
        use_pygsp=True,
        verbose=False,
    )
    phate_operator.fit(G)
    G = pygsp.graphs.Graph(G.W)
    phate_operator.fit(G)
    phate_operator.fit(anndata.AnnData(tree_data))
    with assert_raises_message(
        TypeError, "Expected phate_op to be of type PHATE. Got 1"
    ):
        phate.cluster.kmeans(1)
def DM(data, t=1, knn=5, decay=40):
    # symmetric affinity matrix
    K = graphtools.Graph(data, n_jobs=-1, knn=knn, decay=decay).kernel
    # degrees
    diff_deg = np.array(np.sum(K, axis=1)).flatten()
    # negative sqrt
    diff_deg = np.power(diff_deg, -1 / 2)
    # put into square matrix
    diff_deg = sparse.spdiags([diff_deg], diags=0, m=K.shape[0], n=K.shape[0])
    # conjugate
    K = sparse.csr_matrix(K)
    diff_aff = diff_deg.dot(K).dot(diff_deg)
    # symmetrize to remove numerical error
    diff_aff = (diff_aff + diff_aff.T) / 2
    # svd
    U, S, _ = sparse.linalg.svds(diff_aff, k=3)
    # sort singular values largest to smallest
    s_idx = np.argsort(S)[::-1]
    U, S = U[:, s_idx], S[s_idx]
    # get first eigenvector
    u1 = U[:, 0][:, None]
    # ensure non-zero
    zero_idx = np.abs(u1) <= np.finfo(float).eps
    u1[zero_idx] = (np.sign(u1[zero_idx]) * np.finfo(float).eps).reshape(-1)
    # normalize by first eigenvector
    U = U / u1
    # drop first eigenvector
    U, S = U[:, 1:], S[1:]
    # power eigenvalues
    S = np.power(S, t)
    # weight U by eigenvalues
    dm = U.dot(np.diagflat(S))
    return dm
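# Hedged usage sketch (illustrative, not from the original source): a
# two-component diffusion map of a swiss roll; t sets how far the diffusion
# process is propagated before embedding.
def _demo_DM():
    from sklearn.datasets import make_swiss_roll
    X, _ = make_swiss_roll(n_samples=500, random_state=42)
    dm = DM(X, t=5, knn=5, decay=40)
    print(dm.shape)  # (500, 2): svds(k=3) minus the dropped trivial component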
def phate_similarity_exampler(data, data_exampler, n_neigh=5, t=5,
                              use_potential=True, n_pca=100,
                              n_exampler=None, method="exact", **kwargs):
    """\
    Description:
    ------------
        Calculate diffusion distance using the PHATE/Diffusion Map method

    Parameters:
    ------------
        data:
            Feature matrix of dimension (n_samples, n_features)
        data_exampler:
            Feature matrix of the exampler (landmark) points
        n_neigh:
            The number of neighbors in knn for graph construction
        t:
            The transition timestep t
        use_potential:
            Whether to use the potential distance; if so, this matches PHATE,
            otherwise it matches diffusion maps

    Returns:
    -----------
        dist:
            Similarity matrix between data and exampler points
        dist_exampler:
            Similarity matrix among exampler points
    """
    import graphtools as gt
    from scipy.spatial.distance import pdist, squareform
    from sklearn.metrics import pairwise_distances

    start = time.time()
    # calculate the exampler graph and its diffusion operator
    G_exampler = gt.Graph(data_exampler, n_pca=n_pca, knn=n_neigh, **kwargs)
    T_exampler = G_exampler.diff_op
    if scipy.sparse.issparse(T_exampler):
        T_exampler = T_exampler.toarray()
    T_exampler_t = np.linalg.matrix_power(T_exampler, t)
    if use_potential:
        U_exampler_t = -np.log(T_exampler_t + 1e-7)
    else:
        U_exampler_t = T_exampler_t
    dist_exampler = squareform(pdist(U_exampler_t))

    # distance between data and exampler:
    # choice 1: euclidean distance; choice 2: diffusion distance
    # choice 1
    # dist = pairwise_distances(X=data, Y=data_exampler)
    # choice 2
    dist = pairwise_distances(X=data, Y=data_exampler)
    knn_index = np.argpartition(dist, kth=n_neigh - 1, axis=1)[:, (n_neigh - 1)]
    kth_dist = np.take_along_axis(dist, knn_index[:, None], axis=1)
    K = dist / kth_dist
    K = (dist <= kth_dist) * np.exp(-K)
    K = K / np.sum(K, axis=1)[:, None]
    U_query_t = np.matmul(K, U_exampler_t)
    # distances must be computed from features; a distance matrix alone would
    # leave the distances between query nodes unknown
    dist = pairwise_distances(X=U_query_t, Y=U_exampler_t)
    end = time.time()
    print("running time(sec):", end - start)

    return dist, dist_exampler
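# Hedged usage sketch (illustrative, not from the original source): landmark
# ("exampler") diffusion distances, with landmarks chosen here by plain
# random subsampling.
def _demo_phate_similarity_exampler():
    import numpy as np
    data = np.random.normal(size=(1000, 50))
    exampler = data[np.random.choice(1000, 100, replace=False)]
    dist, dist_exampler = phate_similarity_exampler(
        data, exampler, n_neigh=5, t=5, n_pca=30)
    print(dist.shape, dist_exampler.shape)  # (1000, 100), (100, 100)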
def test_graph_input():
    X = np.random.normal(0, 1, (10, 2))
    E = Estimator(verbose=0)
    G = graphtools.Graph(X)
    E.fit(G)
    assert E.graph == G
    G = graphtools.Graph(X, knn=2, decay=5, distance="cosine", thresh=0)
    E.fit(G)
    assert E.graph == G
    assert E.knn == G.knn
    assert E.decay == G.decay
    assert E.distance == G.distance
    assert E.thresh == G.thresh
    W = G.K - np.eye(X.shape[0])
    G = pygsp.graphs.Graph(W)
    E.fit(G, use_pygsp=True)
    assert np.all(E.graph.W.toarray() == W)
def setUpClass(self):
    # VertexFrequencyCluster
    # Custom window sizes
    self.window_sizes = np.array([2, 4, 8, 24])
    data, self.labels = make_batches(n_pts_per_cluster=100)
    self.G = gt.Graph(data, sample_idx=self.labels, use_pygsp=True)
    meld_op = meld.MELD()
    self.EES = meld_op.fit_transform(G=self.G, RES=self.labels)
def fit(self, X): """Computes the diffusion operator Parameters ---------- X : array, shape=[n_samples, n_features] input data with `n_samples` samples and `n_dimensions` dimensions. Accepted data types: `numpy.ndarray`, `scipy.sparse.spmatrix`, `pd.DataFrame`, `anndata.AnnData`. If `knn_dist` is 'precomputed', `data` should be a n_samples x n_samples distance or affinity matrix Returns ------- phate_operator : PHATE The estimator object """ X, n_pca, precomputed, update_graph = self._parse_input(X) if precomputed is None: tasklogger.log_info( "Running PHATE on {} cells and {} genes.".format( X.shape[0], X.shape[1])) else: tasklogger.log_info( "Running PHATE on precomputed {} matrix with {} cells.".format( precomputed, X.shape[0])) if self.n_landmark is None or X.shape[0] <= self.n_landmark: n_landmark = None else: n_landmark = self.n_landmark if self.graph is not None and update_graph: self._update_graph(X, precomputed, n_pca, n_landmark) self.X = X if self.graph is None: tasklogger.log_start("graph and diffusion operator") self.graph = graphtools.Graph( X, n_pca=n_pca, n_landmark=n_landmark, distance=self.knn_dist, precomputed=precomputed, knn=self.knn, decay=self.decay, thresh=1e-4, n_jobs=self.n_jobs, verbose=self.verbose, random_state=self.random_state, **(self.kwargs)) tasklogger.log_complete("graph and diffusion operator") # landmark op doesn't build unless forced self.diff_op return self
def test_utils():
    data, labels = make_batches(n_pts_per_cluster=250)
    G = gt.Graph(data, sample_idx=labels, use_pygsp=True)
    EES = meld.MELD().fit_transform(G, labels)
    clusters = meld.VertexFrequencyCluster().fit_predict(
        G=G, RES=labels, EES=EES)
    meld.utils.sort_clusters_by_meld_score(clusters, EES)
def Spring(X, *args, is_graph=False, **kwargs):
    if not is_graph:
        G = graphtools.Graph(X, knn=3, decay=None, use_pygsp=True)
    else:
        G = pygsp.graphs.Graph(X)
    G = networkx.from_numpy_matrix(G.W.toarray())
    X = networkx.spring_layout(G, *args, **kwargs)
    X = np.vstack(list(X.values()))
    return X
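# Hedged usage sketch (illustrative, not from the original source): lays out
# random points with the kNN-graph spring embedding above; pass is_graph=True
# to lay out a precomputed adjacency matrix instead.
def _demo_Spring():
    import numpy as np
    X = np.random.normal(size=(50, 5))
    Y = Spring(X)
    print(Y.shape)  # (50, 2)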
def build(self, size=500):
    self.name = 'Tree'
    # params = {'method': 'paths', 'batch_cells': size,
    #           'path_length': 500,
    #           'path_from': [0, 1, 1, 2, 0, 0],
    #           'de_fac_loc': 1, 'path_nonlinear_prob': 0.5,
    #           'dropout_type': 'binomial', 'dropout_prob': 0.5,
    #           'path_skew': [0.5, 0.75, 0.25, 0.5, 0.25, 0.75],
    #           'group_prob': [0.15, 0.05, .1, .25, .2, .25],
    #           'seed': self.seed, 'verbose': False}
    params = {
        'method': 'paths',
        'batch_cells': size,
        'path_length': 500,
        'path_from': [0, 1, 1, 2, 0, 0, 2],
        'de_fac_loc': 1,
        'path_skew': [0.45, 0.7, 0.7, 0.45, 0.65, 0.5, 0.5],
        'group_prob': [0.1, 0.1, .1, .2, .2, .2, .1],
        'dropout_type': 'binomial',
        'dropout_prob': 0.5,
        'seed': self.seed,
        'verbose': False
    }
    sim = scprep.run.SplatSimulate(**params)
    data = sim['counts']
    data_ln = scprep.normalize.library_size_normalize(data)
    data_sqrt = scprep.transform.sqrt(data_ln)
    self.X = data_sqrt
    self.c = sim['group']
    self.X = self.X[np.argsort(self.c)]
    self.c = np.sort(self.c)
    expand = 4
    params = {
        'method': 'paths',
        'batch_cells': size * expand,
        'out_prob': 0,
        'path_length': 500,
        'path_from': [0, 1, 1, 2, 0, 0],
        'de_fac_loc': 1,
        'path_nonlinear_prob': 0.5,
        'group_prob': [0.15, 0.05, .1, .25, .2, .25],
        'dropout_type': 'binomial',
        'dropout_prob': 0,
        'path_skew': [0.5, 0.55, 0.4, 0.5, 0.45, 0.6],
        'seed': self.seed,
        'verbose': False
    }
    sim = scprep.run.SplatSimulate(**params)
    data = sim['counts']
    data = data[np.argsort(sim['group'])]
    data_ln = scprep.normalize.library_size_normalize(data)
    data_sqrt = scprep.transform.sqrt(data_ln)
    G = graphtools.Graph(data_sqrt, n_pca=100, anisotropy=1)
    self.X_true = embed.PHATE(G, gamma=0)[::expand]
def test_from_igraph():
    n = 100
    m = 500
    K = np.zeros((n, n))
    for _ in range(m):
        e = np.random.choice(n, 2, replace=False)
        K[e[0], e[1]] = K[e[1], e[0]] = 1
    g = igraph.Graph.Adjacency(K.tolist())
    G = graphtools.from_igraph(g, attribute=None)
    G2 = graphtools.Graph(K, precomputed="adjacency")
    assert np.all(G.K == G2.K)
def test_from_igraph_weighted():
    n = 100
    m = 500
    K = np.zeros((n, n))
    for _ in range(m):
        e = np.random.choice(n, 2, replace=False)
        K[e[0], e[1]] = K[e[1], e[0]] = np.random.uniform(0, 1)
    g = igraph.Graph.Weighted_Adjacency(K.tolist())
    G = graphtools.from_igraph(g)
    G2 = graphtools.Graph(K, precomputed="adjacency")
    assert np.all(G.K == G2.K)
def test_dm():
    # create fake data
    n_time_steps = 50
    n_points = 20
    n_dim = 10
    np.random.seed(42)
    data = np.cumsum(np.random.normal(
        0, 1, (n_time_steps, n_points, n_dim)), axis=0)
    kernel = m_phate.kernel.multislice_kernel(m_phate.utils.normalize(data))
    dm = m_phate.kernel.DM(graphtools.Graph(kernel, precomputed='affinity'))
    assert dm.shape == (n_time_steps * n_points, 2)
def test_check_pygsp_graph():
    # _check_pygsp_graph
    D = np.random.normal(0, 2, (10, 2))
    G = gt.Graph(D, use_pygsp=False)
    assert isinstance(meld.utils._check_pygsp_graph(G), pygsp.graphs.Graph)
    assert_raise_message(
        TypeError,
        "Input graph should be of type graphtools.base.BaseGraph. "
        "With graphtools, use the `use_pygsp=True` flag.",
        meld.utils._check_pygsp_graph,
        G='hello world')
def Spring(X, *args, is_graph=False, seed=None, **kwargs):
    np.random.seed(seed)
    if not is_graph:
        G = graphtools.Graph(X, knn=3, decay=None, use_pygsp=True)
    else:
        G = pygsp.graphs.Graph(X)
    G = networkx.from_numpy_matrix(G.W.toarray())
    tasklogger.log_start("Spring")
    X = networkx.spring_layout(G, *args, **kwargs)
    tasklogger.log_complete("Spring")
    X = np.vstack(list(X.values()))
    return X
def adata_phate(adata):
    # compute PHATE
    G = gt.Graph(
        data=adata.obsp['connectivities']
        + sparse.diags([1] * adata.shape[0], format='csr'),
        precomputed='adjacency',
        use_pygsp=True)
    G.knn_max = None
    phate_op = phate.PHATE(knn_dist='precomputed', gamma=0, n_jobs=-1,
                           random_state=42)
    adata.obsm['X_phate'] = phate_op.fit_transform(G.K)
    return adata
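# Hedged usage sketch (illustrative, not from the original source): assumes a
# scanpy workflow has already populated adata.obsp['connectivities'], which
# adata_phate uses (plus self-loops) as a precomputed adjacency, and that
# scipy.sparse is imported module-level as `sparse` as the function expects.
def _demo_adata_phate():
    import numpy as np
    import anndata
    import scanpy as sc
    adata = anndata.AnnData(np.random.normal(size=(200, 50)))
    sc.pp.neighbors(adata)  # fills adata.obsp['connectivities']
    adata = adata_phate(adata)
    print(adata.obsm['X_phate'].shape)  # (200, 2)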
def __init__(self, size=5, *args, **kwargs):
    self.size = size
    super().__init__(*args, **kwargs)
    # create a linear topology
    g = graphtools.Graph(directed=False)
    self.graph = g
    g.create_graph('lattice', 1, self.size)
    # save the positions of vertices as Vector objects
    for v in range(1, self.size + 1):
        x, y = ((v - 0.5) / self.size * self.width, 0.5 * self.height)
        g.set_vertex_attribute(v, 'xy', V(x, y))
    self.compute_edge_lengths()
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    # create a grid topology
    g = graphtools.Graph(directed=False)
    self.graph = g
    g.create_graph('lattice', 2, self.size)
    # save the positions of vertices as Vector objects
    for j in range(1, self.size + 1):
        for i in range(1, self.size + 1):
            v = g._lattice_vertex(2, self.size, i, j)
            x, y = ((0.5 + i - 1) / self.size * self.width,
                    (0.5 + j - 1) / self.size * self.height)
            g.set_vertex_attribute(v, 'xy', V(x, y))
    self.compute_edge_lengths()
def __init__(self, npoints=100, *args, **kwargs):
    self.npoints = npoints
    super().__init__(*args, **kwargs)
    # create a Voronoi topology
    g = graphtools.Graph(directed=False)
    self.graph = g
    g.create_graph('voronoi', self.npoints, self.width, self.height)
    # save the positions of vertices as Vector objects
    for v in g.vertices():
        x, y = g.get_vertex_attribute(v, 'pos').split(',')
        x, y = float(x), float(y)
        g.set_vertex_attribute(v, 'xy', V(x, y))
    self.compute_edge_lengths()
def phate_similarity(data, n_neigh=5, t=5, use_potential=True, **kwargs):
    """\
    Description:
    ------------
        Calculate diffusion distance using the PHATE/Diffusion Map method

    Parameters:
    ------------
        data:
            Feature matrix of dimension (n_samples, n_features)
        n_neigh:
            The number of neighbors in knn for graph construction
        t:
            The transition timestep t
        use_potential:
            Whether to use the potential distance; if so, this matches PHATE,
            otherwise it matches diffusion maps

    Returns:
    -----------
        U_t:
            Feature matrix (one potential/diffusion feature vector per cell)
        dist:
            Similarity matrix
    """
    import graphtools as gt
    from scipy.spatial.distance import pdist, squareform

    # Pairwise-distance graph. With a decaying kernel and a threshold
    # (default 1e-4), the graph is effectively knn; without a threshold, it
    # is exact: graphtools finds the radius neighbors, applies the kernel
    # function, then filters (thresholds) the resulting affinities.
    G = gt.Graph(data, n_pca=100, knn=n_neigh, **kwargs)
    # obtain transition matrix
    T = G.diff_op
    if scipy.sparse.issparse(T):
        T = T.toarray()
    # T to the power of t
    T_t = np.linalg.matrix_power(T, t)
    # calculate potential distance, used as a feature vector for each cell
    if use_potential:
        U_t = -np.log(T_t + 1e-7)
    else:
        U_t = T_t
    # calculate pairwise feature-vector distance
    dist = squareform(pdist(U_t))
    return U_t, dist
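# Hedged usage sketch (illustrative, not from the original source): note the
# function returns both the per-cell feature vectors U_t and the pairwise
# distance matrix computed from them. Feature count exceeds the internal
# n_pca=100 so PCA is applicable.
def _demo_phate_similarity():
    import numpy as np
    data = np.random.normal(size=(300, 200))
    U_t, dist = phate_similarity(data, n_neigh=5, t=5)
    print(U_t.shape, dist.shape)  # (300, 300), (300, 300)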
def DPT_similarity(data, n_neigh=5, use_potential=False):
    '''\
    Description:
    -----------
        Calculates DPT between all points in the data and directly outputs
        the similarity matrix (the diffusion pseudotime matrix), which
        performs a little better than the diffusion map

    Parameters:
    -----------
        data:
            Feature matrix, numpy.array of the size [n_samples, n_features]
        n_neigh:
            Larger values correspond to slower decay
        use_potential:
            Expands short distances and compresses large ones

    Returns:
    -----------
        DPT:
            Similarity matrix calculated from diffusion pseudo-time
    '''
    import graphtools as gt
    from scipy.spatial.distance import pdist, squareform

    # Calculating from raw data would be too noisy; dimension reduction is
    # necessary. Construct the graph adjacency matrix with n_pca=100.
    G = gt.Graph(data, n_pca=100, knn=n_neigh, use_pygsp=True)

    # Calculate the top eigenvector of the diffusion operator
    W, V = scipy.sparse.linalg.eigs(G.diff_op, k=1)

    # Remove the first eigenspace: subtract the outer product of the top
    # eigenvector (the original scalar product V[:, 0] @ V[:, 0].T was a bug)
    T_tilde = G.diff_op.toarray() - np.real(np.outer(V[:, 0], V[:, 0]))

    # Calculate M
    I = np.eye(T_tilde.shape[1])
    M = np.linalg.inv(I - T_tilde) - I
    M = np.real(M)

    # log-potential
    if use_potential:
        M = M - np.min(M, axis=1)[:, None]
        M = M / np.sum(M, axis=1)[:, None]
        M = -np.log(M + 1e-7)

    DPT = squareform(pdist(M))
    return DPT
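# Hedged usage sketch (illustrative, not from the original source): computes
# the DPT similarity matrix on random data with enough features for the
# internal n_pca=100; assumes scipy is imported module-level.
def _demo_DPT_similarity():
    import numpy as np
    data = np.random.normal(size=(200, 150))
    DPT = DPT_similarity(data, n_neigh=5)
    print(DPT.shape)  # (200, 200)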
def get_laplacian_extrema(data, n_extrema, knn=10):
    '''
    Finds the 'Laplacian extrema' of a dataset. The first extremum is chosen
    as the point that minimizes the first non-trivial eigenvalue of the graph
    Laplacian of the data. Subsequent extrema are chosen by first finding the
    unique non-trivial non-negative vector that is zero on all previous
    extrema while minimizing the Laplacian quadratic form, then taking the
    argmax of this vector.
    '''
    G = gt.Graph(data, use_pygsp=True, decay=None, knn=knn)

    # convert G into a NetworkX graph to use the Tracemin-PCG algorithm
    G_nx = nx.convert_matrix.from_scipy_sparse_matrix(G.W)
    fiedler = nx.linalg.algebraicconnectivity.fiedler_vector(
        G_nx, method='tracemin_pcg')

    # the combinatorial Laplacian gives better results than the normalized one
    L = nx.laplacian_matrix(G_nx)
    first_extrema = np.argmax(fiedler)
    extrema = [first_extrema]
    extrema_ordered = [first_extrema]

    init_lanczos = fiedler
    init_lanczos = np.delete(init_lanczos, first_extrema)
    for i in range(n_extrema - 1):
        # generate the Laplacian submatrix by removing rows/cols for previous extrema
        indices = range(data.shape[0])
        indices = np.delete(indices, extrema)
        ixgrid = np.ix_(indices, indices)
        L_sub = L[ixgrid]

        # find the smallest eigenvector of our Laplacian submatrix
        eigvals, eigvecs = scipy.sparse.linalg.eigsh(
            L_sub, k=1, which='SM', v0=init_lanczos)

        # add it to the sorted and unsorted lists of extrema
        new_extrema = np.argmax(np.abs(eigvecs[:, 0]))
        init_lanczos = eigvecs[:, 0]
        init_lanczos = np.delete(init_lanczos, new_extrema)
        shift = np.searchsorted(extrema_ordered, new_extrema)
        extrema_ordered.insert(shift, new_extrema + shift)
        extrema.append(new_extrema + shift)

    return extrema
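# Hedged usage sketch (illustrative, not from the original source): the
# returned extrema are row indices into `data`, useful e.g. as well-separated
# endpoints for trajectory analysis. Assumes a networkx version that still
# provides from_scipy_sparse_matrix, as the function above does.
def _demo_get_laplacian_extrema():
    import numpy as np
    data = np.random.normal(size=(200, 10))
    extrema = get_laplacian_extrema(data, n_extrema=3, knn=10)
    print(extrema)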
def test_simple():
    tree_data, tree_clusters = phate.tree.gen_dla(n_branch=3)
    phate_operator = phate.PHATE(k=15, t=100)
    tree_phate = phate_operator.fit_transform(tree_data)
    assert tree_phate.shape == (tree_data.shape[0], 2)
    clusters = phate.cluster.kmeans(phate_operator, k=3)
    assert np.issubdtype(clusters.dtype, int)
    assert len(clusters.shape) == 1
    assert len(clusters) == tree_data.shape[0]
    phate_operator.fit(phate_operator.graph)
    G = graphtools.Graph(phate_operator.graph.kernel,
                         precomputed='affinity', use_pygsp=True)
    phate_operator.fit(G)
    G = pygsp.graphs.Graph(G.W)
    phate_operator.fit(G)
    phate_operator.fit(anndata.AnnData(tree_data))
def diffusionCoordinates(
    X, decay, knn, n_pca, n_eigenvectors=None, n_jobs=1, verbose=0,
    random_state=None
):
    # diffusion maps with normalized Laplacian
    G = graphtools.Graph(
        X,
        knn=knn,
        decay=decay,
        n_pca=n_pca,
        use_pygsp=True,
        thresh=1e-4,
        anisotropy=1,
        lap_type="normalized",
        n_jobs=n_jobs,
        verbose=verbose,
        random_state=random_state,
    )
    return graphDiffusionCoordinates(G, n_eigenvectors=n_eigenvectors)
def fit(self, X, y=None):
    if isinstance(X, list):
        X = np.array(X)

    if X.ndim < 2:
        raise ValueError("Cannot fit 1D array.")

    if X.shape[0] == 1:
        raise ValueError("Input contains only 1 sample.")

    self.n_features_in_ = X.shape[1]

    graph = graphtools.Graph(
        X,
        n_pca=self.n_pca,
        n_landmark=self.n_landmark,
        distance=self.knn_dist,
        knn=self.knn,
        knn_max=self.knn_max,
        decay=self.decay,
        thresh=1e-4,
        n_jobs=self.n_jobs,
        verbose=self.verbose,
        random_state=self.random_state,
    )

    self.affinity_matrix_ = graph.diff_op.toarray()

    affinity_igraph = Graph().Weighted_Adjacency(
        matrix=self.affinity_matrix_.tolist(), mode="undirected")

    partition = leidenalg.find_partition(
        affinity_igraph,
        partition_type=leidenalg.RBConfigurationVertexPartition,
        weights=affinity_igraph.es["weight"],
        n_iterations=-1,
        seed=self.random_state,
        resolution_parameter=self.resolution_parameter,
    )

    self.labels_ = np.array(partition.membership)
    self.q_ = partition.q
    return self
def fit_graph(self, data, n_pca=100, **kwargs):
    """Fits a graphtools.Graph to input data

    Parameters
    ----------
    data : array, shape=[n_samples, n_observations]
        Input data
    **kwargs : dict
        Keyword arguments passed to gt.Graph()

    Returns
    -------
    graph : graphtools.Graph
        Graph fit to data
    """
    self.graph = gt.Graph(data, n_pca=n_pca, use_pygsp=True,
                          random_state=self.seed, **kwargs)
    return self.graph
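# Hedged usage sketch (illustrative, not from the original source): the
# `_Estimator` shell below stands in for whatever class defines fit_graph;
# it only needs the `seed` attribute the method reads.
def _demo_fit_graph():
    import numpy as np

    class _Estimator:
        seed = 42
        fit_graph = fit_graph  # reuse the method defined above

    est = _Estimator()
    G = est.fit_graph(np.random.normal(size=(100, 20)), n_pca=10, knn=5)
    print(G.K.shape)  # (100, 100) affinity kernel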