def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'Ward':
    """Embed the nodes, then run the Ward algorithm on the embedding.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.

    Returns
    -------
    self: :class:`Ward`
    """
    self._init_vars()

    # input check (the returned matrix is not kept)
    check_format(input_matrix)

    # embedding; the second output is stored as ``self.bipartite``
    node_embedding, self.bipartite = get_embedding(
        input_matrix, self.embedding_method, self.co_cluster)

    # hierarchical clustering of the embedded nodes
    self.dendrogram_ = WardDense().fit_transform(node_embedding)

    # output: nothing more to do unless co-clustering was requested
    if not self.co_cluster:
        return self
    self._split_vars(input_matrix.shape)
    return self
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
        seeds_row: Optional[Union[dict, np.ndarray]] = None,
        seeds_col: Optional[Union[dict, np.ndarray]] = None) -> 'CoPageRank':
    """Fit algorithm to data.

    PageRank is run twice on co-neighbor operators: once built from the
    biadjacency matrix (rows) and once from its transpose (columns).

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix.
    seeds_row :
        Seed rows, as a dict or a vector.
    seeds_col :
        Seed columns, as a dict or a vector.
        If both seeds_row and seeds_col are ``None``, the uniform distribution is used.

    Returns
    -------
    self: :class:`CoPageRank`
    """
    biadjacency = check_format(biadjacency)
    n_row, n_col = biadjacency.shape

    # Settings shared by the row and the column computations.
    pagerank_params = dict(damping_factor=self.damping_factor,
                           solver=self.solver,
                           n_iter=self.n_iter,
                           tol=self.tol)

    # rows
    row_operator = CoNeighborsOperator(biadjacency, True)
    row_probs = seeds2probs(n_row, seeds_row)
    self.scores_row_ = get_pagerank(row_operator, row_probs, **pagerank_params)

    # columns: same procedure on the transposed matrix
    col_operator = CoNeighborsOperator(biadjacency.T.tocsr(), True)
    col_probs = seeds2probs(n_col, seeds_col)
    self.scores_col_ = get_pagerank(col_operator, col_probs, **pagerank_params)

    # the default scores are those of the rows
    self.scores_ = self.scores_row_
    return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'PCA':
    """Compute the embedding of the graph.

    Parameters
    ----------
    adjacency :
        Adjacency or biadjacency matrix of the graph.

    Returns
    -------
    self: :class:`PCA`
    """
    adjacency = check_format(adjacency).asfptype()
    n_row, _ = adjacency.shape

    # Centering is expressed as a low-rank term: minus a vector of ones
    # paired with the column sums divided by the number of rows.
    minus_ones = -np.ones(n_row)
    column_means = adjacency.T.dot(np.ones(n_row)) / n_row
    adjacency_centered = SparseLR(adjacency, (minus_ones, column_means))

    # A solver given by name is replaced by a solver object on the instance.
    if isinstance(self.solver, str):
        self.solver = set_svd_solver(self.solver, adjacency)
    svd = self.solver
    svd.fit(adjacency_centered, self.n_components)

    left = svd.singular_vectors_left_
    right = svd.singular_vectors_right_

    self.singular_values_ = svd.singular_values_
    self.singular_vectors_left_ = left
    self.singular_vectors_right_ = right
    self.embedding_row_ = left
    self.embedding_col_ = right
    self.embedding_ = left
    return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'Closeness':
    """Closeness centrality for connected graphs.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.

    Returns
    -------
    self: :class:`Closeness`

    Raises
    ------
    ValueError
        If ``self.method`` is neither ``'exact'`` nor ``'approximate'``.
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    check_connected(adjacency)
    n = adjacency.shape[0]

    if self.method not in ('exact', 'approximate'):
        raise ValueError(
            "Method should be either 'exact' or 'approximate'.")

    if self.method == 'exact':
        # every node is a source
        n_sources, sources = n, np.arange(n)
    else:
        # random subset of sources, its size driven by the tolerance
        n_sources = min(int(log(n) / self.tol**2), n)
        sources = np.random.choice(np.arange(n), n_sources, replace=False)

    dists = distance(adjacency, n_jobs=self.n_jobs, sources=sources)

    # sum, for each node, of its distances to the selected sources
    total_dists = dists.T.dot(np.ones(n_sources))
    scale = (n - 1) * n_sources / n
    self.scores_ = scale / total_dists
    return self
def connected_components(adjacency: sparse.csr_matrix,
                         connection: str = 'weak') -> np.ndarray:
    """Extract the connected components of the graph.

    * Graphs
    * Digraphs

    Based on SciPy (scipy.sparse.csgraph.connected_components).

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    connection :
        Must be ``'weak'`` (default) or ``'strong'``. The type of connection to use for directed graphs.

    Returns
    -------
    labels : np.ndarray
        Connected component of each node.

    Raises
    ------
    ValueError
        If the matrix stores no entry at all.
    """
    adjacency = check_format(adjacency)
    if not len(adjacency.data):
        raise ValueError('The graph is empty (no edge).')

    # SciPy is told the graph is directed whenever the matrix is not symmetric.
    directed = not is_symmetric(adjacency)
    _, labels = sparse.csgraph.connected_components(
        adjacency, directed, connection, True)
    return labels
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray, LinearOperator],
        seeds: Optional[Union[dict, np.ndarray]] = None) -> 'PageRank':
    """Fit algorithm to data.

    Parameters
    ----------
    adjacency :
        Adjacency matrix.
    seeds :
        Parameter to be used for Personalized PageRank.
        Restart distribution as a vector or a dict (node: weight).
        If ``None``, the uniform distribution is used (no personalization, default).

    Returns
    -------
    self: :class:`PageRank`
    """
    # Linear operators are used as given; other inputs go through the format check.
    already_operator = isinstance(adjacency, LinearOperator)
    if not already_operator:
        adjacency = check_format(adjacency)
    check_square(adjacency)

    n = adjacency.shape[0]
    restart_probs = seeds2probs(n, seeds)

    self.scores_ = get_pagerank(
        adjacency,
        restart_probs,
        damping_factor=self.damping_factor,
        n_iter=self.n_iter,
        solver=self.solver,
        tol=self.tol,
    )
    return self
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
        seeds_row: Optional[Union[dict, np.ndarray]] = None,
        seeds_col: Optional[Union[dict, np.ndarray]] = None) -> 'BiPageRank':
    """Fit algorithm to data.

    The work is delegated to :class:`PageRank` on the graph built from the
    biadjacency matrix; row and column scores are then normalized separately.

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix.
    seeds_row :
        Seed rows, as a dict or a vector.
    seeds_col :
        Seed columns, as a dict or a vector.
        If both seeds_row and seeds_col are ``None``, the uniform distribution is used.

    Returns
    -------
    self: :class:`BiPageRank`
    """
    biadjacency = check_format(biadjacency)
    shape = biadjacency.shape
    n_row, n_col = shape

    graph = bipartite2undirected(biadjacency)
    stacked_seeds = stack_seeds(n_row, n_col, seeds_row, seeds_col)
    PageRank.fit(self, graph, stacked_seeds)

    # split the scores between rows and columns
    self._split_vars(n_row)

    # each side is rescaled in place so that it sums to 1
    self.scores_row_ /= self.scores_row_.sum()
    self.scores_col_ /= self.scores_col_.sum()

    self.scores_ = self.scores_row_
    return self
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
        seeds_row: Optional[Union[dict, np.ndarray]] = None,
        seeds_col: Optional[Union[dict, np.ndarray]] = None,
        initial_state: Optional = None) -> 'BiDiffusion':
    """Compute the diffusion (temperature at equilibrium).

    The computation is delegated to :class:`Diffusion` on the graph built
    from the biadjacency matrix, then the result is split between rows and
    columns.

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix, shape (n_row, n_col).
    seeds_row :
        Temperatures of row border nodes (dictionary or vector of size n_row). Negative temperatures ignored.
    seeds_col :
        Temperatures of column border nodes (dictionary or vector of size n_col). Negative temperatures ignored.
    initial_state :
        Initial state of temperatures, forwarded unchanged to ``Diffusion.fit``.
        NOTE(review): presumably it must cover the rows followed by the
        columns (size n_row + n_col) -- confirm against ``Diffusion.fit``.

    Returns
    -------
    self: :class:`BiDiffusion`
    """
    biadjacency = check_format(biadjacency)
    n_row, n_col = biadjacency.shape

    seeds = stack_seeds(n_row, n_col, seeds_row, seeds_col)
    adjacency = bipartite2undirected(biadjacency)

    # Forward the initial state: it used to be accepted but silently dropped.
    Diffusion.fit(self, adjacency, seeds, initial_state)
    self._split_vars(n_row)
    return self
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
        seeds_row: Optional[Union[dict, np.ndarray]] = None,
        seeds_col: Optional[Union[dict, np.ndarray]] = None) -> 'BiPageRank':
    """Fit algorithm to data.

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix.
    seeds_row, seeds_col :
        Parameter to be used for Personalized BiPageRank.
        Restart distribution as vectors or dicts on rows, columns (node: weight).
        If both seeds_row and seeds_col are ``None`` (default), the uniform distribution on rows is used.

    Returns
    -------
    self: :class:`BiPageRank`
    """
    biadjacency = check_format(biadjacency)
    n_row, n_col = biadjacency.shape

    # run PageRank on the graph derived from the biadjacency matrix
    undirected = bipartite2undirected(biadjacency)
    restart = stack_seeds(n_row, n_col, seeds_row, seeds_col)
    PageRank.fit(self, undirected, restart)

    # separate rows from columns, then normalize each side in place
    self._split_vars(n_row)
    row_total = self.scores_row_.sum()
    self.scores_row_ /= row_total
    col_total = self.scores_col_.sum()
    self.scores_col_ /= col_total

    self.scores_ = self.scores_row_
    return self
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'BiLouvainHierarchy':
    """Applies Louvain hierarchical clustering to

    :math:`A  = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`

    where :math:`B` is the biadjacency matrix of the graphs.

    Parameters
    ----------
    biadjacency:
        Biadjacency matrix of the graph.

    Returns
    -------
    self: :class:`BiLouvainHierarchy`
    """
    biadjacency = check_format(biadjacency)

    # cluster the graph built from the biadjacency matrix
    undirected = bipartite2undirected(biadjacency)
    hierarchy = self.louvain_hierarchy
    self.dendrogram_ = hierarchy.fit_transform(undirected)

    # split the result using the shape of the biadjacency matrix
    self._split_vars(biadjacency.shape)
    return self
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
        seeds_row: Optional[Union[dict, np.ndarray]] = None,
        seeds_col: Optional[Union[dict, np.ndarray]] = None,
        init: Optional[float] = None) -> 'BiDirichlet':
    """Compute the solution to the Dirichlet problem (temperatures at equilibrium).

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix, shape (n_row, n_col).
    seeds_row :
        Temperatures of seed rows (dictionary or vector of size n_row). Negative temperatures ignored.
    seeds_col :
        Temperatures of seed columns (dictionary or vector of size n_col). Negative temperatures ignored.
    init :
        Temperature of non-seed nodes in initial state.
        If ``None``, use the average temperature of seed nodes (default).

    Returns
    -------
    self: :class:`BiDirichlet`
    """
    biadjacency = check_format(biadjacency)
    n_row, n_col = biadjacency.shape

    # seeds of rows and columns are combined into a single seed argument
    stacked_seeds = stack_seeds(n_row, n_col, seeds_row, seeds_col)
    undirected = bipartite2undirected(biadjacency)

    # solve with the parent algorithm, then split rows from columns
    Dirichlet.fit(self, undirected, stacked_seeds, init)
    self._split_vars(n_row)
    return self
def get_adjacency(input_matrix: Union[sparse.csr_matrix, np.ndarray],
                  allow_directed: bool = True,
                  force_bipartite: bool = False,
                  force_directed: bool = False) -> Tuple[sparse.csr_matrix, bool]:
    """Check the input matrix and return a proper adjacency matrix.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    allow_directed :
        If ``True`` (default), allow the graph to be directed.
    force_bipartite : bool
        If ``True``, return the adjacency matrix of a bipartite graph.
        Otherwise (default), do it only if the input matrix is not square or not symmetric
        with ``allow_directed=False``.
    force_directed :
        If ``True`` return :math:`A  = \\begin{bmatrix} 0 & B \\\\ 0 & 0 \\end{bmatrix}`.
        Otherwise (default), return :math:`A  = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`.

    Returns
    -------
    adjacency : sparse.csr_matrix
        The checked input matrix itself, or the matrix built from it when
        the graph is treated as bipartite.
    bipartite : bool
        ``True`` if the graph is treated as bipartite.
    """
    input_matrix = check_format(input_matrix)

    # The symmetry test only runs when directed graphs are not allowed.
    treat_as_bipartite = (
        force_bipartite
        or not is_square(input_matrix)
        or (not allow_directed and not is_symmetric(input_matrix))
    )
    if not treat_as_bipartite:
        return input_matrix, False

    build = bipartite2directed if force_directed else bipartite2undirected
    return build(input_matrix), True
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'Harmonic':
    """Harmonic centrality for connected graphs.

    The score of a node is the sum of the inverses of the path lengths
    computed from it, its own entry being excluded.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.

    Returns
    -------
    self: :class:`Harmonic`
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    n = adjacency.shape[0]

    all_nodes = np.arange(n)
    paths = shortest_path(adjacency, n_jobs=self.n_jobs, indices=all_nodes)

    # Put a placeholder on the diagonal before inverting, then cancel the
    # diagonal so that a node does not contribute to its own score.
    np.fill_diagonal(paths, 1)
    inverse_paths = np.divide(1, paths)
    np.fill_diagonal(inverse_paths, 0)

    # row sums of the inverse path lengths
    self.scores_ = inverse_paths.dot(np.ones(n))
    return self
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray],
        seeds_row: Union[np.ndarray, dict],
        seeds_col: Optional[Union[np.ndarray, dict]] = None) -> 'BiPropagation':
    """Node classification, delegated to :class:`Propagation` on the graph
    built from the biadjacency matrix.

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix of the graph.
    seeds_row :
        Seed rows. Can be a dict {node: label} or an array where "-1" means no label.
    seeds_col :
        Seed columns (optional). Same format.

    Returns
    -------
    self: :class:`BiPropagation`
    """
    # the shape is read on the raw input, before the format check
    shape = biadjacency.shape
    n_row, n_col = shape
    biadjacency = check_format(biadjacency)

    undirected = bipartite2undirected(biadjacency)

    # labels of rows and columns are combined, then cast to integers
    stacked = stack_seeds(n_row, n_col, seeds_row, seeds_col)
    labels_seed = stacked.astype(int)

    Propagation.fit(self, undirected, labels_seed)
    self._split_vars(n_row)
    return self
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'BiPropagationClustering':
    """Clustering.

    A :class:`PropagationClustering` configured like this object is run on
    the graph built from the biadjacency matrix.

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix of the graph.

    Returns
    -------
    self: :class:`BiPropagationClustering`
    """
    # the shape is read on the raw input, before the format check
    n_row, _ = biadjacency.shape
    biadjacency = check_format(biadjacency)

    undirected = bipartite2undirected(biadjacency)
    clustering = PropagationClustering(self.n_iter, self.node_order,
                                       self.weighted)
    self.labels_ = clustering.fit_transform(undirected)

    # split labels between rows and columns, then derive the other outputs
    self._split_vars(n_row)
    self._secondary_outputs(biadjacency)
    return self
def fit(
        self, adjacency: Union[sparse.csr_matrix,
                               np.ndarray]) -> 'LaplacianEmbedding':
    """Compute the graph embedding.

    The embedding is made of eigenvectors of a Laplacian of the graph
    (optionally regularized), the first eigenpair returned by the solver
    being discarded.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph (symmetric matrix).

    Returns
    -------
    self: :class:`LaplacianEmbedding`
    """
    adjacency = check_format(adjacency).asfptype()
    check_square(adjacency)
    check_symmetry(adjacency)
    n = adjacency.shape[0]

    # regularization is active unless it is None or zero
    regularize: bool = not (self.regularization is None
                            or self.regularization == 0.)
    check_scaling(self.scaling, adjacency, regularize)

    # the Lanczos solver is imposed when regularizing
    if regularize:
        solver: EigSolver = LanczosEig()
    else:
        solver = set_solver(self.solver, adjacency)
    # one extra component is requested because the first eigenpair is dropped below
    n_components = 1 + check_n_components(self.n_components, n - 2)

    # weighted degree of each node (row sums of the adjacency matrix)
    weights = adjacency.dot(np.ones(n))
    regularization = self.regularization
    if regularization:
        if self.relative_regularization:
            # rescale by the total weight divided by n squared
            regularization = regularization * weights.sum() / n**2
        weights += regularization * n
        laplacian = LaplacianOperator(adjacency, regularization)
    else:
        # explicit Laplacian: diagonal matrix of weights minus adjacency
        weight_diag = sparse.diags(weights, format='csr')
        laplacian = weight_diag - adjacency

    # presumably selects the smallest-magnitude eigenvalues -- confirm against the solver.
    # NOTE(review): placement outside the else branch was reconstructed from
    # the collapsed source -- confirm.
    solver.which = 'SM'
    solver.fit(matrix=laplacian, n_components=n_components)
    # drop the first eigenpair
    eigenvalues = solver.eigenvalues_[1:]
    eigenvectors = solver.eigenvectors_[:, 1:]

    embedding = eigenvectors.copy()

    if self.scaling:
        # rescale each component using the eigenvalues raised to the power ``scaling``
        eigenvalues_inv_diag = diag_pinv(eigenvalues**self.scaling)
        embedding = eigenvalues_inv_diag.dot(embedding.T).T

    if self.normalized:
        embedding = normalize(embedding, p=2)

    self.embedding_ = embedding
    self.eigenvalues_ = eigenvalues
    self.eigenvectors_ = eigenvectors
    # effective regularization (after the optional rescaling above)
    self.regularization_ = regularization

    return self
def dasgupta_cost(adjacency: sparse.csr_matrix, dendrogram: np.ndarray,
                  weights: str = 'uniform', normalized: bool = False) -> float:
    """Dasgupta's cost of a hierarchy.

    Expected size (weights = ``'uniform'``) or expected volume (weights = ``'degree'``) of the cluster induced by
    random edge sampling (closest ancestor of the two nodes in the hierarchy).

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    dendrogram :
        Dendrogram.
    weights :
        Weights of nodes.
        ``'degree'`` or ``'uniform'`` (default).
    normalized :
        If ``True``, normalized cost (between 0 and 1).

    Returns
    -------
    cost : float
        Cost.

    Example
    -------
    >>> from sknetwork.hierarchy import dasgupta_cost, Paris
    >>> from sknetwork.data import house
    >>> paris = Paris()
    >>> adjacency = house()
    >>> dendrogram = paris.fit_transform(adjacency)
    >>> cost = dasgupta_cost(adjacency, dendrogram)
    >>> np.round(cost, 2)
    3.33

    References
    ----------
    Dasgupta, S. (2016). A cost function for similarity-based hierarchical clustering.
    Proceedings of ACM symposium on Theory of Computing.
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    n = adjacency.shape[0]
    check_min_size(n, 2)

    # only the first and third sampling distributions are needed
    edge_sampling, _, cluster_weight = get_sampling_distributions(
        adjacency, dendrogram, weights)
    cost = edge_sampling.dot(cluster_weight)

    if normalized:
        return cost

    # unnormalized cost: rescale by the total edge weight or by the number of nodes
    scale = adjacency.data.sum() if weights == 'degree' else n
    cost *= scale
    return cost
def co_neighbor_graph(adjacency: Union[sparse.csr_matrix, np.ndarray], normalized: bool = True, method='knn',
                      n_neighbors: int = 5, n_components: int = 8) -> sparse.csr_matrix:
    """Compute the co-neighborhood adjacency.

    * Graphs
    * Digraphs
    * Bigraphs

    :math:`\\tilde{A} = AF^{-1}A^T`,

    where F is a weight matrix.

    Parameters
    ----------
    adjacency:
        Adjacency of the input graph.
    normalized:
        If ``True``, F is the diagonal in-degree matrix :math:`F = \\text{diag}(A^T1)`.
        Otherwise, F is the identity matrix.
    method:
        Either ``'exact'`` or ``'knn'``. If 'exact' the output is computed with matrix multiplication.
        However, the density can be much higher than in the input graph and this can trigger Memory errors.
        If ``'knn'``, the co-neighborhood is approximated through KNNDense-search in an appropriate spectral
        embedding space.
    n_neighbors:
        Number of neighbors for the KNNDense search. Only useful if ``method='knn'``.
    n_components:
        Dimension of the embedding space. Only useful if ``method='knn'``.

    Returns
    -------
    adjacency : sparse.csr_matrix
        Adjacency of the co-neighborhood.

    Raises
    ------
    ValueError
        If ``method`` is neither ``'exact'`` nor ``'knn'``.
    """
    adjacency = check_format(adjacency).astype(float)

    if method not in ('exact', 'knn'):
        raise ValueError('method must be "exact" or "knn".')

    if method == 'exact':
        # explicit product of the matrix with its (optionally normalized) transpose
        forward = normalize(adjacency.T).tocsr() if normalized else adjacency.T
        return adjacency.dot(forward)

    # 'knn': nearest-neighbor search in an embedding of the nodes
    embedding_class = GSVD if normalized else SVD
    algo = embedding_class(n_components, regularization=None)
    embedding = algo.fit_transform(adjacency)

    knn = KNNDense(n_neighbors, undirected=True)
    knn.fit(embedding)
    return knn.adjacency_
def largest_connected_component(adjacency: Union[sparse.csr_matrix, np.ndarray], return_labels: bool = False):
    """Extract the largest connected component of a graph. Bipartite graphs are treated as undirected.

    * Graphs
    * Digraphs
    * Bigraphs

    Parameters
    ----------
    adjacency :
        Adjacency or biadjacency matrix of the graph.
    return_labels : bool
        Whether to return the indices of the new nodes in the original graph.

    Returns
    -------
    new_adjacency : sparse.csr_matrix
        Adjacency or biadjacency matrix of the largest connected component.
    indices : array or tuple of array
        Indices of the nodes in the original graph. For biadjacency matrices,
        ``indices[0]`` corresponds to the rows and ``indices[1]`` to the columns.
        Only returned if ``return_labels`` is ``True``.
    """
    adjacency = check_format(adjacency)
    n_row, _ = adjacency.shape

    # a non-square matrix is handled as a biadjacency matrix
    bipartite = not is_square(adjacency)
    if bipartite:
        # rows first, then columns, in a single symmetric matrix
        full_adjacency = sparse.bmat([[None, adjacency], [adjacency.T, None]], format='csr')
    else:
        full_adjacency = adjacency

    labels = connected_components(full_adjacency)
    unique_labels, counts = np.unique(labels, return_counts=True)
    largest_label = unique_labels[np.argmax(counts)]
    component_indices = np.flatnonzero(labels == largest_label)

    if bipartite:
        # indices below n_row are rows; the others are columns shifted by n_row
        split_ix = np.searchsorted(component_indices, n_row)
        row_ix = component_indices[:split_ix]
        col_ix = component_indices[split_ix:] - n_row
    else:
        row_ix = col_ix = component_indices

    # select the rows in CSR, the columns in CSC, and return CSR
    new_adjacency = adjacency[row_ix, :].tocsc()[:, col_ix].tocsr()

    if not return_labels:
        return new_adjacency
    if bipartite:
        return new_adjacency, (row_ix, col_ix)
    return new_adjacency, row_ix
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray],
        seeds: Union[np.ndarray, dict] = None) -> 'Propagation':
    """Node classification by label propagation.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    seeds :
        Seed nodes. Can be a dict {node: label} or an array where "-1" means no label.

    Returns
    -------
    self: :class:`Propagation`
    """
    adjacency = check_format(adjacency)
    n = adjacency.shape[0]
    index_seed, index_remain, labels_seed = self._instanciate_vars(
        adjacency, seeds)

    if self.node_order == 'random':
        np.random.shuffle(index_remain)
    elif self.node_order in ('decreasing', 'increasing'):
        # Sort the remaining (non-seed) nodes themselves by weight.
        # Indexing the global ranking with ``index_remain`` would instead
        # select nodes by rank position, which may include seeds and skip
        # some unlabeled nodes.
        node_weights = adjacency.T.dot(np.ones(n))
        if self.node_order == 'decreasing':
            node_weights = -node_weights
        index_remain = np.asarray(index_remain)
        order = np.argsort(node_weights[index_remain])
        index_remain = index_remain[order].astype(np.int32)

    # -1 marks nodes without label
    labels = -np.ones(n, dtype=np.int32)
    labels[index_seed] = labels_seed
    labels_remain = np.zeros_like(index_remain, dtype=np.int32)

    indptr = adjacency.indptr.astype(np.int32)
    indices = adjacency.indices.astype(np.int32)
    if self.weighted:
        data = adjacency.data.astype(np.float32)
    else:
        # unit weights, one per stored edge so that ``data`` stays aligned
        # with ``indices`` exactly as in the weighted case
        data = np.ones_like(adjacency.data, dtype=np.float32)

    # iterate until the labels of the remaining nodes are stable
    # or the maximum number of iterations is reached
    t = 0
    while t < self.n_iter and not np.array_equal(labels_remain,
                                                 labels[index_remain]):
        t += 1
        labels_remain = labels[index_remain].copy()
        labels = np.asarray(
            vote_update(indptr, indices, data, labels, index_remain))

    membership = membership_matrix(labels)
    membership = normalize(adjacency.dot(membership))

    self.labels_ = labels
    self.membership_ = membership

    return self
def fit(self,
        adjacency: Union[sparse.csr_matrix, np.ndarray],
        seeds: Optional[Union[dict, np.ndarray]] = None,
        initial_state: Optional = None) -> 'Diffusion':
    """Compute the diffusion (temperature at equilibrium).

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    seeds :
        Temperatures of border nodes (dictionary or vector). Negative temperatures ignored.
        If ``None``, all scores are set to ``1 / n``.
    initial_state :
        Initial state of temperatures.
        If ``None``, a constant state is used (see the code below).

    Returns
    -------
    self: :class:`Diffusion`
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    n: int = adjacency.shape[0]
    # without seeds there is nothing to diffuse: uniform scores
    if seeds is None:
        self.scores_ = np.ones(n) / n
        return self

    seeds = check_seeds(seeds, n)
    # ``border`` is used as a boolean mask over nodes, ``b`` holds the imposed values
    b, border = limit_conditions(seeds)
    tmin, tmax = np.min(b[border]), np.max(b)

    # Diagonal 0/1 matrix selecting the non-border nodes; multiplying on the
    # left zeroes the rows of border nodes in the normalized adjacency.
    interior: sparse.csr_matrix = sparse.diags(~border,
                                               shape=(n, n),
                                               format='csr',
                                               dtype=float)
    diffusion_matrix = interior.dot(normalize(adjacency))

    if initial_state is None:
        # default state: mean border value everywhere, or zeros when all
        # border values coincide
        if tmin != tmax:
            initial_state = b[border].mean() * np.ones(n)
        else:
            initial_state = np.zeros(n)
        # NOTE(review): assumed to apply only to the default state (nesting
        # reconstructed from the collapsed source) -- confirm.
        initial_state[border] = b[border]

    if self.n_iter > 0:
        # fixed number of iterations, border values re-imposed after each step
        scores = initial_state
        for i in range(self.n_iter):
            scores = diffusion_matrix.dot(scores)
            scores[border] = b[border]

    else:
        # otherwise solve the linear system (I - diffusion_matrix) x = b
        a = sparse.eye(n, format='csr', dtype=float) - diffusion_matrix
        scores, info = bicgstab(a, b, atol=0., x0=initial_state)
        self._scipy_solver_info(info)

    # keep the scores within the range of imposed values when that range is not degenerate
    if tmin != tmax:
        self.scores_ = np.clip(scores, tmin, tmax)
    else:
        self.scores_ = scores
    return self
def __init__(self, adjacency: Union[sparse.csr_matrix, np.ndarray], normalized: bool = True):
    """Build the operator from an adjacency matrix.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph; it is cast to float.
    normalized :
        If ``True`` (default), the transposed matrix stored as ``forward`` is normalized.
    """
    adjacency = check_format(adjacency).astype(float)
    n_row = adjacency.shape[0]
    # the operator is square, sized by the number of rows
    super(CoNeighborsOperator, self).__init__(dtype=float, shape=(n_row, n_row))

    transposed = adjacency.T
    self.forward = normalize(transposed).tocsr() if normalized else transposed
    self.backward = adjacency
def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'KMeans':
    """Apply embedding method followed by K-means.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.

    Returns
    -------
    self: :class:`KMeans`
    """
    self._init_vars()

    # input check (the returned matrix is not kept)
    check_format(input_matrix)
    # the number of clusters is checked against rows + columns when
    # co-clustering, and against the number of rows otherwise
    if self.co_cluster:
        n_max = np.sum(input_matrix.shape)
    else:
        n_max = input_matrix.shape[0]
    check_n_clusters(self.n_clusters, n_max)

    # embedding; the second output is stored as ``self.bipartite``
    node_embedding, self.bipartite = get_embedding(
        input_matrix, self.embedding_method, self.co_cluster)

    # clustering in the embedding space
    kmeans = KMeansDense(self.n_clusters)
    kmeans.fit(node_embedding)

    # output, with labels optionally reindexed
    self.labels_ = reindex_labels(kmeans.labels_) if self.sort_clusters else kmeans.labels_
    if self.co_cluster:
        self._split_vars(input_matrix.shape)
    self._secondary_outputs(input_matrix)

    return self
def __init__(self, adjacency: Union[sparse.csr_matrix, np.ndarray], coeffs: np.ndarray):
    """Build the operator from a square matrix and its coefficients.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph (must be square).
    coeffs :
        Coefficients of the polynome; at least one is required.

    Raises
    ------
    ValueError
        If ``coeffs`` is empty.
    """
    if not coeffs.shape[0]:
        raise ValueError('A polynome requires at least one coefficient.')

    adjacency = check_format(adjacency)
    check_square(adjacency)

    # the operator inherits dtype and shape from the matrix
    super(Polynome, self).__init__(dtype=adjacency.dtype, shape=adjacency.shape)

    self.adjacency = adjacency
    self.coeffs = coeffs
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray],
        seeds: Optional[Union[dict, np.ndarray]] = None,
        init: Optional[float] = None) -> 'Dirichlet':
    """Compute the solution to the Dirichlet problem (temperatures at equilibrium).

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    seeds :
        Temperatures of seed nodes (dictionary or vector). Negative temperatures ignored.
        If ``None``, all scores are set to ``1 / n``.
    init :
        Temperature of non-seed nodes in initial state.
        If ``None``, use the average temperature of seed nodes (default).

    Returns
    -------
    self: :class:`Dirichlet`
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    n: int = adjacency.shape[0]

    # no seed: uniform scores
    if seeds is None:
        self.scores_ = np.ones(n) / n
        return self

    seeds = check_seeds(seeds, n)
    # seed nodes are those with a non-negative temperature
    border = (seeds >= 0)
    seed_values = seeds[border]

    # initial state: constant on non-seed nodes, imposed values on seeds
    start_value = seed_values.mean() if init is None else init
    scores = start_value * np.ones(n)
    scores[border] = seed_values

    if self.n_iter > 0:
        # fixed number of iterations, seed values re-imposed after each step
        diffusion = DirichletOperator(adjacency, self.damping_factor, border)
        for _ in range(self.n_iter):
            scores = diffusion.dot(scores)
            scores[border] = seed_values
    else:
        # otherwise solve a linear system, starting from the initial state
        a = DeltaDirichletOperator(adjacency, self.damping_factor, border)
        b = -seeds
        b[~border] = 0
        scores, info = bicgstab(a, b, atol=0., x0=scores)
        self._scipy_solver_info(info)

    # scores are kept within the range of seed temperatures
    self.scores_ = np.clip(scores, seed_values.min(), seed_values.max())
    return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray],
        seeds: Union[np.ndarray, dict]) -> 'Propagation':
    """Node classification by label propagation.

    Parameters
    ----------
    adjacency :
        Adjacency or biadjacency matrix of the graph.
    seeds :
        Seed nodes. Can be a dict {node: label} or an array where "-1" means no label.

    Returns
    -------
    self: :class:`Propagation`
    """
    adjacency = check_format(adjacency)
    n = adjacency.shape[0]
    index_seed, index_remain, labels_seed = self._instanciate_vars(
        adjacency, seeds)

    # -1 marks nodes without label
    labels = -np.ones(n, dtype=int)
    labels[index_seed] = labels_seed
    previous = np.zeros_like(index_remain, dtype=int)

    indptr, indices = adjacency.indptr, adjacency.indices

    # sweep over the remaining nodes until their labels stop changing
    # or the maximum number of iterations is reached
    n_sweeps = 0
    while n_sweeps < self.n_iter and not np.array_equal(previous, labels[index_remain]):
        n_sweeps += 1
        previous = labels[index_remain].copy()
        for node in index_remain:
            neighbor_labels = labels[indices[indptr[node]:indptr[node + 1]]]
            known = neighbor_labels[neighbor_labels >= 0]
            if len(known):
                # most frequent label among labeled neighbors
                candidates, counts = np.unique(known, return_counts=True)
                labels[node] = candidates[np.argmax(counts)]

    membership = normalize(adjacency.dot(membership_matrix(labels)))

    self.labels_ = labels
    self.membership_ = membership
    return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]):
    """Fit algorithm to the data.

    The index pointers and the sorted column indices of the matrix are
    stored as 32-bit integer arrays.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph

    Returns
    -------
    self : :class:`FirstOrder`
    """
    csr = check_format(adjacency)
    # indices are sorted in place before being copied
    csr.sort_indices()

    self.indptr_, self.indices_ = (csr.indptr.astype(np.int32),
                                   csr.indices.astype(np.int32))
    return self
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'BiRandomProjection':
    """Compute the embedding.

    Parameters
    ----------
    biadjacency:
        Biadjacency matrix of the graph.

    Returns
    -------
    self: :class:`BiRandomProjection`
    """
    biadjacency = check_format(biadjacency)
    n_row = biadjacency.shape[0]

    # embed the graph built from the biadjacency matrix, then split rows from columns
    undirected = bipartite2undirected(biadjacency)
    RandomProjection.fit(self, undirected)
    self._split_vars(n_row)
    return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'HITS':
    """Compute HITS algorithm with a spectral method.

    Hub and authority scores are the first left and right singular vectors,
    oriented so that their dominant sign is positive and clipped at zero.

    Parameters
    ----------
    adjacency :
        Adjacency or biadjacency matrix of the graph.

    Returns
    -------
    self: :class:`HITS`
    """

    def _positive_part(vector: np.ndarray) -> np.ndarray:
        # Flip the vector unless positive entries are strictly more numerous
        # than negative ones, then zero out the entries left negative.
        n_pos, n_neg = (vector > 0).sum(), (vector < 0).sum()
        oriented = vector if n_pos > n_neg else -vector
        return np.clip(oriented, a_min=0., a_max=None)

    adjacency = check_format(adjacency)

    # 'auto' is resolved to a solver object, stored on the instance
    if self.solver == 'auto':
        solver_name = auto_solver(adjacency.nnz)
        solver_class = LanczosSVD if solver_name == 'lanczos' else HalkoSVD
        self.solver = solver_class()

    # a single component is computed
    self.solver.fit(adjacency, 1)
    hubs = _positive_part(self.solver.singular_vectors_left_.reshape(-1))
    authorities = _positive_part(self.solver.singular_vectors_right_.reshape(-1))

    self.scores_row_ = hubs
    self.scores_col_ = authorities
    self.scores_ = hubs
    return self
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'BiKatz':
    """Katz centrality.

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix of the graph.

    Returns
    -------
    self: :class:`BiKatz`
    """
    biadjacency = check_format(biadjacency)
    n_row, _ = biadjacency.shape

    # run the parent algorithm on the graph built from the biadjacency
    # matrix, then split rows from columns
    undirected = bipartite2undirected(biadjacency)
    Katz.fit(self, undirected)
    self._split_vars(n_row)
    return self