def fit(self, biadjacency: sparse.csr_matrix):
    """Embed a bipartite graph using the clusters found by BiLouvain.

    Parameters
    ----------
    biadjacency:
        Biadjacency matrix of the graph.

    Returns
    -------
    self: :class:`BiLouvainEmbedding`
    """
    louvain_params = dict(
        resolution=self.resolution,
        modularity=self.modularity,
        tol_optimization=self.tol_optimization,
        tol_aggregation=self.tol_aggregation,
        n_aggregations=self.n_aggregations,
        shuffle_nodes=self.shuffle_nodes,
        sort_clusters=False,
        return_membership=True,
        return_aggregate=True,
        random_state=self.random_state,
    )
    bilouvain = BiLouvain(**louvain_params)
    bilouvain.fit(biadjacency)
    self.labels_ = bilouvain.labels_

    # Default embeddings: the membership matrices computed by BiLouvain.
    row_embedding = bilouvain.membership_row_
    col_embedding = bilouvain.membership_col_

    if self.isolated_nodes in ['remove', 'merge']:
        # Column clusters with more than one node are kept and renumbered
        # 0..k-1; the others map to -1 ('remove') or to the extra index k
        # ('merge').
        col_values, col_counts = np.unique(bilouvain.labels_col_, return_counts=True)
        table_size = max(col_values) + 1
        kept = col_values[col_counts > 1]
        n_kept = len(kept)
        if self.isolated_nodes == 'remove':
            lookup = -np.ones(table_size, dtype='int')
        else:
            lookup = n_kept * np.ones(table_size, dtype='int')
        lookup[kept] = np.arange(n_kept)
        labels_col = lookup[bilouvain.labels_col_]

        # Row labels follow the same renumbering; any row label that is not
        # a kept column cluster maps to -1.
        row_values = np.unique(bilouvain.labels_row_)
        table_size = max(row_values) + 1
        lookup = -np.ones(table_size, dtype='int')
        lookup[kept] = np.arange(n_kept)
        labels_row = lookup[bilouvain.labels_row_]

        # Embeddings: normalized (bi)adjacency projected on the new clusters.
        row_embedding = normalize(biadjacency).dot(membership_matrix(labels_col))
        col_embedding = normalize(biadjacency.T).dot(membership_matrix(labels_row))

    self.embedding_row_ = row_embedding.toarray()
    self.embedding_col_ = col_embedding.toarray()
    self.embedding_ = self.embedding_row_
    return self
def fit(self, input_matrix: sparse.csr_matrix, force_bipartite: bool = False):
    """Embed a graph using the clusters found by Louvain.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    force_bipartite : bool (default = ``False``)
        If ``True``, force the input matrix to be considered as a biadjacency matrix.

    Returns
    -------
    self: :class:`BiLouvainEmbedding`
    """
    louvain_params = dict(
        resolution=self.resolution,
        modularity=self.modularity,
        tol_optimization=self.tol_optimization,
        tol_aggregation=self.tol_aggregation,
        n_aggregations=self.n_aggregations,
        shuffle_nodes=self.shuffle_nodes,
        sort_clusters=False,
        return_membership=True,
        return_aggregate=True,
        random_state=self.random_state,
    )
    louvain = Louvain(**louvain_params)
    louvain.fit(input_matrix, force_bipartite=force_bipartite)

    # Pick the labels to reindex: a square input uses labels_; otherwise the
    # column labels are primary and the row labels secondary.
    if is_square(input_matrix):
        primary, secondary = louvain.labels_, None
    else:
        primary, secondary = louvain.labels_col_, louvain.labels_row_
    self.labels_, labels_row = reindex_labels(primary, secondary, self.isolated_nodes)

    # Embedding: normalized input matrix projected on the cluster membership.
    projection = normalize(input_matrix).dot(membership_matrix(self.labels_))
    self.embedding_ = projection.toarray()

    if labels_row is None:
        return self

    # Row labels were returned: also embed the columns.
    col_projection = normalize(input_matrix.T).dot(membership_matrix(labels_row))
    self.embedding_row_ = self.embedding_
    self.embedding_col_ = col_projection.toarray()
    return self
def _secondary_outputs(self, biadjacency):
    """Derive the optional membership and aggregate outputs from the labels.

    Sets ``membership_row_`` / ``membership_col_`` when ``return_membership``
    is set, and ``biadjacency_`` when ``return_aggregate`` is set.
    """
    if not (self.return_membership or self.return_aggregate):
        return self

    # Both outputs rely on the same two membership matrices; build them once.
    rows = membership_matrix(self.labels_row_)
    cols = membership_matrix(self.labels_col_)

    if self.return_membership:
        self.membership_row_ = normalize(biadjacency.dot(cols))
        self.membership_col_ = normalize(biadjacency.T.dot(rows))

    if self.return_aggregate:
        # Aggregate biadjacency between row clusters and column clusters.
        aggregate = sparse.csr_matrix(rows.T.dot(biadjacency))
        self.biadjacency_ = aggregate.dot(cols)

    return self
def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
    """Embed new rows described by their adjacency vectors.

    Parameters
    ----------
    adjacency_vectors :
        Adjacency row vectors.
        Array of shape (n_col,) (single vector) or (n_vectors, n_col)

    Returns
    -------
    embedding_vectors : np.ndarray
        Embedding of the nodes.
    """
    self._check_fitted()

    # The expected vector length comes from the column embedding when there
    # is one, otherwise from the main embedding.
    reference = self.embedding_ if self.embedding_col_ is None else self.embedding_col_
    n = len(reference)

    vectors = check_adjacency_vector(adjacency_vectors, n)
    check_nonnegative(vectors)

    # Normalized vectors are projected on the cluster membership matrix.
    return normalize(vectors).dot(membership_matrix(self.labels_))
def predict(self, adjacency_vectors: Union[sparse.csr_matrix, np.ndarray]) -> np.ndarray:
    """Embed new nodes described by their adjacency vectors.

    Parameters
    ----------
    adjacency_vectors :
        Adjacency vectors of nodes.
        Array of shape (n_col,) (single vector) or (n_vectors, n_col)

    Returns
    -------
    embedding_vectors : np.ndarray
        Embedding of the nodes.
    """
    self._check_fitted()
    n_nodes = self.embedding_.shape[0]

    vectors = check_adjacency_vector(adjacency_vectors, n_nodes)
    check_nonnegative(vectors)

    # NOTE(review): the vectors are projected without being normalized first;
    # confirm this matches how ``embedding_`` is built during fit.
    return vectors.dot(membership_matrix(self.labels_))
def _secondary_outputs(self, adjacency):
    """Derive the optional membership and aggregate outputs from ``labels_``.

    Sets ``membership_`` when ``return_membership`` is set and ``adjacency_``
    (the cluster-level adjacency) when ``return_aggregate`` is set.
    """
    wants_membership = self.return_membership
    wants_aggregate = self.return_aggregate
    if not (wants_membership or wants_aggregate):
        return self

    clusters = membership_matrix(self.labels_)
    if wants_membership:
        self.membership_ = normalize(adjacency.dot(clusters))
    if wants_aggregate:
        aggregated = clusters.T.dot(adjacency.dot(clusters))
        self.adjacency_ = sparse.csr_matrix(aggregated)
    return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], seeds: Union[np.ndarray, dict] = None) \
        -> 'Propagation':
    """Node classification by label propagation.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    seeds :
        Seed nodes. Can be a dict {node: label} or an array where "-1" means no label.

    Returns
    -------
    self: :class:`Propagation`
    """
    adjacency = check_format(adjacency)
    n = adjacency.shape[0]
    index_seed, index_remain, labels_seed = self._instanciate_vars(adjacency, seeds)

    # Order in which the unlabeled nodes are visited.
    if self.node_order == 'random':
        np.random.shuffle(index_remain)
    elif self.node_order in ('decreasing', 'increasing'):
        # Sort the unlabeled nodes themselves by weight. Indexing a global
        # argsort with ``index_remain`` would select nodes by rank position
        # and could pick seed nodes instead of the unlabeled ones.
        index_remain = np.asarray(index_remain)
        weights = adjacency.T.dot(np.ones(n))
        sign = -1 if self.node_order == 'decreasing' else 1
        order = np.argsort(sign * weights[index_remain])
        index_remain = index_remain[order].astype(np.int32)

    labels = -np.ones(n, dtype=np.int32)
    labels[index_seed] = labels_seed
    labels_remain = np.zeros_like(index_remain, dtype=np.int32)

    indptr = adjacency.indptr.astype(np.int32)
    indices = adjacency.indices.astype(np.int32)
    if self.weighted:
        data = adjacency.data.astype(np.float32)
    else:
        # One unit vote per stored edge: ``data`` must be aligned with
        # ``indices`` (like ``adjacency.data``), not with the nodes.
        data = np.ones(indices.shape[0], dtype=np.float32)

    # Iterate until the labels of the unlabeled nodes stop changing or the
    # iteration budget is exhausted.
    t = 0
    while t < self.n_iter and not np.array_equal(labels_remain, labels[index_remain]):
        t += 1
        labels_remain = labels[index_remain].copy()
        labels = np.asarray(vote_update(indptr, indices, data, labels, index_remain))

    membership = membership_matrix(labels)
    membership = normalize(adjacency.dot(membership))

    self.labels_ = labels
    self.membership_ = membership

    return self
def _secondary_outputs(self, adjacency):
    """Derive the optional membership and aggregate outputs from ``labels_``.

    Sets ``membership_`` when ``return_membership`` is set and ``adjacency_``
    (the cluster-level adjacency) when ``return_aggregate`` is set.
    """
    if not (self.return_membership or self.return_aggregate):
        return self

    # Boolean matrices are cast to float before the products below.
    is_boolean = np.issubdtype(adjacency.data.dtype, np.bool_)
    if is_boolean:
        adjacency = adjacency.astype(float)

    clusters = membership_matrix(self.labels_)
    if self.return_membership:
        self.membership_ = normalize(adjacency.dot(clusters))
    if self.return_aggregate:
        aggregated = clusters.T.dot(adjacency.dot(clusters))
        self.adjacency_ = sparse.csr_matrix(aggregated)
    return self
def _secondary_outputs(self, input_matrix: sparse.csr_matrix):
    """Derive the optional membership and aggregate outputs from the labels.

    Non-bipartite case: sets ``membership_`` and/or ``aggregate_`` from
    ``labels_``. Bipartite case: sets ``membership_row_``,
    ``membership_col_``, ``membership_`` and/or ``aggregate_``.
    """
    if not (self.return_membership or self.return_aggregate):
        return self

    # Boolean matrices are cast to float before the products below.
    if np.issubdtype(input_matrix.data.dtype, np.bool_):
        input_matrix = input_matrix.astype(float)

    if not self.bipartite:
        clusters = membership_matrix(self.labels_)
        if self.return_membership:
            self.membership_ = normalize(input_matrix.dot(clusters))
        if self.return_aggregate:
            self.aggregate_ = sparse.csr_matrix(clusters.T.dot(input_matrix.dot(clusters)))
        return self

    # Bipartite case: build row and column memberships over a shared number
    # of labels.
    if self.labels_col_ is None:
        # No column labels: column membership is derived from the rows.
        n_labels = max(self.labels_) + 1
        rows = membership_matrix(self.labels_, n_labels=n_labels)
        cols = normalize(input_matrix.T.dot(rows))
    else:
        n_labels = max(max(self.labels_row_), max(self.labels_col_)) + 1
        rows = membership_matrix(self.labels_row_, n_labels=n_labels)
        cols = membership_matrix(self.labels_col_, n_labels=n_labels)

    if self.return_membership:
        self.membership_row_ = normalize(input_matrix.dot(cols))
        self.membership_col_ = normalize(input_matrix.T.dot(rows))
        self.membership_ = self.membership_row_

    if self.return_aggregate:
        aggregated = sparse.csr_matrix(rows.T.dot(input_matrix))
        self.aggregate_ = aggregated.dot(cols)

    return self
def _secondary_outputs(self, biadjacency):
    """Derive the optional membership and aggregate outputs from the labels.

    Sets ``membership_row_``, ``membership_col_`` and ``membership_`` when
    ``return_membership`` is set, and ``biadjacency_`` when
    ``return_aggregate`` is set.
    """
    if not (self.return_membership or self.return_aggregate):
        return self

    # Boolean matrices are cast to float before the products below.
    if np.issubdtype(biadjacency.data.dtype, np.bool_):
        biadjacency = biadjacency.astype(float)

    # Row and column memberships share one label space so that their
    # columns line up.
    n_labels = max(max(self.labels_row_), max(self.labels_col_)) + 1
    rows = membership_matrix(self.labels_row_, n_labels=n_labels)
    cols = membership_matrix(self.labels_col_, n_labels=n_labels)

    if self.return_membership:
        self.membership_row_ = normalize(biadjacency.dot(cols))
        self.membership_col_ = normalize(biadjacency.T.dot(rows))
        self.membership_ = self.membership_row_

    if self.return_aggregate:
        aggregated = sparse.csr_matrix(rows.T.dot(biadjacency))
        self.biadjacency_ = aggregated.dot(cols)

    return self
def fit(self, adjacency: sparse.csr_matrix):
    """Embed a graph using a clustering obtained with Louvain.

    Parameters
    ----------
    adjacency:
        Adjacency matrix of the graph.

    Returns
    -------
    self: :class:`BiLouvainEmbedding`
    """
    louvain_params = dict(
        resolution=self.resolution,
        modularity=self.modularity,
        tol_optimization=self.tol_optimization,
        tol_aggregation=self.tol_aggregation,
        n_aggregations=self.n_aggregations,
        shuffle_nodes=self.shuffle_nodes,
        sort_clusters=True,
        return_membership=True,
        return_aggregate=True,
        random_state=self.random_state,
    )
    louvain = Louvain(**louvain_params)
    louvain.fit(adjacency)

    # labels_ keeps the raw Louvain labels; the reindexed labels computed
    # below are only used to build the embedding.
    self.labels_ = louvain.labels_
    embedding = louvain.membership_

    if self.isolated_nodes in ['remove', 'merge']:
        # Clusters with more than one node are renumbered 0..k-1; the others
        # map to -1 ('remove') or to the extra index k ('merge').
        values, counts = np.unique(louvain.labels_, return_counts=True)
        table_size = max(values) + 1
        kept = values[counts > 1]
        n_kept = len(kept)
        if self.isolated_nodes == 'remove':
            lookup = -np.ones(table_size, dtype='int')
        else:
            lookup = n_kept * np.ones(table_size, dtype='int')
        lookup[kept] = np.arange(n_kept)
        new_labels = lookup[louvain.labels_]

        # Embedding: normalized adjacency projected on the new clusters.
        embedding = normalize(adjacency).dot(membership_matrix(new_labels))

    self.embedding_ = embedding.toarray()
    return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], seeds: Union[np.ndarray, dict]) \
        -> 'Propagation':
    """Classify nodes by propagating the seed labels through the graph.

    Parameters
    ----------
    adjacency :
        Adjacency or biadjacency matrix of the graph.
    seeds :
        Seed nodes. Can be a dict {node: label} or an array where "-1" means no label.

    Returns
    -------
    self: :class:`Propagation`
    """
    adjacency = check_format(adjacency)
    n = adjacency.shape[0]
    index_seed, index_remain, labels_seed = self._instanciate_vars(adjacency, seeds)

    labels = -np.ones(n, dtype=int)
    labels[index_seed] = labels_seed
    previous = np.zeros_like(index_remain, dtype=int)

    n_rounds = 0
    while n_rounds < self.n_iter and not np.array_equal(previous, labels[index_remain]):
        n_rounds += 1
        previous = labels[index_remain].copy()
        # Labels are updated in place, so later nodes of the same round see
        # the labels already assigned earlier in that round.
        for node in index_remain:
            start, stop = adjacency.indptr[node], adjacency.indptr[node + 1]
            votes = labels[adjacency.indices[start:stop]]
            votes = votes[votes >= 0]
            if not len(votes):
                # No labeled neighbor yet: leave the node unchanged.
                continue
            candidates, counts = np.unique(votes, return_counts=True)
            labels[node] = candidates[np.argmax(counts)]

    membership = normalize(adjacency.dot(membership_matrix(labels)))

    self.labels_ = labels
    self.membership_ = membership

    return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], seeds: Union[np.ndarray, dict]) \
        -> 'Propagation':
    """Classify nodes by propagating the seed labels through the graph.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    seeds :
        Seed nodes. Can be a dict {node: label} or an array where "-1" means no label.

    Returns
    -------
    self: :class:`Propagation`
    """
    adjacency = check_format(adjacency)
    n = adjacency.shape[0]
    index_seed, index_remain, labels_seed = self._instanciate_vars(adjacency, seeds)

    # -1 marks a node without label; seeds get their given label.
    labels = -np.ones(n, dtype=np.int32)
    labels[index_seed] = labels_seed
    previous = np.zeros_like(index_remain, dtype=np.int32)

    # CSR structure cast to int32 before being handed to vote_update.
    indptr = adjacency.indptr.astype(np.int32)
    indices = adjacency.indices.astype(np.int32)

    n_rounds = 0
    while n_rounds < self.n_iter and not np.array_equal(previous, labels[index_remain]):
        n_rounds += 1
        previous = labels[index_remain].copy()
        labels = vote_update(indptr, indices, labels, index_remain)

    membership = normalize(adjacency.dot(membership_matrix(labels)))

    self.labels_ = labels
    self.membership_ = membership

    return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'Louvain':
    """Fit algorithm to the data.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.

    Returns
    -------
    self: :class:`Louvain`
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    n_nodes = adjacency.shape[0]

    # The optimization runs on a (possibly shuffled) working copy; the
    # original matrix is kept for the secondary outputs, which are indexed
    # in the original node order.
    nodes = np.arange(n_nodes)
    adjacency_work = adjacency
    if self.shuffle_nodes:
        nodes = self.random_state.permutation(nodes)
        adjacency_work = adjacency[nodes, :].tocsc()[:, nodes].tocsr()

    # Probabilities are computed on the working matrix so that entry i
    # refers to the same node as row/column i of that matrix.
    probs_out = check_probs('degree', adjacency_work)
    probs_in = check_probs('degree', adjacency_work.T)

    adjacency_norm = adjacency_work / adjacency_work.data.sum()

    membership = sparse.identity(n_nodes, format='csr')
    increase = True
    count_aggregations = 0
    self.log.print("Starting with", n_nodes, "nodes.")
    while increase:
        count_aggregations += 1

        current_labels, pass_increase = self._optimize(n_nodes, adjacency_norm, probs_out, probs_in)
        # Relabel the clusters as consecutive integers.
        _, current_labels = np.unique(current_labels, return_inverse=True)

        if pass_increase <= self.tol_aggregation:
            increase = False
        else:
            membership_agg = membership_matrix(current_labels)
            membership = membership.dot(membership_agg)
            n_nodes, adjacency_norm, probs_out, probs_in = self._aggregate(
                adjacency_norm, probs_out, probs_in, membership_agg)

            if n_nodes == 1:
                break
        self.log.print("Aggregation", count_aggregations, "completed with", n_nodes, "clusters and ",
                       pass_increase, "increment.")
        if count_aggregations == self.n_aggregations:
            break

    if self.sort_clusters:
        labels = reindex_labels(membership.indices)
    else:
        labels = membership.indices

    if self.shuffle_nodes:
        # Undo the permutation so that labels[i] refers to original node i.
        reverse = np.empty(nodes.size, nodes.dtype)
        reverse[nodes] = np.arange(nodes.size)
        labels = labels[reverse]

    self.labels_ = labels
    # Use the unshuffled matrix: the labels are back in the original order.
    self._secondary_outputs(adjacency)

    return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'Louvain':
    """Fit algorithm to the data.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.

    Returns
    -------
    self: :class:`Louvain`

    Raises
    ------
    ValueError
        If ``self.modularity`` is not ``'potts'``, ``'newman'`` or ``'dugue'``.
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    n = adjacency.shape[0]

    # The optimization runs on a (possibly shuffled) working copy; the
    # original matrix is kept for the secondary outputs, which are indexed
    # in the original node order.
    nodes = np.arange(n, dtype=np.int32)
    adjacency_work = adjacency
    if self.shuffle_nodes:
        nodes = self.random_state.permutation(nodes)
        adjacency_work = adjacency[nodes, :].tocsc()[:, nodes].tocsr()

    # Probabilities are computed on the working matrix so that entry i
    # refers to the same node as row/column i of that matrix.
    if self.modularity == 'potts':
        probs_ou = check_probs('uniform', adjacency_work)
        probs_in = probs_ou.copy()
    elif self.modularity == 'newman':
        probs_ou = check_probs('degree', adjacency_work)
        probs_in = probs_ou.copy()
    elif self.modularity == 'dugue':
        probs_ou = check_probs('degree', adjacency_work)
        probs_in = check_probs('degree', adjacency_work.T)
    else:
        raise ValueError('Unknown modularity function.')

    adjacency_clust = adjacency_work / adjacency_work.data.sum()

    membership = sparse.identity(n, format='csr')
    increase = True
    count_aggregations = 0
    self.log.print("Starting with", n, "nodes.")
    while increase:
        count_aggregations += 1

        labels_clust, pass_increase = self._optimize(adjacency_clust, probs_ou, probs_in)
        # Relabel the clusters as consecutive integers.
        _, labels_clust = np.unique(labels_clust, return_inverse=True)

        if pass_increase <= self.tol_aggregation:
            increase = False
        else:
            membership_clust = membership_matrix(labels_clust)
            membership = membership.dot(membership_clust)
            adjacency_clust, probs_ou, probs_in = self._aggregate(
                adjacency_clust, probs_ou, probs_in, membership_clust)

            n = adjacency_clust.shape[0]
            if n == 1:
                break
        self.log.print("Aggregation", count_aggregations, "completed with", n, "clusters and ",
                       pass_increase, "increment.")
        if count_aggregations == self.n_aggregations:
            break

    if self.sort_clusters:
        labels = reindex_labels(membership.indices)
    else:
        labels = membership.indices

    if self.shuffle_nodes:
        # Undo the permutation so that labels[i] refers to original node i.
        reverse = np.empty(nodes.size, nodes.dtype)
        reverse[nodes] = np.arange(nodes.size)
        labels = labels[reverse]

    self.labels_ = labels
    # Use the unshuffled matrix: the labels are back in the original order.
    self._secondary_outputs(adjacency)

    return self
def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> 'Louvain':
    """Fit algorithm to data.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    force_bipartite :
        If ``True``, force the input matrix to be considered as a biadjacency matrix even if square.

    Returns
    -------
    self: :class:`Louvain`

    Raises
    ------
    ValueError
        If ``self.modularity`` is not ``'potts'``, ``'newman'`` or ``'dugue'``.
    """
    self._init_vars()

    if self.modularity == 'dugue':
        adjacency, self.bipartite = get_adjacency(input_matrix, force_directed=True,
                                                  force_bipartite=force_bipartite)
    else:
        adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)

    n = adjacency.shape[0]

    # Shuffle first: the probabilities below must be indexed like the matrix
    # that is actually optimized.
    nodes = np.arange(n)
    if self.shuffle_nodes:
        nodes = self.random_state.permutation(nodes)
        adjacency = adjacency[nodes, :].tocsc()[:, nodes].tocsr()

    if self.modularity == 'potts':
        probs_out = get_probs('uniform', adjacency)
        probs_in = probs_out.copy()
    elif self.modularity == 'newman':
        probs_out = get_probs('degree', adjacency)
        probs_in = probs_out.copy()
    elif self.modularity == 'dugue':
        probs_out = get_probs('degree', adjacency)
        probs_in = get_probs('degree', adjacency.T)
    else:
        raise ValueError('Unknown modularity function.')

    adjacency_cluster = adjacency / adjacency.data.sum()

    membership = sparse.identity(n, format='csr')
    increase = True
    count_aggregations = 0
    self.log.print("Starting with", n, "nodes.")
    while increase:
        count_aggregations += 1

        labels_cluster, pass_increase = self._optimize(adjacency_cluster, probs_out, probs_in)
        # Relabel the clusters as consecutive integers.
        _, labels_cluster = np.unique(labels_cluster, return_inverse=True)

        if pass_increase <= self.tol_aggregation:
            increase = False
        else:
            membership_cluster = membership_matrix(labels_cluster)
            membership = membership.dot(membership_cluster)
            adjacency_cluster, probs_out, probs_in = self._aggregate(adjacency_cluster, probs_out, probs_in,
                                                                     membership_cluster)

            n = adjacency_cluster.shape[0]
            if n == 1:
                break
        self.log.print("Aggregation", count_aggregations, "completed with", n, "clusters and ",
                       pass_increase, "increment.")
        if count_aggregations == self.n_aggregations:
            break

    if self.sort_clusters:
        labels = reindex_labels(membership.indices)
    else:
        labels = membership.indices

    if self.shuffle_nodes:
        # Undo the permutation so that labels[i] refers to original node i.
        reverse = np.empty(nodes.size, nodes.dtype)
        reverse[nodes] = np.arange(nodes.size)
        labels = labels[reverse]

    self.labels_ = labels
    if self.bipartite:
        self._split_vars(input_matrix.shape)
    # Secondary outputs are computed from the original (unshuffled) input.
    self._secondary_outputs(input_matrix)

    return self
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'BiKMeans':
    """Embed the bipartite graph, then cluster the embedding with k-means.

    Parameters
    ----------
    biadjacency:
        Biadjacency matrix of the graph.

    Returns
    -------
    self: :class:`BiKMeans`
    """
    n_row, _ = biadjacency.shape
    check_n_clusters(self.n_clusters, n_row)

    embedder = self.embedding_method
    embedder.fit(biadjacency)

    # Co-clustering stacks the row and column embeddings so that both kinds
    # of nodes are clustered together.
    if self.co_cluster:
        points = np.vstack((embedder.embedding_row_, embedder.embedding_col_))
    else:
        points = embedder.embedding_

    kmeans = KMeansDense(self.n_clusters)
    kmeans.fit(points)

    labels = reindex_labels(kmeans.labels_) if self.sort_clusters else kmeans.labels_
    self.labels_ = labels

    if self.co_cluster:
        self._split_vars(n_row)
    else:
        self.labels_row_ = labels

    if self.return_membership:
        rows = membership_matrix(self.labels_row_, n_labels=self.n_clusters)
        if self.labels_col_ is None:
            # No column labels: go through the columns and back to the rows.
            self.membership_row_ = normalize(biadjacency.dot(biadjacency.T.dot(rows)))
        else:
            cols = membership_matrix(self.labels_col_, n_labels=self.n_clusters)
            self.membership_row_ = normalize(biadjacency.dot(cols))
            self.membership_col_ = normalize(biadjacency.T.dot(rows))
        self.membership_ = self.membership_row_

    if self.return_aggregate:
        rows = membership_matrix(self.labels_row_, n_labels=self.n_clusters)
        aggregate = sparse.csr_matrix(rows.T.dot(biadjacency))
        if self.labels_col_ is not None:
            cols = membership_matrix(self.labels_col_, n_labels=self.n_clusters)
            aggregate = aggregate.dot(cols)
        self.biadjacency_ = aggregate

    return self
def fit(self, biadjacency: sparse.csr_matrix):
    """Embedding of bipartite graphs from a clustering obtained with Louvain.

    Runs BiLouvain on the biadjacency matrix and uses its row and column
    membership matrices as embeddings. When ``self.merge_isolated`` is set,
    the clusters counted as singletons (exactly one row, resp. one column)
    are merged into a single embedding dimension.

    Parameters
    ----------
    biadjacency:
        Biadjacency matrix of the graph.

    Returns
    -------
    self: :class:`BiLouvainEmbedding`
    """
    bilouvain = BiLouvain(resolution=self.resolution, modularity=self.modularity,
                          tol_optimization=self.tol_optimization, tol_aggregation=self.tol_aggregation,
                          n_aggregations=self.n_aggregations, shuffle_nodes=self.shuffle_nodes,
                          sort_clusters=True, return_membership=True, return_aggregate=True,
                          random_state=self.random_state)
    bilouvain.fit(biadjacency)

    self.labels_ = bilouvain.labels_

    embedding_row = bilouvain.membership_row_
    embedding_col = bilouvain.membership_col_

    if self.merge_isolated:
        # Number of row clusters that contain exactly one row.
        _, counts_row = np.unique(bilouvain.labels_row_, return_counts=True)
        n_isolated_nodes_row = (counts_row == 1).sum()
        if n_isolated_nodes_row:
            # Resize in place to one column per distinct row label.
            size_row = (biadjacency.shape[0], len(counts_row))
            embedding_row.resize(size_row)
            # labels_row is the same object as bilouvain.labels_row_, so the
            # slice assignment below also changes the fitted estimator.
            labels_row = bilouvain.labels_row_
            # NOTE(review): this slice addresses the last entries of the
            # per-node label array (positions, not label values) - confirm
            # that the singleton-cluster nodes really are the trailing ones.
            labels_row[-n_isolated_nodes_row:] = labels_row[-n_isolated_nodes_row]
            # Map the trailing cluster indices onto the first of them.
            # presumably relies on sort_clusters=True placing singleton
            # clusters last - TODO confirm.
            merge_labels_row = np.arange(len(counts_row), dtype=int)
            merge_labels_row[-n_isolated_nodes_row:] = merge_labels_row[-n_isolated_nodes_row]
            combiner_row = membership_matrix(merge_labels_row)
            embedding_row = embedding_row.dot(combiner_row)
            self.labels_ = labels_row

        # Same merge for the column clusters (labels are not updated here).
        _, counts_col = np.unique(bilouvain.labels_col_, return_counts=True)
        n_isolated_nodes_col = (counts_col == 1).sum()
        if n_isolated_nodes_col:
            size_col = (biadjacency.shape[1], len(counts_col))
            embedding_col.resize(size_col)
            merge_labels_col = np.arange(embedding_col.shape[1], dtype=int)
            merge_labels_col[-n_isolated_nodes_col:] = merge_labels_col[-n_isolated_nodes_col]
            combiner_col = membership_matrix(merge_labels_col)
            embedding_col = embedding_col.dot(combiner_col)

    # Dense copies are stored; the main embedding is the row embedding.
    self.embedding_row_ = embedding_row.toarray()
    self.embedding_col_ = embedding_col.toarray()
    self.embedding_ = self.embedding_row_

    return self
def modularity(adjacency: Union[sparse.csr_matrix, np.ndarray], labels: np.ndarray,
               weights: Union[str, np.ndarray] = 'degree', weights_in: Union[str, np.ndarray, None] = 'degree',
               resolution: float = 1, return_all: bool = False) -> Union[float, Tuple[float, float, float]]:
    """Modularity of a clustering (node partition).

    * Graphs
    * Digraphs

    The modularity of a clustering is

    :math:`Q = \\sum_{i,j}\\left(\\dfrac{A_{ij}}{w} - \\gamma \\dfrac{w_iw_j}{w^2}\\right)\\delta_{c_i,c_j}`
    for graphs,

    :math:`Q = \\sum_{i,j}\\left(\\dfrac{A_{ij}}{w} - \\gamma \\dfrac{w^+_iw^-_j}{w^2}\\right)\\delta_{c_i,c_j}`
    for digraphs,

    where

    * :math:`c_i` is the cluster of node :math:`i`,\n
    * :math:`w_i` is the weight of node :math:`i`,\n
    * :math:`w^+_i, w^-_i` are the out-weight, in-weight of node :math:`i` (for digraphs),\n
    * :math:`w = 1^TA1` is the total weight,\n
    * :math:`\\delta` is the Kronecker symbol,\n
    * :math:`\\gamma \\ge 0` is the resolution parameter.

    Parameters
    ----------
    adjacency:
        Adjacency matrix of the graph.
    labels:
        Labels of nodes, vector of size :math:`n` .
    weights :
        Weights of nodes.
        ``'degree'`` (default), ``'uniform'`` or custom weights.
    weights_in :
        In-weights of nodes.
        ``'degree'`` (default), ``'uniform'``, custom weights or ``None``.
        If ``None``, taken equal to weights.
    resolution:
        Resolution parameter (default = 1).
    return_all:
        If ``True``, return modularity, fit, diversity.

    Returns
    -------
    modularity : float
    fit: float, optional
    diversity: float, optional

    Raises
    ------
    ValueError
        If the number of labels differs from the number of nodes.

    Example
    -------
    >>> from sknetwork.clustering import modularity
    >>> from sknetwork.data import house
    >>> adjacency = house()
    >>> labels = np.array([0, 0, 1, 1, 0])
    >>> np.round(modularity(adjacency, labels), 2)
    0.11
    """
    adjacency = check_format(adjacency).astype(float)
    check_square(adjacency)

    if len(labels) != adjacency.shape[0]:
        raise ValueError('Dimension mismatch between labels and adjacency matrix.')

    # Documented contract: no in-weights given means "same as weights".
    if weights_in is None:
        weights_in = weights

    probs_row = check_probs(weights, adjacency)
    probs_col = check_probs(weights_in, adjacency.T)
    membership = membership_matrix(labels)

    # fit: share of the total weight that falls inside clusters.
    fit: float = membership.multiply(adjacency.dot(membership)).data.sum() / adjacency.data.sum()
    # div: sum over clusters of (in-probability) x (out-probability).
    div: float = membership.T.dot(probs_col).dot(membership.T.dot(probs_row))
    mod: float = fit - resolution * div
    if return_all:
        return mod, fit, div
    else:
        return mod
def comodularity(adjacency: Union[sparse.csr_matrix, np.ndarray], labels: np.ndarray, resolution: float = 1,
                 return_all: bool = False) -> Union[float, Tuple[float, float, float]]:
    """Modularity of a clustering in the normalized co-neighborhood graph.

    * Graphs
    * Digraphs
    * Bigraphs

    Quality metric of a clustering given by:

    :math:`Q = \\sum_{i,j}\\left(\\dfrac{(AD_2^{-1}A^T)_{ij}}{w} - \\gamma \\dfrac{d_id_j}{w^2}\\right)
    \\delta_{c_i,c_j}`

    where

    * :math:`c_i` is the cluster of node `i`,\n
    * :math:`\\delta` is the Kronecker symbol,\n
    * :math:`\\gamma \\ge 0` is the resolution parameter.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    labels :
        Labels of the nodes.
    resolution :
        Resolution parameter (default = 1).
    return_all :
        If ``True``, return modularity, fit, diversity.

    Returns
    -------
    modularity : float
    fit : float, optional
    diversity: float, optional

    Raises
    ------
    ValueError
        If the number of labels differs from the number of rows.

    Example
    -------
    >>> from sknetwork.clustering import comodularity
    >>> from sknetwork.data import house
    >>> adjacency = house()
    >>> labels = np.array([0, 0, 1, 1, 0])
    >>> np.round(comodularity(adjacency, labels), 2)
    0.06

    Notes
    -----
    Does not require the computation of the adjacency matrix of the normalized co-neighborhood graph.
    """
    adjacency = check_format(adjacency).astype(float)
    n_row, n_col = adjacency.shape

    if len(labels) != n_row:
        raise ValueError('The number of labels must match the number of rows.')

    total_weight = adjacency.data.sum()
    # Row weights, normalized to a probability distribution over the rows.
    probs = adjacency.dot(np.ones(n_col)) / total_weight

    # Column weights: adjacency.T has shape (n_col, n_row), so the vector of
    # ones must have length n_row (this matters for non-square matrices).
    weights_col = adjacency.T.dot(np.ones(n_row))
    diag_col = diag_pinv(np.sqrt(weights_col))
    normalized_adjacency = (adjacency.dot(diag_col)).T.tocsr()

    membership = membership_matrix(labels)

    # Sum of the squared entries of D^{-1/2} A^T M, divided by the total weight.
    fit: float = ((normalized_adjacency.dot(membership)).data ** 2).sum() / total_weight
    div: float = np.linalg.norm(membership.T.dot(probs)) ** 2
    mod: float = fit - resolution * div

    if return_all:
        return mod, fit, div
    else:
        return mod