def _instanciate_vars(adjacency: sparse.csr_matrix, weights: str = 'uniform'):
    """Initialize standard variables for metrics."""
    n = adjacency.shape[0]
    weights_row = check_probs(weights, adjacency)
    weights_col = check_probs(weights, adjacency.T)
    sym_adjacency = directed2undirected(adjacency)

    aggregate_graph = AggregateGraph(weights_row, weights_col, sym_adjacency.data.astype(float),
                                     sym_adjacency.indices, sym_adjacency.indptr)

    height = np.zeros(n - 1)
    cluster_weight = np.zeros(n - 1)
    edge_sampling = np.zeros(n - 1)

    return aggregate_graph, height, cluster_weight, edge_sampling, weights_row, weights_col
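# Illustrative sketch (not part of the library): the helper above prepares, for a graph with
# n nodes, normalized node weights plus three arrays of size n - 1, one slot per merge of a
# binary dendrogram. The standalone snippet below only mirrors that bookkeeping with plain
# numpy/scipy; _node_probs is a hypothetical stand-in for check_probs, and AggregateGraph /
# directed2undirected (internal helpers) are not reproduced here.
import numpy as np
from scipy import sparse


def _node_probs(adjacency: sparse.csr_matrix, weights: str = 'uniform') -> np.ndarray:
    # Return a probability vector over nodes: uniform, or proportional to (out-)degree.
    n = adjacency.shape[0]
    if weights == 'degree':
        degrees = adjacency.dot(np.ones(n))
        return degrees / degrees.sum()
    return np.ones(n) / n


adjacency = sparse.csr_matrix(np.array([[0, 1, 1], [1, 0, 0], [1, 0, 0]], dtype=float))
weights_row = _node_probs(adjacency, 'degree')
weights_col = _node_probs(adjacency.T, 'degree')
height = np.zeros(adjacency.shape[0] - 1)  # one entry per merge of the dendrogram
print(weights_row, weights_col, height)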
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'Louvain': """Fit algorithm to the data. Parameters ---------- adjacency : Adjacency matrix of the graph. Returns ------- self: :class:`Louvain` """ adjacency = check_format(adjacency) check_square(adjacency) n_nodes = adjacency.shape[0] probs_out = check_probs('degree', adjacency) probs_in = check_probs('degree', adjacency.T) nodes = np.arange(n_nodes) if self.shuffle_nodes: nodes = self.random_state.permutation(nodes) adjacency = adjacency[nodes, :].tocsc()[:, nodes].tocsr() adjacency_norm = adjacency / adjacency.data.sum() membership = sparse.identity(n_nodes, format='csr') increase = True count_aggregations = 0 self.log.print("Starting with", n_nodes, "nodes.") while increase: count_aggregations += 1 current_labels, pass_increase = self._optimize( n_nodes, adjacency_norm, probs_out, probs_in) _, current_labels = np.unique(current_labels, return_inverse=True) if pass_increase <= self.tol_aggregation: increase = False else: membership_agg = membership_matrix(current_labels) membership = membership.dot(membership_agg) n_nodes, adjacency_norm, probs_out, probs_in = self._aggregate( adjacency_norm, probs_out, probs_in, membership_agg) if n_nodes == 1: break self.log.print("Aggregation", count_aggregations, "completed with", n_nodes, "clusters and ", pass_increase, "increment.") if count_aggregations == self.n_aggregations: break if self.sort_clusters: labels = reindex_labels(membership.indices) else: labels = membership.indices if self.shuffle_nodes: reverse = np.empty(nodes.size, nodes.dtype) reverse[nodes] = np.arange(nodes.size) labels = labels[reverse] self.labels_ = labels self._secondary_outputs(adjacency) return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'Louvain': """Fit algorithm to the data. Parameters ---------- adjacency : Adjacency matrix of the graph. Returns ------- self: :class:`Louvain` """ adjacency = check_format(adjacency) check_square(adjacency) n = adjacency.shape[0] if self.modularity == 'potts': probs_ou = check_probs('uniform', adjacency) probs_in = probs_ou.copy() elif self.modularity == 'newman': probs_ou = check_probs('degree', adjacency) probs_in = probs_ou.copy() elif self.modularity == 'dugue': probs_ou = check_probs('degree', adjacency) probs_in = check_probs('degree', adjacency.T) else: raise ValueError('Unknown modularity function.') nodes = np.arange(n, dtype=np.int32) if self.shuffle_nodes: nodes = self.random_state.permutation(nodes) adjacency = adjacency[nodes, :].tocsc()[:, nodes].tocsr() adjacency_clust = adjacency / adjacency.data.sum() membership = sparse.identity(n, format='csr') increase = True count_aggregations = 0 self.log.print("Starting with", n, "nodes.") while increase: count_aggregations += 1 labels_clust, pass_increase = self._optimize( adjacency_clust, probs_ou, probs_in) _, labels_clust = np.unique(labels_clust, return_inverse=True) if pass_increase <= self.tol_aggregation: increase = False else: membership_clust = membership_matrix(labels_clust) membership = membership.dot(membership_clust) adjacency_clust, probs_ou, probs_in = self._aggregate( adjacency_clust, probs_ou, probs_in, membership_clust) n = adjacency_clust.shape[0] if n == 1: break self.log.print("Aggregation", count_aggregations, "completed with", n, "clusters and ", pass_increase, "increment.") if count_aggregations == self.n_aggregations: break if self.sort_clusters: labels = reindex_labels(membership.indices) else: labels = membership.indices if self.shuffle_nodes: reverse = np.empty(nodes.size, nodes.dtype) reverse[nodes] = np.arange(nodes.size) labels = labels[reverse] self.labels_ = labels self._secondary_outputs(adjacency) return self
def cosine_modularity(adjacency, embedding: np.ndarray, embedding_col=None, resolution=1., weights='degree',
                      return_all: bool = False):
    """Quality metric of an embedding :math:`x` defined by:

    :math:`Q = \\sum_{ij}\\left(\\dfrac{A_{ij}}{w} - \\gamma \\dfrac{w^+_iw^-_j}{w^2}\\right)
    \\left(\\dfrac{1 + \\cos(x_i, x_j)}{2}\\right)`

    where

    * :math:`w^+_i, w^-_i` are the out-weight, in-weight of node :math:`i` (for digraphs),\n
    * :math:`w = 1^TA1` is the total weight of the graph.

    For bipartite graphs with column embedding :math:`y`, the metric is

    :math:`Q = \\sum_{ij}\\left(\\dfrac{B_{ij}}{w} - \\gamma \\dfrac{w_{1,i}w_{2,j}}{w^2}\\right)
    \\left(\\dfrac{1 + \\cos(x_i, y_j)}{2}\\right)`

    where

    * :math:`w_{1,i}, w_{2,j}` are the weights of nodes :math:`i` (row) and :math:`j` (column),\n
    * :math:`w = 1^TB1` is the total weight of the graph.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    embedding :
        Embedding of the nodes.
    embedding_col :
        Embedding of the columns (for bipartite graphs).
    resolution :
        Resolution parameter.
    weights : ``'degree'`` or ``'uniform'``
        Weights of the nodes.
    return_all :
        If ``True``, also return fit and diversity.

    Returns
    -------
    modularity : float
    fit : float, optional
    diversity : float, optional

    Example
    -------
    >>> from sknetwork.embedding import cosine_modularity
    >>> from sknetwork.data import karate_club
    >>> graph = karate_club(metadata=True)
    >>> adjacency = graph.adjacency
    >>> embedding = graph.position
    >>> np.round(cosine_modularity(adjacency, embedding), 2)
    0.35
    """
    adjacency = check_format(adjacency)
    total_weight: float = adjacency.data.sum()

    if embedding_col is None:
        check_square(adjacency)
        embedding_col = embedding.copy()

    embedding_row_norm = normalize(embedding, p=2)
    embedding_col_norm = normalize(embedding_col, p=2)

    probs_row = check_probs(weights, adjacency)
    probs_col = check_probs(weights, adjacency.T)

    if isinstance(embedding_row_norm, sparse.csr_matrix) and isinstance(embedding_col_norm, sparse.csr_matrix):
        fit: float = 0.5 * (1 + (embedding_row_norm.multiply(adjacency.dot(embedding_col_norm))).sum()
                            / total_weight)
    else:
        fit: float = 0.5 * (1 + (np.multiply(embedding_row_norm, adjacency.dot(embedding_col_norm))).sum()
                            / total_weight)

    div: float = 0.5 * (1 + (embedding.T.dot(probs_row)).dot(embedding_col.T.dot(probs_col)))

    # Return order follows the Returns section: modularity, fit, diversity.
    if return_all:
        return fit - resolution * div, fit, div
    else:
        return fit - resolution * div
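# Usage sketch: decompose the cosine modularity of an embedding into its fit and diversity
# terms (with return_all=True the function, as written above, returns modularity, fit,
# diversity in that order). Spectral is the spectral embedding shipped with sknetwork; any
# node embedding would do.
import numpy as np
from sknetwork.data import karate_club
from sknetwork.embedding import Spectral, cosine_modularity

adjacency = karate_club()
embedding = Spectral(n_components=2).fit_transform(adjacency)
mod, fit, div = cosine_modularity(adjacency, embedding, return_all=True)
print(np.round(mod, 2), np.round(fit - div, 2))  # modularity = fit - resolution * diversity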
def bimodularity(biadjacency: Union[sparse.csr_matrix, np.ndarray], labels: np.ndarray, labels_col: np.ndarray,
                 weights: Union[str, np.ndarray] = 'degree', weights_col: Union[str, np.ndarray] = 'degree',
                 resolution: float = 1, return_all: bool = False) -> Union[float, Tuple[float, float, float]]:
    """Bimodularity of a clustering (node partition).

    * Bigraphs

    The bimodularity of a clustering is

    :math:`Q = \\sum_{i}\\sum_{j}\\left(\\dfrac{B_{ij}}{w} - \\gamma \\dfrac{w_{1,i}w_{2,j}}{w^2}\\right)
    \\delta_{c_{1,i},c_{2,j}}`

    where

    * :math:`c_{1,i}, c_{2,j}` are the clusters of nodes :math:`i` (row) and :math:`j` (column),\n
    * :math:`w_{1,i}, w_{2,j}` are the weights of nodes :math:`i` (row) and :math:`j` (column),\n
    * :math:`w = 1^TB1` is the total weight,\n
    * :math:`\\delta` is the Kronecker symbol,\n
    * :math:`\\gamma \\ge 0` is the resolution parameter.

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix of the graph (shape :math:`n_1 \\times n_2`).
    labels :
        Labels of rows, vector of size :math:`n_1`.
    labels_col :
        Labels of columns, vector of size :math:`n_2`.
    weights :
        Weights of rows. ``'degree'`` (default), ``'uniform'`` or custom weights.
    weights_col :
        Weights of columns. ``'degree'`` (default), ``'uniform'`` or custom weights.
    resolution :
        Resolution parameter (default = 1).
    return_all :
        If ``True``, return modularity, fit, diversity.

    Returns
    -------
    modularity : float
    fit : float, optional
    diversity : float, optional

    Example
    -------
    >>> from sknetwork.clustering import bimodularity
    >>> from sknetwork.data import star_wars
    >>> biadjacency = star_wars()
    >>> labels = np.array([1, 1, 0, 0])
    >>> labels_col = np.array([1, 0, 0])
    >>> np.round(bimodularity(biadjacency, labels, labels_col), 2)
    0.22
    """
    biadjacency = check_format(biadjacency).astype(float)
    n_row, n_col = biadjacency.shape

    if len(labels) != n_row:
        raise ValueError('Dimension mismatch between labels and biadjacency matrix.')
    if len(labels_col) != n_col:
        raise ValueError('Dimension mismatch between labels_col and biadjacency matrix.')

    adjacency = bipartite2directed(biadjacency)

    weights_ = check_probs(weights, biadjacency)
    weights_ = np.hstack((weights_, np.zeros(n_col)))
    weights_col_ = check_probs(weights_col, biadjacency.T)
    weights_col_ = np.hstack((np.zeros(n_row), weights_col_))

    labels_ = np.hstack((labels, labels_col))

    return modularity(adjacency, labels_, weights_, weights_col_, resolution, return_all)
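# Usage sketch: the star_wars toy bigraph from the docstring, with return_all=True to inspect
# the fit and diversity terms behind the 0.22 figure above (modularity, fit, diversity is the
# order returned by the underlying modularity call).
import numpy as np
from sknetwork.clustering import bimodularity
from sknetwork.data import star_wars

biadjacency = star_wars()
labels_row = np.array([1, 1, 0, 0])
labels_col = np.array([1, 0, 0])
mod, fit, div = bimodularity(biadjacency, labels_row, labels_col, return_all=True)
print(np.round(mod, 2), np.round(fit, 2), np.round(div, 2))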
def modularity(adjacency: Union[sparse.csr_matrix, np.ndarray], labels: np.ndarray,
               weights: Union[str, np.ndarray] = 'degree', weights_in: Union[str, np.ndarray] = 'degree',
               resolution: float = 1, return_all: bool = False) -> Union[float, Tuple[float, float, float]]:
    """Modularity of a clustering (node partition).

    * Graphs
    * Digraphs

    The modularity of a clustering is

    :math:`Q = \\sum_{i,j}\\left(\\dfrac{A_{ij}}{w} - \\gamma \\dfrac{w_iw_j}{w^2}\\right)\\delta_{c_i,c_j}`
    for graphs,

    :math:`Q = \\sum_{i,j}\\left(\\dfrac{A_{ij}}{w} - \\gamma \\dfrac{w^+_iw^-_j}{w^2}\\right)\\delta_{c_i,c_j}`
    for digraphs,

    where

    * :math:`c_i` is the cluster of node :math:`i`,\n
    * :math:`w_i` is the weight of node :math:`i`,\n
    * :math:`w^+_i, w^-_i` are the out-weight, in-weight of node :math:`i` (for digraphs),\n
    * :math:`w = 1^TA1` is the total weight,\n
    * :math:`\\delta` is the Kronecker symbol,\n
    * :math:`\\gamma \\ge 0` is the resolution parameter.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    labels :
        Labels of nodes, vector of size :math:`n`.
    weights :
        Weights (out-weights) of nodes. ``'degree'`` (default), ``'uniform'`` or custom weights.
    weights_in :
        In-weights of nodes. ``'degree'`` (default), ``'uniform'`` or custom weights.
    resolution :
        Resolution parameter (default = 1).
    return_all :
        If ``True``, return modularity, fit, diversity.

    Returns
    -------
    modularity : float
    fit : float, optional
    diversity : float, optional

    Example
    -------
    >>> from sknetwork.clustering import modularity
    >>> from sknetwork.data import house
    >>> adjacency = house()
    >>> labels = np.array([0, 0, 1, 1, 0])
    >>> np.round(modularity(adjacency, labels), 2)
    0.11
    """
    adjacency = check_format(adjacency).astype(float)
    check_square(adjacency)

    if len(labels) != adjacency.shape[0]:
        raise ValueError('Dimension mismatch between labels and adjacency matrix.')

    probs_row = check_probs(weights, adjacency)
    probs_col = check_probs(weights_in, adjacency.T)
    membership = membership_matrix(labels)

    fit: float = membership.multiply(adjacency.dot(membership)).data.sum() / adjacency.data.sum()
    div: float = membership.T.dot(probs_col).dot(membership.T.dot(probs_row))
    mod: float = fit - resolution * div
    if return_all:
        return mod, fit, div
    else:
        return mod
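# Illustrative cross-check (not part of the library): a direct O(n^2) evaluation of the formula
# above, Q = sum_ij (A_ij / w - gamma * w_i w_j / w^2) * delta(c_i, c_j), with degree weights.
# Useful to sanity-check the vectorized membership-matrix computation on small graphs; the
# 5-node matrix below is a house-shaped toy graph (a 5-cycle plus one chord) for which these
# labels give 0.11, matching the docstring example.
import numpy as np


def modularity_naive(adjacency: np.ndarray, labels: np.ndarray, resolution: float = 1.) -> float:
    total = adjacency.sum()
    weights = adjacency.sum(axis=1) / total  # w_i / w
    q = 0.
    for i in range(adjacency.shape[0]):
        for j in range(adjacency.shape[0]):
            if labels[i] == labels[j]:
                q += adjacency[i, j] / total - resolution * weights[i] * weights[j]
    return q


house_like = np.array([[0, 1, 0, 0, 1],
                       [1, 0, 1, 0, 1],
                       [0, 1, 0, 1, 0],
                       [0, 0, 1, 0, 1],
                       [1, 1, 0, 1, 0]], dtype=float)
print(np.round(modularity_naive(house_like, np.array([0, 0, 1, 1, 0])), 2))  # 0.11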