def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'KMeans':
    """Embed the graph, then cluster the embedding with K-means.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.

    Returns
    -------
    self : :class:`KMeans`
    """
    # The requested number of clusters is validated against the number of rows.
    check_n_clusters(self.n_clusters, adjacency.shape[0])

    # Embedding step followed by dense K-means on the embedding vectors.
    vectors = self.embedding_method.fit_transform(adjacency)
    dense_kmeans = KMeansDense(self.n_clusters)
    dense_kmeans.fit(vectors)

    # Optionally relabel clusters through ``reindex_labels``.
    self.labels_ = reindex_labels(dense_kmeans.labels_) if self.sort_clusters else dense_kmeans.labels_

    self._secondary_outputs(adjacency)
    return self
def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray]) -> 'KMeans':
    """Embed the graph, then cluster the embedding with K-means.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.

    Returns
    -------
    self : :class:`KMeans`
    """
    self._init_vars()

    # Input checks: when co-clustering, the bound on the number of clusters is
    # the sum of both matrix dimensions; otherwise it is the number of rows.
    check_format(input_matrix)
    if self.co_cluster:
        n_max = np.sum(input_matrix.shape)
    else:
        n_max = input_matrix.shape[0]
    check_n_clusters(self.n_clusters, n_max)

    # Embedding; the helper also reports whether the graph is handled as bipartite.
    embedding, self.bipartite = get_embedding(input_matrix, self.embedding_method, self.co_cluster)

    # Dense K-means on the embedding vectors.
    dense_kmeans = KMeansDense(self.n_clusters)
    dense_kmeans.fit(embedding)

    # Optionally relabel clusters through ``reindex_labels``.
    self.labels_ = reindex_labels(dense_kmeans.labels_) if self.sort_clusters else dense_kmeans.labels_

    # Outputs.
    if self.co_cluster:
        self._split_vars(input_matrix.shape)
    self._secondary_outputs(input_matrix)
    return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'Louvain':
    """Fit algorithm to the data.

    Alternates a local optimization pass (``self._optimize``) with an
    aggregation pass (``self._aggregate``) until the increase reported by the
    optimization is at most ``self.tol_aggregation``, a single cluster
    remains, or ``self.n_aggregations`` passes have been done.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.

    Returns
    -------
    self : :class:`Louvain`
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    n_nodes = adjacency.shape[0]

    # Optimize a (possibly permuted) working matrix. ``adjacency`` itself is
    # left in the caller's node order because the labels are mapped back to
    # that order before the outputs are computed.
    nodes = np.arange(n_nodes)
    adjacency_work = adjacency
    if self.shuffle_nodes:
        nodes = self.random_state.permutation(nodes)
        adjacency_work = adjacency[nodes, :].tocsc()[:, nodes].tocsr()

    # Node weights are computed from the working matrix so that entry i of
    # each vector corresponds to row / column i of the matrix being optimized.
    # Computing them before the shuffle would leave them in the original order.
    probs_out = check_probs('degree', adjacency_work)
    probs_in = check_probs('degree', adjacency_work.T)

    adjacency_norm = adjacency_work / adjacency_work.data.sum()

    # Cumulative node-to-cluster assignment; starts with one cluster per node.
    membership = sparse.identity(n_nodes, format='csr')
    increase = True
    count_aggregations = 0
    self.log.print("Starting with", n_nodes, "nodes.")
    while increase:
        count_aggregations += 1

        current_labels, pass_increase = self._optimize(n_nodes, adjacency_norm, probs_out, probs_in)
        # Relabel to consecutive integers 0..k-1.
        _, current_labels = np.unique(current_labels, return_inverse=True)

        if pass_increase <= self.tol_aggregation:
            increase = False
        else:
            membership_agg = membership_matrix(current_labels)
            membership = membership.dot(membership_agg)
            n_nodes, adjacency_norm, probs_out, probs_in = self._aggregate(
                adjacency_norm, probs_out, probs_in, membership_agg)

            if n_nodes == 1:
                break
        self.log.print("Aggregation", count_aggregations, "completed with", n_nodes, "clusters and ",
                       pass_increase, "increment.")
        if count_aggregations == self.n_aggregations:
            break

    if self.sort_clusters:
        labels = reindex_labels(membership.indices)
    else:
        labels = membership.indices

    if self.shuffle_nodes:
        # Invert the permutation so that labels follow the original node order.
        reverse = np.empty(nodes.size, nodes.dtype)
        reverse[nodes] = np.arange(nodes.size)
        labels = labels[reverse]

    self.labels_ = labels
    # Pass the matrix in original node order, i.e. the same order as ``labels_``.
    self._secondary_outputs(adjacency)

    return self
def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'Louvain':
    """Fit algorithm to the data.

    Alternates a local optimization pass (``self._optimize``) with an
    aggregation pass (``self._aggregate``) until the increase reported by the
    optimization is at most ``self.tol_aggregation``, a single cluster
    remains, or ``self.n_aggregations`` passes have been done.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.

    Returns
    -------
    self : :class:`Louvain`

    Raises
    ------
    ValueError
        If ``self.modularity`` is not one of ``'potts'``, ``'newman'`` or ``'dugue'``.
    """
    adjacency = check_format(adjacency)
    check_square(adjacency)
    n = adjacency.shape[0]

    # Optimize a (possibly permuted) working matrix. ``adjacency`` itself is
    # left in the caller's node order because the labels are mapped back to
    # that order before the outputs are computed.
    nodes = np.arange(n, dtype=np.int32)
    adjacency_work = adjacency
    if self.shuffle_nodes:
        nodes = self.random_state.permutation(nodes)
        adjacency_work = adjacency[nodes, :].tocsc()[:, nodes].tocsr()

    # Node weights are computed from the working matrix so that entry i of
    # each vector corresponds to row / column i of the matrix being optimized.
    # Computing them before the shuffle would leave them in the original order.
    if self.modularity == 'potts':
        probs_ou = check_probs('uniform', adjacency_work)
        probs_in = probs_ou.copy()
    elif self.modularity == 'newman':
        probs_ou = check_probs('degree', adjacency_work)
        probs_in = probs_ou.copy()
    elif self.modularity == 'dugue':
        probs_ou = check_probs('degree', adjacency_work)
        probs_in = check_probs('degree', adjacency_work.T)
    else:
        raise ValueError('Unknown modularity function.')

    adjacency_clust = adjacency_work / adjacency_work.data.sum()

    # Cumulative node-to-cluster assignment; starts with one cluster per node.
    membership = sparse.identity(n, format='csr')
    increase = True
    count_aggregations = 0
    self.log.print("Starting with", n, "nodes.")
    while increase:
        count_aggregations += 1

        labels_clust, pass_increase = self._optimize(adjacency_clust, probs_ou, probs_in)
        # Relabel to consecutive integers 0..k-1.
        _, labels_clust = np.unique(labels_clust, return_inverse=True)

        if pass_increase <= self.tol_aggregation:
            increase = False
        else:
            membership_clust = membership_matrix(labels_clust)
            membership = membership.dot(membership_clust)
            adjacency_clust, probs_ou, probs_in = self._aggregate(
                adjacency_clust, probs_ou, probs_in, membership_clust)

            n = adjacency_clust.shape[0]
            if n == 1:
                break
        self.log.print("Aggregation", count_aggregations, "completed with", n, "clusters and ",
                       pass_increase, "increment.")
        if count_aggregations == self.n_aggregations:
            break

    if self.sort_clusters:
        labels = reindex_labels(membership.indices)
    else:
        labels = membership.indices

    if self.shuffle_nodes:
        # Invert the permutation so that labels follow the original node order.
        reverse = np.empty(nodes.size, nodes.dtype)
        reverse[nodes] = np.arange(nodes.size)
        labels = labels[reverse]

    self.labels_ = labels
    # Pass the matrix in original node order, i.e. the same order as ``labels_``.
    self._secondary_outputs(adjacency)

    return self
def fit(self, input_matrix: Union[sparse.csr_matrix, np.ndarray], force_bipartite: bool = False) -> 'Louvain':
    """Fit algorithm to data.

    Alternates a local optimization pass (``self._optimize``) with an
    aggregation pass (``self._aggregate``) until the increase reported by the
    optimization is at most ``self.tol_aggregation``, a single cluster
    remains, or ``self.n_aggregations`` passes have been done.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    force_bipartite :
        If ``True``, force the input matrix to be considered as a biadjacency matrix even if square.

    Returns
    -------
    self : :class:`Louvain`

    Raises
    ------
    ValueError
        If ``self.modularity`` is not one of ``'potts'``, ``'newman'`` or ``'dugue'``.
    """
    self._init_vars()

    if self.modularity == 'dugue':
        adjacency, self.bipartite = get_adjacency(input_matrix, force_directed=True,
                                                  force_bipartite=force_bipartite)
    else:
        adjacency, self.bipartite = get_adjacency(input_matrix, force_bipartite=force_bipartite)

    n = adjacency.shape[0]

    # Shuffle first: the node weights below must be derived from the matrix
    # that is actually optimized, otherwise weight i would describe a
    # different node than row / column i of the permuted matrix.
    nodes = np.arange(n)
    if self.shuffle_nodes:
        nodes = self.random_state.permutation(nodes)
        adjacency = adjacency[nodes, :].tocsc()[:, nodes].tocsr()

    if self.modularity == 'potts':
        probs_out = get_probs('uniform', adjacency)
        probs_in = probs_out.copy()
    elif self.modularity == 'newman':
        probs_out = get_probs('degree', adjacency)
        probs_in = probs_out.copy()
    elif self.modularity == 'dugue':
        probs_out = get_probs('degree', adjacency)
        probs_in = get_probs('degree', adjacency.T)
    else:
        raise ValueError('Unknown modularity function.')

    adjacency_cluster = adjacency / adjacency.data.sum()

    # Cumulative node-to-cluster assignment; starts with one cluster per node.
    membership = sparse.identity(n, format='csr')
    increase = True
    count_aggregations = 0
    self.log.print("Starting with", n, "nodes.")
    while increase:
        count_aggregations += 1

        labels_cluster, pass_increase = self._optimize(adjacency_cluster, probs_out, probs_in)
        # Relabel to consecutive integers 0..k-1.
        _, labels_cluster = np.unique(labels_cluster, return_inverse=True)

        if pass_increase <= self.tol_aggregation:
            increase = False
        else:
            membership_cluster = membership_matrix(labels_cluster)
            membership = membership.dot(membership_cluster)
            adjacency_cluster, probs_out, probs_in = self._aggregate(adjacency_cluster, probs_out, probs_in,
                                                                     membership_cluster)

            n = adjacency_cluster.shape[0]
            if n == 1:
                break
        self.log.print("Aggregation", count_aggregations, "completed with", n, "clusters and ",
                       pass_increase, "increment.")
        if count_aggregations == self.n_aggregations:
            break

    if self.sort_clusters:
        labels = reindex_labels(membership.indices)
    else:
        labels = membership.indices

    if self.shuffle_nodes:
        # Invert the permutation so that labels follow the original node order.
        reverse = np.empty(nodes.size, nodes.dtype)
        reverse[nodes] = np.arange(nodes.size)
        labels = labels[reverse]

    self.labels_ = labels
    if self.bipartite:
        self._split_vars(input_matrix.shape)
    self._secondary_outputs(input_matrix)

    return self
def fit(self, biadjacency: Union[sparse.csr_matrix, np.ndarray]) -> 'BiKMeans':
    """Apply embedding method followed by clustering to the graph.

    Parameters
    ----------
    biadjacency :
        Biadjacency matrix of the graph.

    Returns
    -------
    self : :class:`BiKMeans`
    """
    n_row, n_col = biadjacency.shape

    # With co-clustering, K-means runs on row and column embeddings stacked
    # together, so the number of samples is n_row + n_col rather than n_row.
    n_samples = n_row + n_col if self.co_cluster else n_row
    check_n_clusters(self.n_clusters, n_samples)

    method = self.embedding_method
    method.fit(biadjacency)

    if self.co_cluster:
        embedding = np.vstack((method.embedding_row_, method.embedding_col_))
    else:
        embedding = method.embedding_

    kmeans = KMeansDense(self.n_clusters)
    kmeans.fit(embedding)

    if self.sort_clusters:
        labels = reindex_labels(kmeans.labels_)
    else:
        labels = kmeans.labels_

    self.labels_ = labels
    if self.co_cluster:
        self._split_vars(n_row)
    else:
        self.labels_row_ = labels

    # Both optional outputs use the same membership matrices; build them once.
    membership_row = None
    membership_col = None
    if self.return_membership or self.return_aggregate:
        membership_row = membership_matrix(self.labels_row_, n_labels=self.n_clusters)
        if self.labels_col_ is not None:
            membership_col = membership_matrix(self.labels_col_, n_labels=self.n_clusters)

    if self.return_membership:
        if membership_col is not None:
            self.membership_row_ = normalize(biadjacency.dot(membership_col))
            self.membership_col_ = normalize(biadjacency.T.dot(membership_row))
        else:
            # No column labels: reach row clusters through two hops (rows -> columns -> rows).
            self.membership_row_ = normalize(biadjacency.dot(biadjacency.T.dot(membership_row)))
        self.membership_ = self.membership_row_

    if self.return_aggregate:
        # Aggregate rows by cluster, and columns too when column labels exist.
        biadjacency_ = sparse.csr_matrix(membership_row.T.dot(biadjacency))
        if membership_col is not None:
            biadjacency_ = biadjacency_.dot(membership_col)
        self.biadjacency_ = biadjacency_

    return self