def get_embedding(input_matrix: Union[sparse.csr_matrix, np.ndarray], method: BaseEmbedding,
                  co_embedding: bool = False) -> Tuple[np.ndarray, bool]:
    """Fit the embedding method on the input matrix and return the embedding.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    method :
        Embedding method. It is fitted in place by this function.
    co_embedding : bool
        If ``True``, co-embedding of rows and columns: ``method.embedding_row_`` and
        ``method.embedding_col_`` are stacked vertically (rows first).
        Otherwise, ``method.embedding_`` is returned.

    Returns
    -------
    embedding : np.ndarray
        Embedding computed by ``method``.
    bipartite : bool
        ``True`` if the input matrix is not square or if ``co_embedding`` is ``True``.
    """
    bipartite = (not is_square(input_matrix)) or co_embedding
    if co_embedding:
        try:
            method.fit(input_matrix, force_bipartite=True)
        except Exception:
            # Best-effort retry without the keyword (presumably for methods whose ``fit``
            # does not accept ``force_bipartite``). Catching ``Exception`` rather than
            # using a bare ``except`` lets KeyboardInterrupt / SystemExit propagate
            # instead of silently triggering a second fit.
            method.fit(input_matrix)
        embedding = np.vstack((method.embedding_row_, method.embedding_col_))
    else:
        method.fit(input_matrix)
        embedding = method.embedding_
    return embedding, bipartite
def get_adjacency(input_matrix: Union[sparse.csr_matrix, np.ndarray], allow_directed: bool = True,
                  force_bipartite: bool = False, force_directed: bool = False) \
        -> Tuple[sparse.csr_matrix, bool]:
    """Validate the input matrix and build the adjacency matrix to work with.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    allow_directed :
        If ``True`` (default), allow the graph to be directed.
    force_bipartite : bool
        If ``True``, return the adjacency matrix of a bipartite graph.
        Otherwise (default), do it only if the input matrix is not square or not symmetric
        with ``allow_directed=False``.
    force_directed :
        If ``True`` return :math:`A = \\begin{bmatrix} 0 & B \\\\ 0 & 0 \\end{bmatrix}`.
        Otherwise (default), return :math:`A = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`.

    Returns
    -------
    adjacency : sparse.csr_matrix
        The formatted input itself when it is kept as an adjacency matrix, otherwise the
        adjacency matrix built from the input seen as a biadjacency matrix.
    bipartite : bool
        ``True`` if the input has been treated as a biadjacency matrix.
    """
    matrix = check_format(input_matrix)

    # The input is kept as a plain adjacency matrix only when nothing forces the bipartite
    # view: it is square and either directed graphs are allowed or the matrix is symmetric.
    keep_as_adjacency = (not force_bipartite
                         and is_square(matrix)
                         and (allow_directed or is_symmetric(matrix)))
    if keep_as_adjacency:
        return matrix, False

    to_adjacency = bipartite2directed if force_directed else bipartite2undirected
    return to_adjacency(matrix), True
def save(folder: str, data: Union[sparse.csr_matrix, Bunch]):
    """Write a Bunch or a CSR matrix to a bundle folder made of Numpy and Pickle files,
    for faster subsequent loads.

    An existing folder with the same name is removed first. A bare CSR matrix is wrapped
    in a Bunch, under ``adjacency`` if it is square and under ``biadjacency`` otherwise.

    Parameters
    ----------
    folder : str
        The name to be used for the bundle folder (``~`` is expanded).
    data : Union[sparse.csr_matrix, Bunch]
        The data to save

    Example
    -------
    >>> from sknetwork.data import save
    >>> graph = Bunch()
    >>> graph.adjacency = sparse.csr_matrix(np.random.random((10, 10)) < 0.2)
    >>> graph.names = np.array(list('abcdefghij'))
    >>> save('random_data', graph)
    >>> 'random_data' in listdir('.')
    True
    """
    target = expanduser(folder)
    # Start from a clean slate: any previous bundle at this location is discarded.
    if exists(target):
        shutil.rmtree(target)

    bundle = data
    if isinstance(data, sparse.csr_matrix):
        bundle = Bunch()
        if is_square(data):
            bundle.adjacency = data
        else:
            bundle.biadjacency = data

    # Absolute targets get an empty base directory, relative ones the current directory.
    data_home = '' if isabs(target) else './'
    save_to_numpy_bundle(bundle, target, data_home)
def largest_connected_component(adjacency: Union[sparse.csr_matrix, np.ndarray], return_labels: bool = False):
    """Return the largest connected component of a graph. Bipartite graphs are treated as undirected.

    * Graphs
    * Digraphs
    * Bigraphs

    Parameters
    ----------
    adjacency :
        Adjacency or biadjacency matrix of the graph.
    return_labels : bool
        Whether to return the indices of the new nodes in the original graph.

    Returns
    -------
    new_adjacency : sparse.csr_matrix
        Adjacency or biadjacency matrix of the largest connected component.
    indices : array or tuple of array
        Indices of the nodes in the original graph. For biadjacency matrices,
        ``indices[0]`` corresponds to the rows and ``indices[1]`` to the columns.
        Only returned if ``return_labels`` is ``True``.
    """
    adjacency = check_format(adjacency)
    n_row = adjacency.shape[0]

    # A non-square input is a biadjacency matrix: components are searched in the
    # symmetric graph where rows come first, followed by columns.
    bipartite = not is_square(adjacency)
    if bipartite:
        graph = sparse.bmat([[None, adjacency], [adjacency.T, None]], format='csr')
    else:
        graph = adjacency

    labels = connected_components(graph)
    values, sizes = np.unique(labels, return_counts=True)
    biggest = values[np.argmax(sizes)]
    members = np.nonzero(labels == biggest)[0]

    if bipartite:
        # members is sorted, so indices below n_row (rows) precede the column indices.
        cut = np.searchsorted(members, n_row)
        row_ix = members[:cut]
        col_ix = members[cut:] - n_row
    else:
        row_ix = col_ix = members

    # Select rows in CSR format, then columns in CSC format, and go back to CSR.
    selected_rows = adjacency[row_ix, :]
    new_adjacency = selected_rows.tocsc()[:, col_ix].tocsr()

    if not return_labels:
        return new_adjacency
    if bipartite:
        return new_adjacency, (row_ix, col_ix)
    return new_adjacency, row_ix
def fit(self, input_matrix: sparse.csr_matrix, force_bipartite: bool = False):
    """Embed the graph using the clustering obtained with Louvain.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    force_bipartite : bool (default = ``False``)
        If ``True``, force the input matrix to be considered as a biadjacency matrix.

    Returns
    -------
    self: :class:`BiLouvainEmbedding`
    """
    louvain_params = dict(
        resolution=self.resolution,
        modularity=self.modularity,
        tol_optimization=self.tol_optimization,
        tol_aggregation=self.tol_aggregation,
        n_aggregations=self.n_aggregations,
        shuffle_nodes=self.shuffle_nodes,
        sort_clusters=False,
        return_membership=True,
        return_aggregate=True,
        random_state=self.random_state,
    )
    clustering = Louvain(**louvain_params)
    clustering.fit(input_matrix, force_bipartite=force_bipartite)

    # NOTE(review): the branch below depends on the matrix shape only; ``force_bipartite``
    # is forwarded to Louvain but not consulted here, so a square input never gets
    # row/column embeddings -- confirm this is intended.
    if is_square(input_matrix):
        primary, secondary = clustering.labels_, None
    else:
        primary, secondary = clustering.labels_col_, clustering.labels_row_

    # Labels are reindexed according to ``self.isolated_nodes`` (see ``reindex_labels``).
    self.labels_, labels_row = reindex_labels(primary, secondary, self.isolated_nodes)

    # Embedding: normalized input matrix times the membership matrix of the labels.
    row_probs = normalize(input_matrix)
    self.embedding_ = row_probs.dot(membership_matrix(self.labels_)).toarray()

    if labels_row is None:
        return self

    # Second set of labels available: same construction on the transposed matrix.
    col_probs = normalize(input_matrix.T)
    col_embedding = col_probs.dot(membership_matrix(labels_row))
    self.embedding_row_ = self.embedding_
    self.embedding_col_ = col_embedding.toarray()
    return self
def save(folder: Union[str, Path], data: Union[sparse.csr_matrix, Bunch]):
    """Write a Bunch or a CSR matrix to a bundle folder made of Numpy and Pickle files,
    for faster subsequent loads.

    Supported attribute types include sparse matrices, NumPy arrays, strings and Bunch.
    An existing folder with the same name is removed first. A bare CSR matrix is wrapped
    in a Bunch, under ``adjacency`` if it is square and under ``biadjacency`` otherwise.

    Parameters
    ----------
    folder : str or :class:`pathlib.Path`
        The name to be used for the bundle folder (``~`` is expanded).
    data : Union[sparse.csr_matrix, Bunch]
        The data to save

    Example
    -------
    >>> from sknetwork.data import save
    >>> graph = Bunch()
    >>> graph.adjacency = sparse.csr_matrix(np.random.random((10, 10)) < 0.2)
    >>> graph.names = np.array(list('abcdefghij'))
    >>> save('random_data', graph)
    >>> 'random_data' in listdir('.')
    True
    """
    target = Path(folder).expanduser()
    # Start from a clean slate: any previous bundle at this location is discarded.
    if target.exists():
        shutil.rmtree(target)

    bundle = data
    if isinstance(data, sparse.csr_matrix):
        bundle = Bunch()
        if is_square(data):
            bundle.adjacency = data
        else:
            bundle.biadjacency = data

    # Absolute targets are saved relative to the root, relative ones to the current directory.
    data_home = '/' if target.is_absolute() else '.'
    save_to_numpy_bundle(bundle, target, data_home)