示例#1
0
def get_adjacency(input_matrix: Union[sparse.csr_matrix, np.ndarray], allow_directed: bool = True,
                  force_bipartite: bool = False, force_directed: bool = False)\
        -> Tuple[sparse.csr_matrix, bool]:
    """Validate the input matrix and derive the adjacency matrix to work with.

    Parameters
    ----------
    input_matrix :
        Adjacency matrix or biadjacency matrix of the graph.
    allow_directed :
        If ``True`` (default), a square input matrix is used as is, without any symmetry check.
    force_bipartite : bool
        If ``True``, always treat the input as the biadjacency matrix of a bipartite graph.
        Otherwise (default), do it only if the input matrix is not square, or is not symmetric
        while ``allow_directed=False``.
    force_directed :
        Only used in the bipartite case.
        If ``True`` return :math:`A  = \\begin{bmatrix} 0 & B \\\\ 0 & 0 \\end{bmatrix}`.
        Otherwise (default), return :math:`A  = \\begin{bmatrix} 0 & B \\\\ B^T & 0 \\end{bmatrix}`.

    Returns
    -------
    adjacency :
        The checked input matrix itself, or the adjacency matrix built from it in the bipartite case.
    bipartite : bool
        ``True`` if the input matrix has been treated as a biadjacency matrix.
    """
    matrix = check_format(input_matrix)

    # Decide, cheapest test first, whether the input must be seen as a biadjacency matrix.
    # The symmetry test is only reached for square matrices when directed graphs are not allowed.
    if force_bipartite or not is_square(matrix):
        bipartite = True
    elif allow_directed:
        bipartite = False
    else:
        bipartite = not is_symmetric(matrix)

    if not bipartite:
        return matrix, False

    convert = bipartite2directed if force_directed else bipartite2undirected
    return convert(matrix), True
示例#2
0
def depth_first_search(adjacency: sparse.csr_matrix,
                       source: int,
                       return_predecessors: bool = True):
    """Return a depth-first ordering of the nodes reachable from a source node.

    * Graphs
    * Digraphs

    Thin wrapper around SciPy (scipy.sparse.csgraph.depth_first_order). The graph is
    handled as directed whenever ``is_symmetric(adjacency)`` is false.

    Parameters
    ----------
    adjacency :
        The adjacency matrix of the graph
    source :
        The node from which to start the ordering
    return_predecessors:
        If ``True`` (default), the array of predecessors is returned as well

    Returns
    -------
    node_array : np.ndarray
        The depth-first list of nodes, starting with specified node. The length of node_array is the number of nodes
        reachable from the specified node.
    predecessors : np.ndarray
        Returned only if ``return_predecessors == True``. The list of predecessors of each node in a depth-first tree.
        If node ``i`` is in the tree, then its parent is given by ``predecessors[i]``. If node ``i`` is not in the tree
        (and for the parent node) then ``predecessors[i] = -9999``.
    """
    return sparse.csgraph.depth_first_order(
        adjacency,
        source,
        directed=not is_symmetric(adjacency),
        return_predecessors=return_predecessors,
    )
示例#3
0
def connected_components(adjacency: sparse.csr_matrix, connection: str = 'weak',
                         return_components: bool = True) -> Union[int, Tuple[int, np.ndarray]]:
    """
    Compute the connected components of the graph.

    * Graphs
    * Digraphs

    Thin wrapper around SciPy (scipy.sparse.csgraph.connected_components). The graph is
    handled as directed whenever ``is_symmetric(adjacency)`` is false.

    Parameters
    ----------
    adjacency:
        Adjacency matrix of the graph.
    connection
        Must be ``'weak'`` (default) or ``'strong'``. The type of connection to use for directed graphs.
    return_components
        If ``True`` (default), then return the labels for each of the connected components.

    Returns
    -------
    n_components: int
        The number of connected components.
    components: ndarray
        Returned only if ``return_components`` is ``True``.
        The array such that for each node ``i``, ``components[i]`` is the connected component of ``i``.

    """
    directed = not is_symmetric(adjacency)
    return sparse.csgraph.connected_components(adjacency, directed=directed, connection=connection,
                                               return_labels=return_components)
示例#4
0
def connected_components(adjacency: sparse.csr_matrix,
                         connection: str = 'weak') -> np.ndarray:
    """Label each node with its connected component.

    * Graphs
    * Digraphs

    Thin wrapper around SciPy (scipy.sparse.csgraph.connected_components). The graph is
    handled as directed whenever ``is_symmetric(adjacency)`` is false.

    Parameters
    ----------
    adjacency :
        Adjacency matrix of the graph.
    connection :
        Must be ``'weak'`` (default) or ``'strong'``. The type of connection to use for directed graphs.

    Returns
    -------
    labels : np.ndarray
        Connected component of each node.

    Raises
    ------
    ValueError
        If the adjacency matrix stores no entry at all.
    """
    adjacency = check_format(adjacency)
    if adjacency.data.size == 0:
        raise ValueError('The graph is empty (no edge).')

    directed = not is_symmetric(adjacency)
    # SciPy returns (n_components, labels); only the labels are exposed here.
    _, labels = sparse.csgraph.connected_components(adjacency, directed=directed, connection=connection,
                                                    return_labels=True)
    return labels
示例#5
0
def is_bipartite(adjacency: sparse.csr_matrix, return_biadjacency: bool = False) \
        -> Union[bool, Tuple[bool, Optional[sparse.csr_matrix], Optional[np.ndarray], Optional[np.ndarray]]]:
    """Check whether an undirected graph is bipartite.

    The nodes are 2-colored by a depth-first traversal of each connected component;
    the graph is bipartite if and only if no edge joins two nodes of the same color.

    * Graphs

    Parameters
    ----------
    adjacency :
       Adjacency matrix of the graph (symmetric).
    return_biadjacency :
        If ``True``, return a biadjacency matrix of the graph if bipartite.

    Returns
    -------
    is_bipartite : bool
        A boolean denoting if the graph is bipartite.
    biadjacency : sparse.csr_matrix
        A biadjacency matrix of the graph if bipartite, ``None`` otherwise
        (only if ``return_biadjacency`` is ``True``).
    rows : np.ndarray
        Index of rows in the original graph, ``None`` if the graph is not bipartite
        (only if ``return_biadjacency`` is ``True``).
    cols : np.ndarray
        Index of columns in the original graph, ``None`` if the graph is not bipartite
        (only if ``return_biadjacency`` is ``True``).

    Raises
    ------
    ValueError
        If the adjacency matrix is not symmetric.
    """
    if not is_symmetric(adjacency):
        raise ValueError('The graph must be undirected.')

    # Single definition of the negative answer, shaped after what the caller asked for.
    not_bipartite = (False, None, None, None) if return_biadjacency else False

    # A self-loop joins a node to itself, so no valid 2-coloring exists.
    if adjacency.diagonal().any():
        return not_bipartite

    indptr, indices = adjacency.indptr, adjacency.indices
    n = indptr.shape[0] - 1
    coloring = np.full(n, -1, dtype=int)  # -1 = not visited yet, otherwise color 0 or 1

    # One forward scan finds the first uncolored node of each component, in the same
    # order as searching the whole array again, but without a rescan per component.
    for src in range(n):
        if coloring[src] != -1:
            continue
        coloring[src] = 0
        stack = [src]
        while stack:
            node = stack.pop()
            node_color = coloring[node]
            for neighbor in indices[indptr[node]:indptr[node + 1]]:
                if coloring[neighbor] == -1:
                    coloring[neighbor] = 1 - node_color
                    stack.append(neighbor)
                elif coloring[neighbor] == node_color:
                    # Both ends of an edge share a color: odd cycle.
                    return not_bipartite

    if not return_biadjacency:
        return True
    rows = np.flatnonzero(coloring == 0)
    cols = np.flatnonzero(coloring == 1)
    return True, adjacency[rows, :][:, cols], rows, cols
示例#6
0
    def test_bip2undir(self):
        """Check ``bipartite2undirected`` on the fixture biadjacency and on a ``SparseLR`` built from it."""
        biadjacency = self.biadjacency
        n_row, n_col = biadjacency.shape
        n_total = n_row + n_col

        # Sparse input: the result is expected to be square over rows + columns and symmetric.
        adjacency = bipartite2undirected(biadjacency)
        self.assertEqual(adjacency.shape, (n_total, n_total))
        self.assertTrue(is_symmetric(adjacency))

        # SparseLR input: the result is expected to be exactly of type SparseLR.
        low_rank = SparseLR(biadjacency, [(np.ones(n_row), np.ones(n_col))])
        self.assertIs(type(bipartite2undirected(low_rank)), SparseLR)
示例#7
0
def is_bipartite(adjacency: sparse.csr_matrix,
                 return_biadjacency: bool = False) -> Union[bool, Tuple[bool, Optional[sparse.csr_matrix]]]:
    """Check whether an undirected graph is bipartite and optionally return a possible biadjacency.

    The nodes are 2-colored by a depth-first traversal of each connected component;
    the graph is bipartite if and only if no edge joins two nodes of the same color.

    * Graphs

    Parameters
    ----------
    adjacency:
       The symmetric adjacency matrix of the graph.
    return_biadjacency:
        If ``True`` , a possible biadjacency is returned if the graph is bipartite (None is returned otherwise)

    Returns
    -------
    is_bipartite: bool
        A boolean denoting if the graph is bipartite
    biadjacency: sparse.csr_matrix
        A possible biadjacency of the bipartite graph (None if the graph is not bipartite).
        Returned only if ``return_biadjacency`` is ``True``. Rows are the nodes of color 0,
        columns the nodes of color 1.

    Raises
    ------
    ValueError
        If the adjacency matrix is not symmetric.
    """
    if not is_symmetric(adjacency):
        raise ValueError('The graph must be undirected.')

    # Single definition of the negative answer, shaped after what the caller asked for.
    not_bipartite = (False, None) if return_biadjacency else False

    # A self-loop joins a node to itself, so no valid 2-coloring exists.
    if adjacency.diagonal().any():
        return not_bipartite

    indptr, indices = adjacency.indptr, adjacency.indices
    n_nodes = indptr.shape[0] - 1
    coloring = np.full(n_nodes, -1, dtype=int)  # -1 = not visited yet, otherwise color 0 or 1

    # One forward scan finds the first uncolored node of each component, in the same
    # order as searching the whole array again, but without a rescan per component.
    for src in range(n_nodes):
        if coloring[src] != -1:
            continue
        coloring[src] = 0
        stack = [src]
        while stack:
            node = stack.pop()
            node_color = coloring[node]
            for neighbor in indices[indptr[node]:indptr[node + 1]]:
                if coloring[neighbor] == -1:
                    coloring[neighbor] = 1 - node_color
                    stack.append(neighbor)
                elif coloring[neighbor] == node_color:
                    # Both ends of an edge share a color: odd cycle.
                    return not_bipartite

    if not return_biadjacency:
        return True
    return True, adjacency[coloring == 0, :][:, coloring == 1]
示例#8
0
    def test_dir2undir(self):
        """Check ``directed2undirected`` on a directed cycle, on the house graph and on a ``SparseLR``."""
        # Directed cycle: the result keeps the shape and is expected to be symmetric.
        digraph = cyclic_digraph(3)
        symmetrized = directed2undirected(digraph)
        self.assertEqual(symmetrized.shape, digraph.shape)
        self.assertTrue(is_symmetric(symmetrized))

        # House graph: half of the result is expected to reproduce the input exactly.
        adjacency = house()
        n = adjacency.shape[0]
        residual = 0.5 * directed2undirected(adjacency) - adjacency
        self.assertEqual(residual.nnz, 0)

        # Same graph wrapped in a SparseLR whose low-rank term is built from zero vectors.
        low_rank = SparseLR(adjacency, [(np.zeros(n), np.zeros(n))])
        halved = 0.5 * directed2undirected(low_rank)
        self.assertEqual(halved.shape, (n, n))

        # The halved operator and the original adjacency must act alike on a random vector.
        vector = np.random.randn(n)
        gap = np.linalg.norm(halved.dot(vector) - adjacency.dot(vector))
        self.assertAlmostEqual(gap, 0)
示例#9
0
def is_acyclic(adjacency: sparse.csr_matrix) -> bool:
    """Check whether a graph has no cycle.

    The answer is ``True`` when the number of strongly connected components equals the
    number of nodes and the diagonal of the adjacency matrix is zero (no self-loop).

    Parameters
    ----------
    adjacency:
        Adjacency matrix of the graph.

    Returns
    -------
    is_acyclic : bool
        A boolean with value True if the graph has no cycle and False otherwise

    Notes
    -----
    NOTE(review): when ``is_symmetric(adjacency)`` holds, the components are computed with
    ``directed=False``, so any edge between two distinct nodes merges them into one component
    and the result is ``False`` (even for an undirected tree) -- confirm this is intended.
    """
    directed = not is_symmetric(adjacency)
    n_components = sparse.csgraph.connected_components(adjacency, directed=directed, connection='strong',
                                                       return_labels=False)
    if n_components != adjacency.shape[0]:
        # At least two nodes share a component.
        return False
    # Every node is alone in its component: only self-loops can still form a cycle.
    return (adjacency.diagonal() == 0).all()
示例#10
0
    def fit(self, adjacency: Union[sparse.csr_matrix, np.ndarray], position_init: Optional[np.ndarray] = None,
            n_iter: Optional[int] = None) -> 'Spring':
        """Compute layout.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph, treated as undirected.
        position_init : np.ndarray
            Custom initial positions of the nodes. Shape must be (n, n_components).
            If ``None``, the initialization is chosen by self.position_init
            (``'random'`` or ``'spectral'``; any other value starts from zeros).
        n_iter : int
            Number of iterations to update positions.
            If ``None``, use the value of self.n_iter.

        Returns
        -------
        self: :class:`Spring`

        Raises
        ------
        ValueError
            If ``position_init`` is an array whose shape is not (n, n_components).
        TypeError
            If ``position_init`` is neither ``None`` nor a numpy array.
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        if not is_symmetric(adjacency):
            adjacency = directed2undirected(adjacency)
        n = adjacency.shape[0]

        # initial positions (zeros unless one of the branches below applies)
        position = np.zeros((n, self.n_components))
        if position_init is None:
            if self.position_init == 'random':
                position = np.random.randn(n, self.n_components)
            elif self.position_init == 'spectral':
                position = Spectral(n_components=self.n_components, normalized=False).fit_transform(adjacency)
        elif not isinstance(position_init, np.ndarray):
            raise TypeError('Initial position must be a numpy array.')
        elif position_init.shape != (n, self.n_components):
            raise ValueError('Initial position has invalid shape.')
        else:
            # copy, so that the in-place updates below do not touch the caller's array
            position = position_init.copy()

        if n_iter is None:
            n_iter = self.n_iter

        strength = np.sqrt((1 / n)) if self.strength is None else self.strength

        # the largest allowed move starts at 10% of the widest extent and shrinks linearly
        extent = position.max(axis=0) - position.min(axis=0)
        step_max: float = 0.1 * extent.max()
        step: float = step_max / (n_iter + 1)

        indptr, indices, weights = adjacency.indptr, adjacency.indices, adjacency.data
        tree = None
        delta = np.zeros((n, self.n_components))
        for _ in range(n_iter):
            delta *= 0
            if self.approx_radius > 0:
                # repulsion is then restricted to the points within approx_radius
                tree = cKDTree(position)

            for i in range(n):
                start, stop = indptr[i], indptr[i + 1]

                # attraction, along the edges of node i
                offset = position[i] - position[indices[start:stop]]
                pull = weights[start:stop] / strength
                pull *= np.linalg.norm(offset, axis=1)
                attraction = (offset * pull[:, np.newaxis]).sum(axis=0)

                # repulsion, from all nodes or from the nearby ones only
                if tree is None:
                    grad: np.ndarray = (position[i] - position)  # shape (n, n_components)
                else:
                    nearby = tree.query_ball_point(position[i], self.approx_radius)
                    grad: np.ndarray = (position[i] - position[nearby])  # shape (n_neigh, n_components)
                distance: np.ndarray = np.linalg.norm(grad, axis=1)
                distance = np.where(distance < 0.01, 0.01, distance)  # avoid dividing by ~0
                repulsion = (grad * (strength / distance)[:, np.newaxis] ** 2).sum(axis=0)

                # total force
                delta[i] = repulsion - attraction

            # rescale each coordinate column of the displacement to the current step size
            length = np.linalg.norm(delta, axis=0)
            length = np.where(length < 0.01, 0.1, length)
            delta = delta * step_max / length
            position += delta
            step_max -= step

            err: float = np.linalg.norm(delta) / n
            if err < self.tol:
                break

        self.embedding_ = position
        return self
    def fit(self,
            adjacency: Union[sparse.csr_matrix, np.ndarray],
            pos_init: Optional[np.ndarray] = None,
            n_iter: Optional[int] = None) -> 'ForceAtlas':
        """Compute layout.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph, treated as undirected.
        pos_init :
            Position to start with, of shape (n, n_components). Random if not provided.
            The array is copied: it is not modified by the fit.
        n_iter : int
            Number of iterations to update positions.
            If ``None``, use the value of self.n_iter.

        Returns
        -------
        self: :class:`ForceAtlas`

        Raises
        ------
        ValueError
            If ``pos_init`` does not have shape (n, n_components).
        """
        # verify the format of the adjacency matrix
        adjacency = check_format(adjacency)
        check_square(adjacency)
        if not is_symmetric(adjacency):
            adjacency = directed2undirected(adjacency)
        n = adjacency.shape[0]

        # setting of the tolerance according to the size of the graph
        if n < 5000:
            tolerance = 0.1
        elif n < 50000:  # pragma: no cover
            tolerance = 1
        else:  # pragma: no cover
            tolerance = 10

        if n_iter is None:
            n_iter = self.n_iter

        # initial position of the nodes of the graph
        if pos_init is None:
            position: np.ndarray = np.random.randn(n, self.n_components)
        elif pos_init.shape != (n, self.n_components):
            raise ValueError(
                'The initial position does not have valid dimensions.')
        else:
            # Work on a copy: ``position`` is updated in place at every iteration and
            # must not overwrite the array owned by the caller.
            position = pos_init.copy()

        # weighted degree of each node, plus one
        degree: np.ndarray = adjacency.dot(np.ones(adjacency.shape[1])) + 1

        # initialization of variation of position of nodes
        resultants = np.zeros(n)  # norm of the force applied on each node at the previous iteration
        delta: np.ndarray = np.zeros((n, self.n_components))
        swing_vector: np.ndarray = np.zeros(n)
        global_speed = 1

        for _ in range(n_iter):
            delta *= 0
            global_swing = 0
            global_traction = 0

            # with a positive radius, repulsion is restricted to the points within that radius
            if self.approx_radius > 0:
                tree = cKDTree(position)
            else:
                tree = None

            for i in range(n):

                # attraction
                indices = adjacency.indices[adjacency.indptr[i]:adjacency.
                                            indptr[i + 1]]
                attraction = position[i] - position[indices]

                if self.lin_log:
                    attraction = np.sign(attraction) * np.log(
                        1 + np.abs(10 * attraction))
                attraction = attraction.sum(axis=0)

                # repulsion
                if tree is None:
                    neighbors = np.arange(n)
                else:
                    neighbors = tree.query_ball_point(position[i],
                                                      self.approx_radius)

                grad: np.ndarray = (position[i] - position[neighbors]
                                    )  # shape (n_neigh, n_components)
                distance: np.ndarray = np.linalg.norm(
                    grad, axis=1)  # shape (n_neigh,)
                distance = np.where(distance < 0.01, 0.01, distance)  # avoid dividing by ~0
                repulsion = grad * (degree[neighbors] / distance)[:,
                                                                  np.newaxis]

                repulsion *= self.repulsive_factor * degree[i]
                repulsion = repulsion.sum(axis=0)

                # gravity
                # NOTE(review): reuses ``grad`` (offsets to the repulsion neighbors) rather than
                # an offset to a fixed center -- confirm this is the intended gravity term.
                gravity = self.gravity_factor * degree[i] * grad
                gravity = gravity.sum(axis=0)

                # forces resultant applied on node i for traction, swing and speed computation
                force = repulsion - attraction - gravity
                resultant_new: float = np.linalg.norm(force)
                resultant_old: float = resultants[i]

                swing_node: float = np.abs(
                    resultant_new -
                    resultant_old)  # force variation applied on node i
                swing_vector[i] = swing_node
                global_swing += (degree[i] + 1) * swing_node

                traction: float = np.abs(
                    resultant_new +
                    resultant_old) / 2  # traction force applied on node i
                global_traction += (degree[i] + 1) * traction

                node_speed = self.speed * global_speed / (
                    1 + global_speed * np.sqrt(swing_node))
                if node_speed > self.speed_max / resultant_new:  # pragma: no cover
                    node_speed = self.speed_max / resultant_new

                delta[i] = node_speed * force
                resultants[i] = resultant_new
                # NOTE(review): the global speed is refreshed after every node, from the partial
                # sums accumulated so far in this iteration -- confirm this is intended.
                global_speed = tolerance * global_traction / global_swing

            position += delta  # calculating displacement and final position of points after iteration
            if (swing_vector < 1).all():
                break  # every node has a swing below 1: considered converged

        self.embedding_ = position
        return self
示例#12
0
def distance(adjacency: sparse.csr_matrix,
             sources: Optional[Union[int, Iterable]] = None,
             method: str = 'D',
             return_predecessors: bool = False,
             unweighted: bool = False,
             n_jobs: Optional[int] = None):
    """Compute distances between nodes.

    * Graphs
    * Digraphs

    Based on SciPy (scipy.sparse.csgraph.shortest_path)

    Parameters
    ----------
    adjacency :
        The adjacency matrix of the graph
    sources :
        If specified, only compute the paths for the points at the given indices. Will not work with ``method =='FW'``.
    method :
        The method to be used.

        * ``'D'`` (Dijkstra),
        * ``'BF'`` (Bellman-Ford),
        * ``'J'`` (Johnson).
    return_predecessors :
        If ``True``, the size predecessor matrix is returned
    unweighted :
        If ``True``, the weights of the edges are ignored
    n_jobs :
        If an integer value is given, denotes the number of workers to use (-1 means the maximum number will be used).
        If ``None``, no parallel computations are made.

    Returns
    -------
    dist_matrix : np.ndarray
        The matrix of distances between graph nodes. ``dist_matrix[i,j]`` gives the shortest
        distance from point ``i`` to point ``j`` along the graph.
        If no path exists between nodes ``i`` and ``j``, then ``dist_matrix[i, j] = np.inf``.
        With a single source, a 1-D array is returned.
    predecessors : np.ndarray, optional
        Returned only if ``return_predecessors == True``. The matrix of predecessors, which can be used to reconstruct
        the shortest paths. Row i of the predecessor matrix contains information on the shortest paths from point ``i``:
        each entry ``predecessors[i, j]`` gives the index of the previous node in the path from point ``i`` to point
        ``j``. If no path exists between nodes ``i`` and ``j``, then ``predecessors[i, j] = -9999``.
        With a single source, a 1-D array is returned.

    Raises
    ------
    ValueError
        If ``method == 'FW'`` is combined with parallel computations.

    Examples
    --------
    >>> from sknetwork.data import cyclic_digraph
    >>> adjacency = cyclic_digraph(3)
    >>> distance(adjacency, sources=0)
    array([0., 1., 2.])
    >>> distance(adjacency, sources=0, return_predecessors=True)
    (array([0., 1., 2.]), array([-9999,     0,     1]))
    """
    n_jobs = check_n_jobs(n_jobs)
    if method == 'FW' and n_jobs != 1:
        raise ValueError(
            'The Floyd-Warshall algorithm cannot be used with parallel computations.'
        )
    if sources is None:
        sources = np.arange(adjacency.shape[0])
    elif np.issubdtype(type(sources), np.integer):
        sources = np.array([sources])
    n = len(sources)
    directed = not is_symmetric(adjacency)

    if n_jobs == 1 or n == 1:
        # One SciPy call for all sources; returns dist, or (dist, pred) when predecessors are requested.
        res = sparse.csgraph.shortest_path(adjacency, method, directed,
                                           return_predecessors, unweighted,
                                           False, sources)
    else:
        # One SciPy call per source; the source fills the last positional slot (``indices``).
        local_function = partial(sparse.csgraph.shortest_path, adjacency, method,
                                 directed, return_predecessors, unweighted, False)
        with Pool(n_jobs) as pool:
            per_source = pool.map(local_function, sources)
        if return_predecessors:
            # Each item is a (dist_row, pred_row) pair: stack rows per kind, so that the result
            # is (dist_matrix, predecessors) like in the sequential branch. Stacking the pairs
            # themselves would put the source index on the first axis instead.
            res = (np.array([dist_row for dist_row, _ in per_source]),
                   np.array([pred_row for _, pred_row in per_source]))
        else:
            res = np.array(per_source)

    if not return_predecessors:
        return res.ravel() if n == 1 else res

    dist_matrix, predecessors = res[0], res[1].astype(int)
    if n == 1:
        return dist_matrix.ravel(), predecessors.ravel()
    return dist_matrix, predecessors
示例#13
0
    def fit(self,
            adjacency: Union[sparse.csr_matrix, np.ndarray],
            position_init: Optional[np.ndarray] = None,
            n_iter: Optional[int] = None) -> 'Spring':
        """Compute layout.

        Parameters
        ----------
        adjacency :
            Adjacency matrix of the graph, treated as undirected.
        position_init : np.ndarray
            Custom initial positions of the nodes. Shape must be (n, 2).
            If ``None``, the initialization is chosen by self.position_init
            (``'random'`` or ``'spectral'``; any other value starts from zeros).
        n_iter : int
            Number of iterations to update positions.
            If ``None``, use the value of self.n_iter.

        Returns
        -------
        self: :class:`Spring`

        Raises
        ------
        ValueError
            If ``position_init`` is an array whose shape is not (n, 2).
        TypeError
            If ``position_init`` is neither ``None`` nor a numpy array.
        """
        adjacency = check_format(adjacency)
        check_square(adjacency)
        if not is_symmetric(adjacency):
            adjacency = directed2undirected(adjacency)
        n = adjacency.shape[0]

        # initial positions (zeros unless one of the branches below applies)
        position = np.zeros((n, 2))
        if position_init is None:
            if self.position_init == 'random':
                position = np.random.randn(n, 2)
            elif self.position_init == 'spectral':
                position = Spectral(n_components=2,
                                    normalized=False).fit_transform(adjacency)
        elif not isinstance(position_init, np.ndarray):
            raise TypeError('Initial position must be a numpy array.')
        elif position_init.shape != (n, 2):
            raise ValueError('Initial position has invalid shape.')
        else:
            # copy, so that the in-place updates below do not touch the caller's array
            position = position_init.copy()

        if n_iter is None:
            n_iter = self.n_iter

        strength = np.sqrt((1 / n)) if self.strength is None else self.strength

        # the largest allowed move starts at 10% of the widest extent and shrinks linearly
        width: float = position[:, 0].max() - position[:, 0].min()
        height: float = position[:, 1].max() - position[:, 1].min()
        step_max: float = 0.1 * max(width, height)
        step: float = step_max / (n_iter + 1)

        indptr, indices, weights = adjacency.indptr, adjacency.indices, adjacency.data
        delta = np.zeros((n, 2))
        for _ in range(n_iter):
            delta *= 0
            for i in range(n):
                start, stop = indptr[i], indptr[i + 1]
                neighbors = indices[start:stop]

                grad: np.ndarray = (position[i] - position)  # shape (n, 2)
                distance: np.ndarray = np.linalg.norm(grad, axis=1)  # shape (n,)
                distance = np.where(distance < 0.01, 0.01, distance)  # avoid dividing by ~0

                # attraction acts along the edges of node i only, repulsion between all pairs
                attraction = np.zeros(n)
                attraction[neighbors] += weights[start:stop] * distance[neighbors] / strength
                repulsion = (strength / distance) ** 2

                net = repulsion - attraction
                delta[i] = (grad * net[:, np.newaxis]).sum(axis=0)  # shape (2,)

            # rescale each coordinate column of the displacement to the current step size
            length = np.linalg.norm(delta, axis=0)
            length = np.where(length < 0.01, 0.1, length)
            delta = delta * step_max / length
            position += delta
            step_max -= step

            err: float = np.linalg.norm(delta) / n
            if err < self.tol:
                break

        self.embedding_ = position
        return self