Пример #1
0
 def test_format_keyword_raise(self):
     with pytest.raises(nx.NetworkXError):
         WP4 = nx.Graph()
         WP4.add_edges_from(
             (n, n + 1, dict(weight=0.5, other=0.3)) for n in range(3))
         P4 = path_graph(4)
         nx.to_scipy_sparse_array(P4, format="any_other")
Пример #2
0
    def test_selfloop_digraph(self):
        G = nx.DiGraph([(1, 1)])
        M = nx.to_scipy_sparse_array(G)
        np.testing.assert_equal(M.toarray(), np.array([[1]]))

        G.add_edges_from([(2, 3), (3, 4)])
        M = nx.to_scipy_sparse_array(G, nodelist=[2, 3, 4])
        np.testing.assert_equal(M.toarray(),
                                np.array([[0, 1, 0], [0, 0, 1], [0, 0, 0]]))
Пример #3
0
 def test_weight_keyword(self):
     WP4 = nx.Graph()
     WP4.add_edges_from(
         (n, n + 1, dict(weight=0.5, other=0.3)) for n in range(3))
     P4 = path_graph(4)
     A = nx.to_scipy_sparse_array(P4)
     np.testing.assert_equal(
         A.todense(),
         nx.to_scipy_sparse_array(WP4, weight=None).todense())
     np.testing.assert_equal(0.5 * A.todense(),
                             nx.to_scipy_sparse_array(WP4).todense())
     np.testing.assert_equal(
         0.3 * A.todense(),
         nx.to_scipy_sparse_array(WP4, weight="other").todense())
Пример #4
0
    def test_nodelist(self):
        """Conversion from graph to sparse matrix to graph with nodelist."""
        P4 = path_graph(4)
        P3 = path_graph(3)
        nodelist = list(P3.nodes())
        A = nx.to_scipy_sparse_array(P4, nodelist=nodelist)
        GA = nx.Graph(A)
        assert nx.is_isomorphic(GA, P3)

        pytest.raises(nx.NetworkXError,
                      nx.to_scipy_sparse_array,
                      P3,
                      nodelist=[])
        # Test nodelist duplicates.
        long_nl = nodelist + [0]
        pytest.raises(nx.NetworkXError,
                      nx.to_scipy_sparse_array,
                      P3,
                      nodelist=long_nl)

        # Test nodelist contains non-nodes
        non_nl = [-1, 0, 1, 2]
        pytest.raises(nx.NetworkXError,
                      nx.to_scipy_sparse_array,
                      P3,
                      nodelist=non_nl)
Пример #5
0
    def __init__(self, transform: Optional[Callable] = None):
        super().__init__('.', transform)

        import networkx as nx

        G = nx.karate_club_graph()

        x = torch.eye(G.number_of_nodes(), dtype=torch.float)

        if hasattr(nx, 'to_scipy_sparse_array'):
            adj = nx.to_scipy_sparse_array(G).tocoo()
        else:
            adj = nx.to_scipy_sparse_matrix(G).tocoo()
        row = torch.from_numpy(adj.row.astype(np.int64)).to(torch.long)
        col = torch.from_numpy(adj.col.astype(np.int64)).to(torch.long)
        edge_index = torch.stack([row, col], dim=0)

        # Create communities.
        y = torch.tensor([
            1, 1, 1, 1, 3, 3, 3, 1, 0, 1, 3, 1, 1, 1, 0, 0, 3, 1, 0, 1, 0, 1,
            0, 0, 2, 2, 0, 0, 2, 0, 0, 2, 0, 0
        ],
                         dtype=torch.long)

        # Select a single training node for each community
        # (we just use the first one).
        train_mask = torch.zeros(y.size(0), dtype=torch.bool)
        for i in range(int(y.max()) + 1):
            train_mask[(y == i).nonzero(as_tuple=False)[0]] = True

        data = Data(x=x, edge_index=edge_index, y=y, train_mask=train_mask)

        self.data, self.slices = self.collate([data])
Пример #6
0
 def test_adjacency_interface_scipy(self):
     A = nx.to_scipy_sparse_array(self.Gs, dtype="d")
     pos = nx.drawing.layout._sparse_fruchterman_reingold(A)
     assert pos.shape == (6, 2)
     pos = nx.drawing.layout._sparse_spectral(A)
     assert pos.shape == (6, 2)
     pos = nx.drawing.layout._sparse_fruchterman_reingold(A, dim=3)
     assert pos.shape == (6, 3)
Пример #7
0
 def test_ordering(self):
     G = nx.DiGraph()
     G.add_edge(1, 2)
     G.add_edge(2, 3)
     G.add_edge(3, 1)
     M = nx.to_scipy_sparse_array(G, nodelist=[3, 2, 1])
     np.testing.assert_equal(M.toarray(),
                             np.array([[0, 0, 1], [1, 0, 0], [0, 1, 0]]))
Пример #8
0
def MCL(cdr3, edgelist=None, mcl_hyper=[1.2, 2], outfile=None):
    """
    Perform clustering on a network of CDR3 amino acid sequences with
    a known hamming distance, using the Markov clustering (MCL) algorithm.
    For more info about the inflation and expansion parameters,
    visit: https://micans.org/mcl/


    Parameters
    ----------
    edgelist : set, optional
        Tab-separated edgelist. The default is None.
    mcl_hyper : list, optional
        MCL hyperparameters: inflation and expansion.
        The default is [1.2,2].
    outfile : str, optional
        Name of outfile. The default is None.

    Returns
    -------
    clusters : pd.DataFrame
        pd.DataFrame containing two columns: 'CDR3' and 'cluster'.
        The first column contains CDR3 sequences, the second column
        contains the corresponding cluster ids.
    """
    if edgelist is None:
        edgelist = create_edgelist(cdr3)

    try:
        G = nx.parse_adjlist(edgelist, nodetype=str)
        m = nx.to_scipy_sparse_array(G)
    
        # Run MCL
        result = mcl.run_mcl(m, inflation=mcl_hyper[0], expansion=mcl_hyper[1])
        mcl_output = mcl.get_clusters(result)
        identifiers = list(G.nodes())
    
        # Map cluster ids back to seqs
        cluster_ids = dict()
        for i in range(len(mcl_output)):
            cluster_ids[i] = list(identifiers[i] for i in mcl_output[i])
    
        # Generate nodelist
        clusters = {"CDR3": [], "cluster": []}
        for c in cluster_ids:
            for seq in cluster_ids[c]:
                clusters["CDR3"].append(seq)
                clusters["cluster"].append(c)
        clusters = pd.DataFrame(data=clusters)
    
        # Write to file
        if outfile is not None:
            clusters.to_csv(outfile, sep="\t", index=False)
    except nx.NetworkXError:
        clusters = pd.DataFrame({"CDR3": [], "cluster": []})

    return clusters
Пример #9
0
def laplacian_matrix(G, nodelist=None, weight="weight"):
    """Returns the Laplacian matrix of G.

    The graph Laplacian is the matrix L = D - A, where
    A is the adjacency matrix and D is the diagonal matrix of node degrees.

    Parameters
    ----------
    G : graph
       A NetworkX graph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    weight : string or None, optional (default='weight')
       The edge data key used to compute each value in the matrix.
       If None, then each edge has weight 1.

    Returns
    -------
    L : SciPy sparse matrix
      The Laplacian matrix of G.

    Notes
    -----
    For MultiGraph/MultiDiGraph, the edges weights are summed.

    See Also
    --------
    to_numpy_array
    normalized_laplacian_matrix
    laplacian_spectrum
    """
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    if nodelist is None:
        nodelist = list(G)
    A = nx.to_scipy_sparse_array(G,
                                 nodelist=nodelist,
                                 weight=weight,
                                 format="csr")
    n, m = A.shape
    # TODO: rm csr_array wrapper when spdiags can produce arrays
    D = sp.sparse.csr_array(
        sp.sparse.spdiags(A.sum(axis=1), 0, m, n, format="csr"))
    import warnings

    warnings.warn(
        "laplacian_matrix will return a scipy.sparse array instead of a matrix in Networkx 3.0.",
        FutureWarning,
        stacklevel=2,
    )
    # TODO: rm sp.sparse.csr_matrix in version 3.0
    return sp.sparse.csr_matrix(D - A)
Пример #10
0
    def from_networkx(cls, graph, weight_col="weight"):
        """Convert a ``networkx`` graph to a PySAL ``W`` object.

        Parameters
        ----------
        graph : networkx.Graph
            The graph to convert to a ``W``.
        weight_col : string
            If the graph is labeled, this should be the name of the field
            to use as the weight for the ``W``.

        Returns
        -------
        w : libpysal.weights.W
            A ``W`` object containing the same graph as the ``networkx`` graph.
        """
        try:
            import networkx as nx
        except ImportError:
            raise ImportError(
                "NetworkX 2.7+ is required to use this function.")
        sparse_array = nx.to_scipy_sparse_array(graph)
        w = WSP(sparse_array).to_W()
        return w
Пример #11
0
def spectral_layout(G, weight="weight", scale=1, center=None, dim=2):
    """Position nodes using the eigenvectors of the graph Laplacian.

    Using the unnormalized Laplacian, the layout shows possible clusters of
    nodes which are an approximation of the ratio cut. If dim is the number of
    dimensions then the positions are the entries of the dim eigenvectors
    corresponding to the ascending eigenvalues starting from the second one.

    Parameters
    ----------
    G : NetworkX graph or list of nodes
        A position will be assigned to every node in G.

    weight : string or None   optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight.  If None, then all edge weights are 1.

    scale : number (default: 1)
        Scale factor for positions.

    center : array-like or None
        Coordinate pair around which to center the layout.

    dim : int
        Dimension of layout.

    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> pos = nx.spectral_layout(G)

    Notes
    -----
    Directed graphs will be considered as undirected graphs when
    positioning the nodes.

    For larger graphs (>500 nodes) this will use the SciPy sparse
    eigenvalue solver (ARPACK).
    """
    # handle some special cases that break the eigensolvers
    import numpy as np

    G, center = _process_params(G, center, dim)

    if len(G) <= 2:
        if len(G) == 0:
            pos = np.array([])
        elif len(G) == 1:
            pos = np.array([center])
        else:
            pos = np.array([np.zeros(dim), np.array(center) * 2.0])
        return dict(zip(G, pos))
    try:
        # Sparse matrix
        if len(G) < 500:  # dense solver is faster for small graphs
            raise ValueError
        A = nx.to_scipy_sparse_array(G, weight=weight, dtype="d")
        # Symmetrize directed graphs
        if G.is_directed():
            A = A + np.transpose(A)
        pos = _sparse_spectral(A, dim)
    except (ImportError, ValueError):
        # Dense matrix
        A = nx.to_numpy_array(G, weight=weight)
        # Symmetrize directed graphs
        if G.is_directed():
            A += A.T
        pos = _spectral(A, dim)

    pos = rescale_layout(pos, scale=scale) + center
    pos = dict(zip(G, pos))
    return pos
Пример #12
0
def pagerank_scipy(
    G,
    alpha=0.85,
    personalization=None,
    max_iter=100,
    tol=1.0e-6,
    nstart=None,
    weight="weight",
    dangling=None,
):
    """Returns the PageRank of the nodes in the graph.

    PageRank computes a ranking of the nodes in the graph G based on
    the structure of the incoming links. It was originally designed as
    an algorithm to rank web pages.

    Parameters
    ----------
    G : graph
      A NetworkX graph.  Undirected graphs will be converted to a directed
      graph with two directed edges for each undirected edge.

    alpha : float, optional
      Damping parameter for PageRank, default=0.85.

    personalization: dict, optional
      The "personalization vector" consisting of a dictionary with a
      key some subset of graph nodes and personalization value each of those.
      At least one personalization value must be non-zero.
      If not specfiied, a nodes personalization value will be zero.
      By default, a uniform distribution is used.

    max_iter : integer, optional
      Maximum number of iterations in power method eigenvalue solver.

    tol : float, optional
      Error tolerance used to check convergence in power method solver.

    nstart : dictionary, optional
      Starting value of PageRank iteration for each node.

    weight : key, optional
      Edge data key to use as weight.  If None weights are set to 1.

    dangling: dict, optional
      The outedges to be assigned to any "dangling" nodes, i.e., nodes without
      any outedges. The dict key is the node the outedge points to and the dict
      value is the weight of that outedge. By default, dangling nodes are given
      outedges according to the personalization vector (uniform if not
      specified) This must be selected to result in an irreducible transition
      matrix (see notes under google_matrix). It may be common to have the
      dangling dict to be the same as the personalization dict.

    Returns
    -------
    pagerank : dictionary
       Dictionary of nodes with PageRank as value

    Examples
    --------
    >>> G = nx.DiGraph(nx.path_graph(4))
    >>> pr = nx.pagerank_scipy(G, alpha=0.9)

    Notes
    -----
    The eigenvector calculation uses power iteration with a SciPy
    sparse matrix representation.

    This implementation works with Multi(Di)Graphs. For multigraphs the
    weight between two nodes is set to be the sum of all edge weights
    between those nodes.

    See Also
    --------
    pagerank, pagerank_numpy, google_matrix

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Page, Lawrence; Brin, Sergey; Motwani, Rajeev and Winograd, Terry,
       The PageRank citation ranking: Bringing order to the Web. 1999
       http://dbpubs.stanford.edu:8090/pub/showDoc.Fulltext?lang=en&doc=1999-66&format=pdf
    """
    msg = "networkx.pagerank_scipy is deprecated and will be removed in NetworkX 3.0, use networkx.pagerank instead."
    warn(msg, DeprecationWarning, stacklevel=2)
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    N = len(G)
    if N == 0:
        return {}

    nodelist = list(G)
    A = nx.to_scipy_sparse_array(G,
                                 nodelist=nodelist,
                                 weight=weight,
                                 dtype=float)
    S = A.sum(axis=1)
    S[S != 0] = 1.0 / S[S != 0]
    # TODO: csr_array
    Q = sp.sparse.csr_array(sp.sparse.spdiags(S.T, 0, *A.shape))
    A = Q @ A

    # initial vector
    if nstart is None:
        x = np.repeat(1.0 / N, N)
    else:
        x = np.array([nstart.get(n, 0) for n in nodelist], dtype=float)
        x = x / x.sum()

    # Personalization vector
    if personalization is None:
        p = np.repeat(1.0 / N, N)
    else:
        p = np.array([personalization.get(n, 0) for n in nodelist],
                     dtype=float)
        if p.sum() == 0:
            raise ZeroDivisionError
        p = p / p.sum()
    # Dangling nodes
    if dangling is None:
        dangling_weights = p
    else:
        # Convert the dangling dictionary into an array in nodelist order
        dangling_weights = np.array([dangling.get(n, 0) for n in nodelist],
                                    dtype=float)
        dangling_weights /= dangling_weights.sum()
    is_dangling = np.where(S == 0)[0]

    # power iteration: make up to max_iter iterations
    for _ in range(max_iter):
        xlast = x
        x = alpha * (x @ A +
                     sum(x[is_dangling]) * dangling_weights) + (1 - alpha) * p
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < N * tol:
            return dict(zip(nodelist, map(float, x)))
    raise nx.PowerIterationFailedConvergence(max_iter)
Пример #13
0
def hits_scipy(G, max_iter=100, tol=1.0e-6, nstart=None, normalized=True):
    """Returns HITS hubs and authorities values for nodes.

    .. deprecated:: 2.6

       hits_scipy is deprecated and will be removed in networkx 3.0

    The HITS algorithm computes two numbers for a node.
    Authorities estimates the node value based on the incoming links.
    Hubs estimates the node value based on outgoing links.

    Parameters
    ----------
    G : graph
      A NetworkX graph

    max_iter : integer, optional
      Maximum number of iterations in power method.

    tol : float, optional
      Error tolerance used to check convergence in power method iteration.

    nstart : dictionary, optional
      Starting value of each node for power method iteration.

    normalized : bool (default=True)
       Normalize results by the sum of all of the values.

    Returns
    -------
    (hubs,authorities) : two-tuple of dictionaries
       Two dictionaries keyed by node containing the hub and authority
       values.

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> h, a = nx.hits(G)

    Notes
    -----
    This implementation uses SciPy sparse matrices.

    The eigenvector calculation is done by the power iteration method
    and has no guarantee of convergence.  The iteration will stop
    after max_iter iterations or an error tolerance of
    number_of_nodes(G)*tol has been reached.

    The HITS algorithm was designed for directed graphs but this
    algorithm does not check if the input graph is directed and will
    execute on undirected graphs.

    Raises
    ------
    PowerIterationFailedConvergence
        If the algorithm fails to converge to the specified tolerance
        within the specified number of iterations of the power iteration
        method.

    References
    ----------
    .. [1] A. Langville and C. Meyer,
       "A survey of eigenvector methods of web information retrieval."
       http://citeseer.ist.psu.edu/713792.html
    .. [2] Jon Kleinberg,
       Authoritative sources in a hyperlinked environment
       Journal of the ACM 46 (5): 604-632, 1999.
       doi:10.1145/324133.324140.
       http://www.cs.cornell.edu/home/kleinber/auth.pdf.
    """
    import numpy as np
    import warnings

    warnings.warn(
        ("networkx.hits_scipy is deprecated and will be removed"
         "in NetworkX 3.0, use networkx.hits instead."),
        DeprecationWarning,
        stacklevel=2,
    )

    if len(G) == 0:
        return {}, {}
    A = nx.to_scipy_sparse_array(G, nodelist=list(G))
    (n, m) = A.shape  # should be square
    ATA = A.T @ A  # authority matrix
    # choose fixed starting vector if not given
    if nstart is None:
        x = np.ones((n, 1)) / n
    else:
        x = np.array([nstart.get(n, 0) for n in list(G)], dtype=float)
        x = x / x.sum()

    # power iteration on authority matrix
    i = 0
    while True:
        xlast = x
        x = ATA @ x
        x /= x.max()
        # check convergence, l1 norm
        err = np.absolute(x - xlast).sum()
        if err < tol:
            break
        if i > max_iter:
            raise nx.PowerIterationFailedConvergence(max_iter)
        i += 1

    a = x.flatten()
    h = A @ a
    if normalized:
        h = h / h.sum()
        a = a / a.sum()
    hubs = dict(zip(G, map(float, h)))
    authorities = dict(zip(G, map(float, a)))
    return hubs, authorities
Пример #14
0
def spring_layout(
    G,
    k=None,
    pos=None,
    fixed=None,
    iterations=50,
    threshold=1e-4,
    weight="weight",
    scale=1,
    center=None,
    dim=2,
    seed=None,
):
    """Position nodes using Fruchterman-Reingold force-directed algorithm.

    The algorithm simulates a force-directed representation of the network
    treating edges as springs holding nodes close, while treating nodes
    as repelling objects, sometimes called an anti-gravity force.
    Simulation continues until the positions are close to an equilibrium.

    There are some hard-coded values: minimal distance between
    nodes (0.01) and "temperature" of 0.1 to ensure nodes don't fly away.
    During the simulation, `k` helps determine the distance between nodes,
    though `scale` and `center` determine the size and place after
    rescaling occurs at the end of the simulation.

    Fixing some nodes doesn't allow them to move in the simulation.
    It also turns off the rescaling feature at the simulation's end.
    In addition, setting `scale` to `None` turns off rescaling.

    Parameters
    ----------
    G : NetworkX graph or list of nodes
        A position will be assigned to every node in G.

    k : float (default=None)
        Optimal distance between nodes.  If None the distance is set to
        1/sqrt(n) where n is the number of nodes.  Increase this value
        to move nodes farther apart.

    pos : dict or None  optional (default=None)
        Initial positions for nodes as a dictionary with node as keys
        and values as a coordinate list or tuple.  If None, then use
        random initial positions.

    fixed : list or None  optional (default=None)
        Nodes to keep fixed at initial position.
        Nodes not in ``G.nodes`` are ignored.
        ValueError raised if `fixed` specified and `pos` not.

    iterations : int  optional (default=50)
        Maximum number of iterations taken

    threshold: float optional (default = 1e-4)
        Threshold for relative error in node position changes.
        The iteration stops if the error is below this threshold.

    weight : string or None   optional (default='weight')
        The edge attribute that holds the numerical value used for
        the edge weight.  Larger means a stronger attractive force.
        If None, then all edge weights are 1.

    scale : number or None (default: 1)
        Scale factor for positions. Not used unless `fixed is None`.
        If scale is None, no rescaling is performed.

    center : array-like or None
        Coordinate pair around which to center the layout.
        Not used unless `fixed is None`.

    dim : int
        Dimension of layout.

    seed : int, RandomState instance or None  optional (default=None)
        Set the random state for deterministic node layouts.
        If int, `seed` is the seed used by the random number generator,
        if numpy.random.RandomState instance, `seed` is the random
        number generator,
        if None, the random number generator is the RandomState instance used
        by numpy.random.

    Returns
    -------
    pos : dict
        A dictionary of positions keyed by node

    Examples
    --------
    >>> G = nx.path_graph(4)
    >>> pos = nx.spring_layout(G)

    # The same using longer but equivalent function name
    >>> pos = nx.fruchterman_reingold_layout(G)
    """
    import numpy as np

    G, center = _process_params(G, center, dim)

    if fixed is not None:
        if pos is None:
            raise ValueError("nodes are fixed without positions given")
        for node in fixed:
            if node not in pos:
                raise ValueError("nodes are fixed without positions given")
        nfixed = {node: i for i, node in enumerate(G)}
        fixed = np.asarray([nfixed[node] for node in fixed if node in nfixed])

    if pos is not None:
        # Determine size of existing domain to adjust initial positions
        dom_size = max(coord for pos_tup in pos.values() for coord in pos_tup)
        if dom_size == 0:
            dom_size = 1
        pos_arr = seed.rand(len(G), dim) * dom_size + center

        for i, n in enumerate(G):
            if n in pos:
                pos_arr[i] = np.asarray(pos[n])
    else:
        pos_arr = None
        dom_size = 1

    if len(G) == 0:
        return {}
    if len(G) == 1:
        return {nx.utils.arbitrary_element(G.nodes()): center}

    try:
        # Sparse matrix
        if len(G) < 500:  # sparse solver for large graphs
            raise ValueError
        A = nx.to_scipy_sparse_array(G, weight=weight, dtype="f")
        if k is None and fixed is not None:
            # We must adjust k by domain size for layouts not near 1x1
            nnodes, _ = A.shape
            k = dom_size / np.sqrt(nnodes)
        pos = _sparse_fruchterman_reingold(A, k, pos_arr, fixed, iterations,
                                           threshold, dim, seed)
    except ValueError:
        A = nx.to_numpy_array(G, weight=weight)
        if k is None and fixed is not None:
            # We must adjust k by domain size for layouts not near 1x1
            nnodes, _ = A.shape
            k = dom_size / np.sqrt(nnodes)
        pos = _fruchterman_reingold(A, k, pos_arr, fixed, iterations,
                                    threshold, dim, seed)
    if fixed is None and scale is not None:
        pos = rescale_layout(pos, scale=scale) + center
    pos = dict(zip(G, pos))
    return pos
Пример #15
0
def _transition_matrix(G,
                       nodelist=None,
                       weight="weight",
                       walk_type=None,
                       alpha=0.95):
    """Returns the transition matrix of G.

    This is a row stochastic giving the transition probabilities while
    performing a random walk on the graph. Depending on the value of walk_type,
    P can be the transition matrix induced by a random walk, a lazy random walk,
    or a random walk with teleportation (PageRank).

    Parameters
    ----------
    G : DiGraph
       A NetworkX graph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    weight : string or None, optional (default='weight')
       The edge data key used to compute each value in the matrix.
       If None, then each edge has weight 1.

    walk_type : string or None, optional (default=None)
       If None, `P` is selected depending on the properties of the
       graph. Otherwise is one of 'random', 'lazy', or 'pagerank'

    alpha : real
       (1 - alpha) is the teleportation probability used with pagerank

    Returns
    -------
    P : NumPy matrix
      transition matrix of G.

    Raises
    ------
    NetworkXError
        If walk_type not specified or alpha not in valid range
    """
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    if walk_type is None:
        if nx.is_strongly_connected(G):
            if nx.is_aperiodic(G):
                walk_type = "random"
            else:
                walk_type = "lazy"
        else:
            walk_type = "pagerank"

    A = nx.to_scipy_sparse_array(G,
                                 nodelist=nodelist,
                                 weight=weight,
                                 dtype=float)
    n, m = A.shape
    if walk_type in ["random", "lazy"]:
        # TODO: Rm csr_array wrapper when spdiags array creation becomes available
        DI = sp.sparse.csr_array(
            sp.sparse.spdiags(1.0 / A.sum(axis=1), 0, n, n))
        if walk_type == "random":
            P = DI @ A
        else:
            # TODO: Rm csr_array wrapper when identity array creation becomes available
            I = sp.sparse.csr_array(sp.sparse.identity(n))
            P = (I + DI @ A) / 2.0

    elif walk_type == "pagerank":
        if not (0 < alpha < 1):
            raise nx.NetworkXError("alpha must be between 0 and 1")
        # this is using a dense representation. NOTE: This should be sparsified!
        A = A.toarray()
        # add constant to dangling nodes' row
        A[A.sum(axis=1) == 0, :] = 1 / n
        # normalize
        A = A / A.sum(axis=1)[np.newaxis, :].T
        P = alpha * A + (1 - alpha) / n
    else:
        raise nx.NetworkXError("walk_type must be random, lazy, or pagerank")

    return P
Пример #16
0
def normalized_laplacian_matrix(G, nodelist=None, weight="weight"):
    r"""Returns the normalized Laplacian matrix of G.

    The normalized graph Laplacian is the matrix

    .. math::

        N = D^{-1/2} L D^{-1/2}

    where `L` is the graph Laplacian and `D` is the diagonal matrix of
    node degrees.

    Parameters
    ----------
    G : graph
       A NetworkX graph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    weight : string or None, optional (default='weight')
       The edge data key used to compute each value in the matrix.
       If None, then each edge has weight 1.

    Returns
    -------
    N : Scipy sparse matrix
      The normalized Laplacian matrix of G.

    Notes
    -----
    For MultiGraph/MultiDiGraph, the edges weights are summed.
    See to_numpy_array for other options.

    If the Graph contains selfloops, D is defined as diag(sum(A,1)), where A is
    the adjacency matrix [2]_.

    See Also
    --------
    laplacian_matrix
    normalized_laplacian_spectrum

    References
    ----------
    .. [1] Fan Chung-Graham, Spectral Graph Theory,
       CBMS Regional Conference Series in Mathematics, Number 92, 1997.
    .. [2] Steve Butler, Interlacing For Weighted Graphs Using The Normalized
       Laplacian, Electronic Journal of Linear Algebra, Volume 16, pp. 90-98,
       March 2007.
    """
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    if nodelist is None:
        nodelist = list(G)
    A = nx.to_scipy_sparse_array(G,
                                 nodelist=nodelist,
                                 weight=weight,
                                 format="csr")
    n, m = A.shape
    diags = A.sum(axis=1)
    # TODO: rm csr_array wrapper when spdiags can produce arrays
    D = sp.sparse.csr_array(sp.sparse.spdiags(diags, 0, m, n, format="csr"))
    L = D - A
    with sp.errstate(divide="ignore"):
        diags_sqrt = 1.0 / np.sqrt(diags)
    diags_sqrt[np.isinf(diags_sqrt)] = 0
    # TODO: rm csr_array wrapper when spdiags can produce arrays
    DH = sp.sparse.csr_array(
        sp.sparse.spdiags(diags_sqrt, 0, m, n, format="csr"))
    import warnings

    warnings.warn(
        "normalized_laplacian_matrix will return a scipy.sparse array instead of a matrix in Networkx 3.0.",
        FutureWarning,
        stacklevel=2,
    )
    # TODO: rm csr_matrix wrapper for NX 3.0
    return sp.sparse.csr_matrix(DH @ (L @ DH))
Пример #17
0
    def test_format_keyword(self):
        WP4 = nx.Graph()
        WP4.add_edges_from(
            (n, n + 1, dict(weight=0.5, other=0.3)) for n in range(3))
        P4 = path_graph(4)
        A = nx.to_scipy_sparse_array(P4, format="csr")
        np.testing.assert_equal(
            A.todense(),
            nx.to_scipy_sparse_array(WP4, weight=None).todense())

        A = nx.to_scipy_sparse_array(P4, format="csc")
        np.testing.assert_equal(
            A.todense(),
            nx.to_scipy_sparse_array(WP4, weight=None).todense())

        A = nx.to_scipy_sparse_array(P4, format="coo")
        np.testing.assert_equal(
            A.todense(),
            nx.to_scipy_sparse_array(WP4, weight=None).todense())

        A = nx.to_scipy_sparse_array(P4, format="bsr")
        np.testing.assert_equal(
            A.todense(),
            nx.to_scipy_sparse_array(WP4, weight=None).todense())

        A = nx.to_scipy_sparse_array(P4, format="lil")
        np.testing.assert_equal(
            A.todense(),
            nx.to_scipy_sparse_array(WP4, weight=None).todense())

        A = nx.to_scipy_sparse_array(P4, format="dia")
        np.testing.assert_equal(
            A.todense(),
            nx.to_scipy_sparse_array(WP4, weight=None).todense())

        A = nx.to_scipy_sparse_array(P4, format="dok")
        np.testing.assert_equal(
            A.todense(),
            nx.to_scipy_sparse_array(WP4, weight=None).todense())
Пример #18
0
def harmonic_function(G, max_iter=30, label_name="label"):
    """Node classification by Harmonic function

    Parameters
    ----------
    G : NetworkX Graph
    max_iter : int
        maximum number of iterations allowed
    label_name : string
        name of target labels to predict

    Returns
    -------
    predicted : list
        List of length ``len(G)`` with the predicted labels for each node.

    Raises
    ------
    NetworkXError
        If no nodes in `G` have attribute `label_name`.

    Examples
    --------
    >>> from networkx.algorithms import node_classification
    >>> G = nx.path_graph(4)
    >>> G.nodes[0]["label"] = "A"
    >>> G.nodes[3]["label"] = "B"
    >>> G.nodes(data=True)
    NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
    >>> G.edges()
    EdgeView([(0, 1), (1, 2), (2, 3)])
    >>> predicted = node_classification.harmonic_function(G)
    >>> predicted
    ['A', 'A', 'B', 'B']

    References
    ----------
    Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
    Semi-supervised learning using gaussian fields and harmonic functions.
    In ICML (Vol. 3, pp. 912-919).
    """
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    X = nx.to_scipy_sparse_array(G)  # adjacency matrix
    labels, label_dict = _get_label_info(G, label_name)

    if labels.shape[0] == 0:
        raise nx.NetworkXError(
            f"No node on the input graph is labeled by '{label_name}'.")

    n_samples = X.shape[0]
    n_classes = label_dict.shape[0]
    F = np.zeros((n_samples, n_classes))

    # Build propagation matrix
    degrees = X.sum(axis=0)
    degrees[degrees == 0] = 1  # Avoid division by 0
    # TODO: csr_array
    D = sp.sparse.csr_array(sp.sparse.diags((1.0 / degrees), offsets=0))
    P = (D @ X).tolil()
    P[labels[:, 0]] = 0  # labels[:, 0] indicates IDs of labeled nodes
    # Build base matrix
    B = np.zeros((n_samples, n_classes))
    B[labels[:, 0], labels[:, 1]] = 1

    for _ in range(max_iter):
        F = (P @ F) + B

    return label_dict[np.argmax(F, axis=1)].tolist()
Пример #19
0
 def test_identity_weighted_digraph_matrix(self):
     """Conversion from weighted digraph to sparse matrix to weighted digraph."""
     A = nx.to_scipy_sparse_array(self.G4)
     self.identity_conversion(self.G4, A, nx.DiGraph())
Пример #20
0
 def test_null_raise(self):
     with pytest.raises(nx.NetworkXError):
         nx.to_scipy_sparse_array(nx.Graph())
Пример #21
0
def local_and_global_consistency(G, alpha=0.99, max_iter=30, label_name="label"):
    """Node classification by Local and Global Consistency

    Parameters
    ----------
    G : NetworkX Graph
    alpha : float
        Clamping factor
    max_iter : int
        Maximum number of iterations allowed
    label_name : string
        Name of target labels to predict

    Returns
    -------
    predicted : list
        List of length ``len(G)`` with the predicted labels for each node.

    Raises
    ------
    NetworkXError
        If no nodes in `G` have attribute `label_name`.

    Examples
    --------
    >>> from networkx.algorithms import node_classification
    >>> G = nx.path_graph(4)
    >>> G.nodes[0]["label"] = "A"
    >>> G.nodes[3]["label"] = "B"
    >>> G.nodes(data=True)
    NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
    >>> G.edges()
    EdgeView([(0, 1), (1, 2), (2, 3)])
    >>> predicted = node_classification.local_and_global_consistency(G)
    >>> predicted
    ['A', 'A', 'B', 'B']

    References
    ----------
    Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
    Learning with local and global consistency.
    Advances in neural information processing systems, 16(16), 321-328.
    """
    import numpy as np
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    X = nx.to_scipy_sparse_array(G)  # adjacency matrix
    labels, label_dict = _get_label_info(G, label_name)

    if labels.shape[0] == 0:
        raise nx.NetworkXError(
            f"No node on the input graph is labeled by '{label_name}'."
        )

    n_samples = X.shape[0]
    n_classes = label_dict.shape[0]
    F = np.zeros((n_samples, n_classes))

    # Build propagation matrix
    degrees = X.sum(axis=0)
    degrees[degrees == 0] = 1  # Avoid division by 0
    # TODO: csr_array
    D2 = np.sqrt(sp.sparse.csr_array(sp.sparse.diags((1.0 / degrees), offsets=0)))
    P = alpha * ((D2 @ X) @ D2)
    # Build base matrix
    B = np.zeros((n_samples, n_classes))
    B[labels[:, 0], labels[:, 1]] = 1 - alpha

    for _ in range(max_iter):
        F = (P @ F) + B

    return label_dict[np.argmax(F, axis=1)].tolist()
Пример #22
0
def modularity_matrix(G, nodelist=None, weight=None):
    r"""Returns the modularity matrix of G.

    The modularity matrix is the matrix B = A - <A>, where A is the adjacency
    matrix and <A> is the average adjacency matrix, assuming that the graph
    is described by the configuration model.

    More specifically, the element B_ij of B is defined as

    .. math::
        A_{ij} - {k_i k_j \over 2 m}

    where k_i is the degree of node i, and where m is the number of edges
    in the graph. When weight is set to a name of an attribute edge, Aij, k_i,
    k_j and m are computed using its value.

    Parameters
    ----------
    G : Graph
       A NetworkX graph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    weight : string or None, optional (default=None)
       The edge attribute that holds the numerical value used for
       the edge weight.  If None then all edge weights are 1.

    Returns
    -------
    B : Numpy matrix
      The modularity matrix of G.

    Examples
    --------
    >>> k = [3, 2, 2, 1, 0]
    >>> G = nx.havel_hakimi_graph(k)
    >>> B = nx.modularity_matrix(G)


    See Also
    --------
    to_numpy_array
    modularity_spectrum
    adjacency_matrix
    directed_modularity_matrix

    References
    ----------
    .. [1] M. E. J. Newman, "Modularity and community structure in networks",
           Proc. Natl. Acad. Sci. USA, vol. 103, pp. 8577-8582, 2006.
    """
    import numpy as np

    if nodelist is None:
        nodelist = list(G)
    A = nx.to_scipy_sparse_array(G,
                                 nodelist=nodelist,
                                 weight=weight,
                                 format="csr")
    k = A.sum(axis=1)
    m = k.sum() * 0.5
    # Expected adjacency matrix
    X = np.outer(k, k) / (2 * m)

    import warnings

    warnings.warn(
        "modularity_matrix will return a numpy array instead of a matrix in NetworkX 3.0.",
        FutureWarning,
        stacklevel=2,
    )
    # TODO: rm np.asmatrix for networkx 3.0
    return np.asmatrix(A - X)
Пример #23
0
 def test_empty(self):
     G = nx.Graph()
     G.add_node(1)
     M = nx.to_scipy_sparse_array(G)
     np.testing.assert_equal(M.toarray(), np.array([[0]]))
Пример #24
0
def directed_modularity_matrix(G, nodelist=None, weight=None):
    """Returns the directed modularity matrix of G.

    The modularity matrix is the matrix B = A - <A>, where A is the adjacency
    matrix and <A> is the expected adjacency matrix, assuming that the graph
    is described by the configuration model.

    More specifically, the element B_ij of B is defined as

    .. math::
        B_{ij} = A_{ij} - k_i^{out} k_j^{in} / m

    where :math:`k_i^{in}` is the in degree of node i, and :math:`k_j^{out}` is the out degree
    of node j, with m the number of edges in the graph. When weight is set
    to a name of an attribute edge, Aij, k_i, k_j and m are computed using
    its value.

    Parameters
    ----------
    G : DiGraph
       A NetworkX DiGraph

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().

    weight : string or None, optional (default=None)
       The edge attribute that holds the numerical value used for
       the edge weight.  If None then all edge weights are 1.

    Returns
    -------
    B : Numpy matrix
      The modularity matrix of G.

    Examples
    --------
    >>> G = nx.DiGraph()
    >>> G.add_edges_from(
    ...     (
    ...         (1, 2),
    ...         (1, 3),
    ...         (3, 1),
    ...         (3, 2),
    ...         (3, 5),
    ...         (4, 5),
    ...         (4, 6),
    ...         (5, 4),
    ...         (5, 6),
    ...         (6, 4),
    ...     )
    ... )
    >>> B = nx.directed_modularity_matrix(G)


    Notes
    -----
    NetworkX defines the element A_ij of the adjacency matrix as 1 if there
    is a link going from node i to node j. Leicht and Newman use the opposite
    definition. This explains the different expression for B_ij.

    See Also
    --------
    to_numpy_array
    modularity_spectrum
    adjacency_matrix
    modularity_matrix

    References
    ----------
    .. [1] E. A. Leicht, M. E. J. Newman,
        "Community structure in directed networks",
        Phys. Rev Lett., vol. 100, no. 11, p. 118703, 2008.
    """
    import numpy as np

    if nodelist is None:
        nodelist = list(G)
    A = nx.to_scipy_sparse_array(G,
                                 nodelist=nodelist,
                                 weight=weight,
                                 format="csr")
    k_in = A.sum(axis=0)
    k_out = A.sum(axis=1)
    m = k_in.sum()
    # Expected adjacency matrix
    X = np.outer(k_out, k_in) / m

    import warnings

    warnings.warn(
        "directed_modularity_matrix will return a numpy array instead of a matrix in NetworkX 3.0.",
        FutureWarning,
        stacklevel=2,
    )
    # TODO: rm np.asmatrix for networkx 3.0
    return np.asmatrix(A - X)
Пример #25
0
 def test_identity_graph_matrix(self):
     "Conversion from graph to sparse matrix to graph."
     A = nx.to_scipy_sparse_array(self.G1)
     self.identity_conversion(self.G1, A, nx.Graph())
Пример #26
0
 def test_identity_digraph_matrix(self):
     "Conversion from digraph to sparse matrix to digraph."
     A = nx.to_scipy_sparse_array(self.G2)
     self.identity_conversion(self.G2, A, nx.DiGraph())
Пример #27
0
def bethe_hessian_matrix(G, r=None, nodelist=None):
    r"""Returns the Bethe Hessian matrix of G.

    The Bethe Hessian is a family of matrices parametrized by r, defined as
    H(r) = (r^2 - 1) I - r A + D where A is the adjacency matrix, D is the
    diagonal matrix of node degrees, and I is the identify matrix. It is equal
    to the graph laplacian when the regularizer r = 1.

    The default choice of regularizer should be the ratio [2]

    .. math::
      r_m = \left(\sum k_i \right)^{-1}\left(\sum k_i^2 \right) - 1

    Parameters
    ----------
    G : Graph
       A NetworkX graph

    r : float
       Regularizer parameter

    nodelist : list, optional
       The rows and columns are ordered according to the nodes in nodelist.
       If nodelist is None, then the ordering is produced by G.nodes().


    Returns
    -------
    H : scipy.sparse.csr_matrix
      The Bethe Hessian matrix of G, with paramter r.

    Examples
    --------
    >>> k = [3, 2, 2, 1, 0]
    >>> G = nx.havel_hakimi_graph(k)
    >>> H = nx.modularity_matrix(G)


    See Also
    --------
    bethe_hessian_spectrum
    adjacency_matrix
    laplacian_matrix

    References
    ----------
    .. [1] A. Saade, F. Krzakala and L. Zdeborová
       "Spectral clustering of graphs with the bethe hessian",
       Advances in Neural Information Processing Systems. 2014.
    .. [2] C. M. Lee, E. Levina
       "Estimating the number of communities in networks by spectral methods"
       arXiv:1507.00827, 2015.
    """
    import scipy as sp
    import scipy.sparse  # call as sp.sparse

    if nodelist is None:
        nodelist = list(G)
    if r is None:
        r = sum(d**2
                for v, d in nx.degree(G)) / sum(d for v, d in nx.degree(G)) - 1
    A = nx.to_scipy_sparse_array(G, nodelist=nodelist, format="csr")
    n, m = A.shape
    # TODO: Rm csr_array wrapper when spdiags array creation becomes available
    D = sp.sparse.csr_array(
        sp.sparse.spdiags(A.sum(axis=1), 0, m, n, format="csr"))
    # TODO: Rm csr_array wrapper when eye array creation becomes available
    I = sp.sparse.csr_array(sp.sparse.eye(m, n, format="csr"))
    import warnings

    warnings.warn(
        "bethe_hessian_matrix will return a scipy.sparse array instead of a matrix in Networkx 3.0",
        FutureWarning,
        stacklevel=2,
    )
    # TODO: Remove the csr_matrix wrapper in NetworkX 3.0
    return sp.sparse.csr_matrix((r**2 - 1) * I - r * A + D)