Пример #1
0
def betweenness_centrality(
    G,
    k=None,
    normalized=True,
    weight=None,
    endpoints=False,
    seed=None,
    result_dtype=np.float64,
):
    """
    Compute the betweenness centrality for all nodes of the graph G from a
    sample of 'k' sources.
    CuGraph does not currently support the 'endpoints' and 'weight' parameters
    as seen in the corresponding networkX call.

    Parameters
    ----------
    G : cuGraph.Graph
        cuGraph graph descriptor with connectivity information. The graph can
        be either directed (DiGraph) or undirected (Graph).
        Weights in the graph are ignored, the current implementation uses
        BFS traversals. Use weight parameter if weights need to be considered
        (currently not supported)

    k : int or list or None, optional, default=None
        If k is not None, use k node samples to estimate betweenness.  Higher
        values give better approximation.
        If k is a list, use the content of the list for estimation: the list
        should contain vertex identifiers.
        Vertices obtained through sampling or defined as a list will be used as
        sources for traversals inside the algorithm.

    normalized : bool, optional
        Default is True.
        If true, the betweenness values are normalized by
        2 / ((n - 1) * (n - 2)) for Graphs (undirected), and
        1 / ((n - 1) * (n - 2)) for DiGraphs (directed graphs)
        where n is the number of nodes in G.
        Normalization will ensure that values are in [0, 1],
        this normalization scales for the highest possible value where one
        node is crossed by every single shortest path.

    weight : cudf.DataFrame, optional, default=None
        Specifies the weights to be used for each edge.
        Should contain a mapping between
        edges and weights.
        (Not Supported: any value other than None raises
        NotImplementedError)

    endpoints : bool, optional, default=False
        If true, include the endpoints in the shortest path counts.
        (Not Supported; the value is forwarded to the backend wrapper
        without being validated here)

    seed : optional
        if k is specified and k is an integer, use seed to initialize the
        random number generator.
        Using None as seed relies on random.seed() behavior: using current
        system time
        If k is either None or list: seed parameter is ignored

    result_dtype : np.float32 or np.float64, optional, default=np.float64
        Indicate the data type of the betweenness centrality scores

    Returns
    -------
    df : cudf.DataFrame
        GPU data frame containing two cudf.Series of size V: the vertex
        identifiers and the corresponding betweenness centrality values.
        Please note that the resulting 'vertex' column might not be
        in ascending order.

        df['vertex'] : cudf.Series
            Contains the vertex identifiers
        df['betweenness_centrality'] : cudf.Series
            Contains the betweenness centrality of vertices

    Raises
    ------
    NotImplementedError
        If weight is not None.
    TypeError
        If result_dtype is neither np.float32 nor np.float64.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1')
    >>> bc = cugraph.betweenness_centrality(G)
    """
    # Reject unsupported arguments first: these checks do not depend on the
    # graph, so an invalid call fails before any source selection is done.
    if weight is not None:
        raise NotImplementedError("weighted implementation of betweenness "
                                  "centrality not currently supported")

    if result_dtype not in [np.float32, np.float64]:
        raise TypeError("result type can only be np.float32 or np.float64")

    # vertices is intended to be a cuDF series that contains a sampling of
    # k vertices out of the graph.
    #
    # NOTE: cuDF doesn't currently support sampling, but there is a python
    # workaround.
    #
    # The helper hands back both the selected sources and the (possibly
    # updated) value of k that the wrapper expects.
    vertices, k = _initialize_vertices(G, k, seed)

    df = betweenness_centrality_wrapper.betweenness_centrality(
        G, normalized, endpoints, weight, k, vertices, result_dtype)

    # Translate internal vertex ids back to the caller's identifiers when the
    # graph was renumbered.
    if G.renumbered:
        return G.unrenumber(df, "vertex")

    return df
Пример #2
0
def betweenness_centrality(G,
                           k=None,
                           normalized=True,
                           weight=None,
                           endpoints=False,
                           seed=None):
    """
    Compute betweenness centrality for the nodes of the graph G. cuGraph
    does not currently support the 'endpoints' and 'weight' parameters
    as seen in the corresponding networkX call.

    Parameters
    ----------
    G : cuGraph.Graph
        cuGraph graph descriptor with connectivity information. The graph can
        contain either directed or undirected edges where undirected edges are
        represented as directed edges in both directions.
    k : int, optional
        If k is not None, use k node samples to estimate betweenness.  Higher
        values give better approximation.
        Sampling is not currently supported: any value other than None
        raises NotImplementedError.
    normalized : bool, optional
        Value defaults to true.  If true, the betweenness values are normalized
        by 2/((n-1)(n-2)) for graphs, and 1 / ((n-1)(n-2)) for directed graphs
        where n is the number of nodes in G.
    weight : cudf.Series
        Specifies the weights to be used.
        Weights are not currently supported: any value other than None
        raises NotImplementedError.
    endpoints : bool, optional
        If true, include the endpoints in the shortest path counts.
        The value is forwarded to the backend wrapper without being
        validated here.
    seed : optional
        If k is specified and seed is not None, use seed to initialize the
        random number generator.  Currently unused by this function, since
        sampling is rejected.

    Returns
    -------
    df : cudf.DataFrame
        GPU data frame containing two cudf.Series of size V: the vertex
        identifiers and the corresponding betweenness centrality values.

        df['vertex'] : cudf.Series
            Contains the vertex identifiers
        df['betweenness_centrality'] : cudf.Series
            Contains the betweenness centrality of vertices

    Raises
    ------
    NotImplementedError
        If k is not None (sampling) or weight is not None (weighted
        computation).  NotImplementedError is a subclass of Exception, so
        callers that catch Exception keep working.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> sources = cudf.Series(M['0'])
    >>> destinations = cudf.Series(M['1'])
    >>> G = cugraph.Graph()
    >>> G.add_edge_list(sources, destinations, None)
    >>> bc = cugraph.betweenness_centrality(G)
    """

    #
    # Some features not implemented for gunrock implementation, failing fast,
    # but passing parameters through
    #
    # vertices is intended to be a cuDF series that contains a sampling of
    # k vertices out of the graph.
    #
    # NOTE: cuDF doesn't currently support sampling, but there is a python
    # workaround.
    #
    vertices = None
    if k is not None:
        raise NotImplementedError("sampling feature of betweenness "
                                  "centrality not currently supported")

    if weight is not None:
        raise NotImplementedError("weighted implementation of betweenness "
                                  "centrality not currently supported")

    # At this point k and weight are both None and vertices is None; they are
    # still forwarded so the wrapper's signature is honoured.
    df = betweenness_centrality_wrapper.betweenness_centrality(
        G, normalized, endpoints, weight, k, vertices)
    return df
Пример #3
0
def betweenness_centrality(
    G,
    k=None,
    normalized=True,
    weight=None,
    endpoints=False,
    seed=None,
    result_dtype=np.float64,
):
    """
    Compute the betweenness centrality for all vertices of the graph G.
    Betweenness centrality is a measure of the number of shortest paths that
    pass through a vertex.  A vertex with a high betweenness centrality score
    has more paths passing through it and is therefore believed to be more
    important.

    To improve performance, rather than doing an all-pair shortest path,
    a sample of k starting vertices can be used.

    CuGraph does not currently support the 'endpoints' and 'weight' parameters
    as seen in the corresponding networkX call.

    Parameters
    ----------
    G : cuGraph.Graph or networkx.Graph
        The graph can be either directed (Graph(directed=True)) or undirected.
        Weights in the graph are ignored, the current implementation uses
        BFS traversals. Use weight parameter if weights need to be considered
        (currently not supported)

    k : int or list or None, optional (default=None)
        If k is not None, use k node samples to estimate betweenness.  Higher
        values give better approximation.  If k is a list, use the content
        of the list for estimation: the list should contain vertex
        identifiers. If k is None (the default), all the vertices are used
        to estimate betweenness.  Vertices obtained through sampling or
        defined as a list will be used as sources for traversals inside the
        algorithm.

    normalized : bool, optional
        Default is True.
        If true, the betweenness values are normalized by
        ``2 / ((n - 1) * (n - 2))`` for undirected Graphs, and
        ``1 / ((n - 1) * (n - 2))`` for directed Graphs
        where n is the number of nodes in G.
        Normalization will ensure that values are in [0, 1],
        this normalization scales for the highest possible value where one
        node is crossed by every single shortest path.

    weight : cudf.DataFrame, optional (default=None)
        Specifies the weights to be used for each edge.
        Should contain a mapping between
        edges and weights.
        (Not Supported: any value other than None raises
        NotImplementedError)

    endpoints : bool, optional (default=False)
        If true, include the endpoints in the shortest path counts.
        (Not Supported; the value is forwarded to the backend wrapper
        without being validated here)

    seed : optional
        if k is specified and k is an integer, use seed to initialize the
        random number generator.
        Using None as seed relies on random.seed() behavior: using current
        system time
        If k is either None or list: seed parameter is ignored

    result_dtype : np.float32 or np.float64, optional, default=np.float64
        Indicate the data type of the betweenness centrality scores

    Returns
    -------
    df : cudf.DataFrame or Dictionary if using NetworkX
        GPU data frame containing two cudf.Series of size V: the vertex
        identifiers and the corresponding betweenness centrality values.
        Please note that the resulting 'vertex' column might not be
        in ascending order.  The Dictionary contains the same two columns

        df['vertex'] : cudf.Series
            Contains the vertex identifiers
        df['betweenness_centrality'] : cudf.Series
            Contains the betweenness centrality of vertices

    Raises
    ------
    NotImplementedError
        If weight is not None.
    TypeError
        If result_dtype is neither np.float32 nor np.float64.

    Examples
    --------
    >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
    ...                     dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
    >>> bc = cugraph.betweenness_centrality(G)

    """
    # Reject unsupported arguments before touching the graph.
    if weight is not None:
        raise NotImplementedError("weighted implementation of betweenness "
                                  "centrality not currently supported")

    if result_dtype not in [np.float32, np.float64]:
        raise TypeError("result type can only be np.float32 or np.float64")

    # The helper returns the graph to run on plus a flag that is True when
    # the caller's input should be answered with a dictionary (see below).
    G, isNx = ensure_cugraph_obj_for_nx(G)

    # vertices is intended to be a cuDF series that contains a sampling of
    # k vertices out of the graph.
    #
    # NOTE: cuDF doesn't currently support sampling, but there is a python
    # workaround.
    vertices = _initialize_vertices(G, k, seed)

    df = betweenness_centrality_wrapper.betweenness_centrality(
        G, normalized, endpoints, weight, vertices, result_dtype)

    # Translate internal vertex ids back to the caller's identifiers when the
    # graph was renumbered.
    if G.renumbered:
        df = G.unrenumber(df, "vertex")

    if isNx is True:
        # Named `scores` rather than `dict` so the builtin is not shadowed.
        scores = df_score_to_dictionary(df, 'betweenness_centrality')
        return scores

    return df
def betweenness_centrality(G,
                           k=None,
                           normalized=True,
                           weight=None,
                           endpoints=False,
                           seed=None,
                           result_dtype=np.float64):
    """
    Compute the betweenness centrality for all nodes of the graph G from a
    sample of 'k' sources.
    CuGraph does not currently support the 'endpoints' and 'weight' parameters
    as seen in the corresponding networkX call.

    Parameters
    ----------
    G : cuGraph.Graph
        cuGraph graph descriptor with connectivity information. The graph can
        be either directed (DiGraph) or undirected (Graph).
        Weights in the graph are ignored, the current implementation uses
        BFS traversals. Use weight parameter if weights need to be considered
        (currently not supported)

    k : int or list or None, optional, default=None
        If k is not None, use k node samples to estimate betweenness.  Higher
        values give better approximation.
        If k is a list, use the content of the list for estimation: the list
        should contain vertex identifiers.
        Vertices obtained through sampling or defined as a list will be used as
        sources for traversals inside the algorithm.

    normalized : bool, optional
        Default is True.
        If true, the betweenness values are normalized by
        2 / ((n - 1) * (n - 2)) for Graphs (undirected), and
        1 / ((n - 1) * (n - 2)) for DiGraphs (directed graphs)
        where n is the number of nodes in G.
        Normalization will ensure that the values are in [0, 1],
        this normalization scales for the highest possible value where one
        node is crossed by every single shortest path.

    weight : cudf.DataFrame, optional, default=None
        Specifies the weights to be used for each edge.
        Should contain a mapping between
        edges and weights.
        (Not Supported: any value other than None raises
        NotImplementedError)

    endpoints : bool, optional, default=False
        If true, include the endpoints in the shortest path counts.
        (Not Supported: passing True raises NotImplementedError)

    seed : optional
        if k is specified and k is an integer, use seed to initialize the
        random number generator.
        Using None as seed relies on random.seed() behavior: using current
        system time
        If k is either None or list: seed parameter is ignored

    result_dtype : np.float32 or np.float64, optional, default=np.float64
        Indicate the data type of the betweenness centrality scores

    Returns
    -------
    df : cudf.DataFrame
        GPU data frame containing two cudf.Series of size V: the vertex
        identifiers and the corresponding betweenness centrality values.
        Please note that the resulting 'vertex' column might not be
        in ascending order.

        df['vertex'] : cudf.Series
            Contains the vertex identifiers
        df['betweenness_centrality'] : cudf.Series
            Contains the betweenness centrality of vertices

    Raises
    ------
    NotImplementedError
        If endpoints is True or weight is not None.
    TypeError
        If result_dtype is neither np.float32 nor np.float64.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1')
    >>> bc = cugraph.betweenness_centrality(G)
    """

    # Reject unsupported arguments first: these checks do not depend on the
    # graph, so an invalid call fails before any sampling or renumbering
    # lookup is performed (and before the global RNG is re-seeded).
    if endpoints is True:
        raise NotImplementedError("endpoints accumulation for betweenness "
                                  "centrality not currently supported")

    if weight is not None:
        raise NotImplementedError("weighted implementation of betweenness "
                                  "centrality not currently supported")
    if result_dtype not in [np.float32, np.float64]:
        raise TypeError("result type can only be np.float32 or np.float64")

    # vertices is intended to be a cuDF series that contains a sampling of
    # k vertices out of the graph.
    #
    # NOTE: cuDF doesn't currently support sampling, but there is a python
    # workaround.
    #
    vertices = None

    if k is not None:
        # In order to compare with pre-set sources,
        # k can either be a list or an integer or None
        #  int: Generate a random sample with k elements
        # list: k becomes the length of the list and vertices become the
        #       content
        # None: All the vertices are considered
        # NOTE: We do not renumber in case k is an int, the sampling is
        #       not operating on the valid vertices identifiers but their
        #       indices:
        # Example:
        # - vertex '2' is missing
        # - vertices '0' '1' '3' '4' exist
        # - There is a vertex at index 2 (there is no guarantee that it is
        #   vertex '3')
        if isinstance(k, int):
            # Seeds the module-level generator, so seed=None falls back to
            # random.seed()'s default behavior.
            random.seed(seed)
            vertices = random.sample(range(G.number_of_vertices()), k)
        # Using k as a list allows to have an easier way to compare against
        # other implementations
        elif isinstance(k, list):
            vertices = k
            k = len(vertices)
            # We assume that the list that was provided is not the indices
            # in the graph structure but the vertices identifiers in the graph
            # hence: [1, 2, 10] should proceed to sampling on vertices that
            # have 1, 2 and 10 as their identifiers
            # FIXME: There might be a cleaner way to obtain the inverse mapping
            if G.renumbered:
                # Look the map up once instead of twice per vertex.
                renumber_map = G.edgelist.renumber_map
                vertices = [
                    renumber_map[renumber_map == vert].index[0]
                    for vert in vertices
                ]

    df = betweenness_centrality_wrapper.betweenness_centrality(
        G, normalized, endpoints, weight, k, vertices, result_dtype)
    return df