Пример #1
0
def jaccard_coefficient(G, ebunch=None):
    """
    NetworkX-compatible entry point for the Jaccard coefficient.

    Thin wrapper around `jaccard`: the input graph is passed through
    `check_nx_graph`, the optional vertex pairs are forwarded to `jaccard`,
    and, when the input was a NetworkX graph, the resulting dataframe is
    converted with `df_edge_score_to_dictionary`.

    Parameters
    ----------
    G : cugraph.Graph or networkx.Graph
        Graph descriptor, should contain the connectivity information
        as an edge list (edge weights are not used for this algorithm). The
        graph should be undirected where an undirected edge is represented by a
        directed edge in both direction. The adjacency list will be computed if
        not already present.
    ebunch : cudf.DataFrame or NetworkX-style edge bunch, optional
        Pairs of vertices for which the coefficient is computed. With a
        cuGraph graph this is a GPU dataframe consisting of two columns and
        is handed to `jaccard` unchanged. With a NetworkX graph it may be
        anything `pandas.DataFrame` can be built from; it is copied to the
        GPU first. If not provided, the current implementation computes the
        jaccard coefficient for all adjacent vertices in the graph.

    Returns
    -------
    df : cudf.DataFrame
        GPU data frame of size E (the default) or the size of the given pairs
        (first, second) containing the Jaccard weights. The ordering is
        relative to the adjacency list, or that given by the specified vertex
        pairs.

        df['source'] : cudf.Series
            The source vertex ID (will be identical to first if specified)
        df['destination'] : cudf.Series
            The destination vertex ID (will be identical to second if
            specified)
        df['jaccard_coeff'] : cudf.Series
            The computed Jaccard coefficient between the source and destination
            vertices

        When `G` was a NetworkX graph, the value returned is instead whatever
        `df_edge_score_to_dictionary` builds from these three columns.

    Examples
    --------
    >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                     dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
    >>> df = cugraph.jaccard_coefficient(G)
    """
    G, isNx = check_nx_graph(G)

    vertex_pair = None
    if ebunch is not None:
        if isNx is True:
            # NetworkX callers pass host-side pairs; stage them through
            # pandas to obtain a GPU dataframe.
            vertex_pair = cudf.from_pandas(pd.DataFrame(ebunch))
        else:
            # cuGraph callers already supply the documented cudf.DataFrame.
            # Forward it instead of silently dropping it, which previously
            # made the call score every edge regardless of `ebunch`.
            vertex_pair = ebunch

    df = jaccard(G, vertex_pair)

    if isNx is True:
        df = df_edge_score_to_dictionary(df,
                                         k="jaccard_coeff",
                                         src="source",
                                         dst="destination")

    return df
Пример #2
0
def sorensen_coefficient(G, ebunch=None):
    """
    NetworkX-compatible entry point for the Sorensen coefficient.

    Thin wrapper around `sorensen`: the input graph is passed through
    `ensure_cugraph_obj_for_nx`, the optional vertex pairs are forwarded to
    `sorensen`, and, when the input was a NetworkX graph, the resulting
    dataframe is converted with `df_edge_score_to_dictionary`.

    Parameters
    ----------
    G : cugraph.Graph or networkx.Graph
        Graph instance, should contain the connectivity information
        as an edge list (edge weights are not used for this algorithm). The
        graph should be undirected where an undirected edge is represented by a
        directed edge in both direction. The adjacency list will be computed if
        not already present.
    ebunch : cudf.DataFrame or NetworkX-style edge bunch, optional
        (default=None)
        Pairs of vertices for which the coefficient is computed. With a
        cuGraph graph this is a GPU dataframe consisting of two columns and
        is handed to `sorensen` unchanged. With a NetworkX graph it may be
        anything `cudf.DataFrame` can be built from. If not provided, the
        current implementation computes the sorensen coefficient for all
        adjacent vertices in the graph.

    Returns
    -------
    df : cudf.DataFrame
        GPU data frame of size E (the default) or the size of the given pairs
        (first, second) containing the Sorensen weights. The ordering is
        relative to the adjacency list, or that given by the specified vertex
        pairs.

        df['source'] : cudf.Series
            The source vertex ID (will be identical to first if specified)
        df['destination'] : cudf.Series
            The destination vertex ID (will be identical to second if
            specified)
        df['sorensen_coeff'] : cudf.Series
            The computed sorensen coefficient between the source and
            destination vertices

        When `G` was a NetworkX graph, the value returned is instead whatever
        `df_edge_score_to_dictionary` builds from these three columns.

    Examples
    --------
    >>> gdf = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ',
    ...                     dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
    >>> df = cugraph.sorensen_coefficient(G)

    """
    G, isNx = ensure_cugraph_obj_for_nx(G)

    vertex_pair = None
    if ebunch is not None:
        if isNx is True:
            # NetworkX callers pass host-side pairs; build a GPU dataframe
            # from them.
            vertex_pair = cudf.DataFrame(ebunch)
        else:
            # cuGraph callers already supply the documented cudf.DataFrame.
            # Forward it instead of silently dropping it, which previously
            # made the call score every edge regardless of `ebunch`.
            vertex_pair = ebunch

    df = sorensen(G, vertex_pair)

    if isNx is True:
        df = df_edge_score_to_dictionary(df,
                                         k="sorensen_coeff",
                                         src="source",
                                         dst="destination")

    return df
Пример #3
0
def overlap_coefficient(G, ebunch=None):
    """
    NetworkX-compatible entry point for the overlap coefficient.

    Thin wrapper around `overlap`: the input graph is passed through
    `check_nx_graph`, the optional vertex pairs are forwarded to `overlap`,
    and, when the input was a NetworkX graph, the resulting dataframe is
    converted with `df_edge_score_to_dictionary`.

    Parameters
    ----------
    G : cugraph.Graph or networkx.Graph
        Graph on which the coefficient is computed.
    ebunch : cudf.DataFrame or NetworkX-style edge bunch, optional
        Pairs of vertices for which the coefficient is computed. With a
        cuGraph graph this is handed to `overlap` unchanged. With a NetworkX
        graph it may be anything `pandas.DataFrame` can be built from; it is
        copied to the GPU first. If not provided, `overlap` is called without
        vertex pairs.

    Returns
    -------
    df : cudf.DataFrame
        The dataframe produced by `overlap`. When `G` was a NetworkX graph,
        the value returned is instead whatever `df_edge_score_to_dictionary`
        builds from its 'source', 'destination' and 'overlap_coeff' columns.
    """
    G, isNx = check_nx_graph(G)

    vertex_pair = None
    if ebunch is not None:
        if isNx is True:
            # NetworkX callers pass host-side pairs; stage them through
            # pandas to obtain a GPU dataframe.
            vertex_pair = cudf.from_pandas(pd.DataFrame(ebunch))
        else:
            # cuGraph callers already supply a GPU dataframe. Forward it
            # instead of silently dropping it, which previously made the
            # call score every edge regardless of `ebunch`.
            vertex_pair = ebunch

    df = overlap(G, vertex_pair)

    if isNx is True:
        df = df_edge_score_to_dictionary(df,
                                         k="overlap_coeff",
                                         src="source",
                                         dst="destination")

    return df
Пример #4
0
def edge_betweenness_centrality(G,
                                k=None,
                                normalized=True,
                                weight=None,
                                seed=None,
                                result_dtype=np.float64):
    """
    Return the betweenness centrality of every edge of the graph G.

    Edge betweenness centrality measures how many shortest paths run over
    an edge; an edge with a high score carries more paths and is therefore
    considered more important. Instead of an all-pairs shortest path, a
    sample of k starting vertices may be used.

    The 'weight' parameter of the corresponding NetworkX call is not
    currently supported by cuGraph.

    Parameters
    ----------
    G : cuGraph.Graph or networkx.Graph
        The graph can be either directed (DiGraph) or undirected (Graph).
        Weights in the graph are ignored, the current implementation uses
        BFS traversals. Use weight parameter if weights need to be considered
        (currently not supported)

    k : int or list or None, optional, default=None
        If k is not None, use k node samples to estimate betweenness.  Higher
        values give better approximation.
        If k is a list, its content is used for the estimation: the list
        should contain vertex identifiers.
        Vertices obtained through sampling or given as a list are used as
        sources for traversals inside the algorithm.

    normalized : bool, optional
        Default is True.
        If true, the betweenness values are normalized by
        2 / (n * (n - 1)) for Graphs (undirected), and
        1 / (n * (n - 1)) for DiGraphs (directed graphs)
        where n is the number of nodes in G.
        Normalization ensures that values are in [0, 1]; it scales for the
        highest possible value, where one edge is crossed by every single
        shortest path.

    weight : cudf.DataFrame, optional, default=None
        Specifies the weights to be used for each edge.
        Should contain a mapping between edges and weights.
        (Not Supported: any value other than None raises.)

    seed : optional
        If k is specified and k is an integer, use seed to initialize the
        random number generator.
        Using None as seed relies on random.seed() behavior: using current
        system time.
        If k is either None or a list, the seed parameter is ignored.

    result_dtype : np.float32 or np.float64, optional, default=np.float64
        Data type of the betweenness centrality scores.
        Using double automatically switches the implementation to "default".

    Returns
    -------
    df : cudf.DataFrame or Dictionary if using NetworkX
        GPU data frame containing three cudf.Series of size E: the vertex
        identifiers of the sources, the vertex identifiers of the
        destinations and the corresponding betweenness centrality values.
        Note that the 'src' and 'dst' columns might not be in ascending
        order.

        df['src'] : cudf.Series
            Contains the vertex identifiers of the source of each edge

        df['dst'] : cudf.Series
            Contains the vertex identifiers of the destination of each edge

        df['edge_betweenness_centrality'] : cudf.Series
            Contains the betweenness centrality of edges

        When using undirected graphs, 'src' and 'dst' only contain elements
        such that 'src' < 'dst', which might differ from networkx and the
        user's input. Namely edge (1 -> 0) is transformed into (0 -> 1) but
        contains the betweenness centrality of edge (1 -> 0).

    Raises
    ------
    NotImplementedError
        If `weight` is not None.
    TypeError
        If `result_dtype` is neither np.float32 nor np.float64.

    Examples
    --------
    >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                     dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(gdf, source='0', destination='1')
    >>> ebc = cugraph.edge_betweenness_centrality(G)
    """
    # Reject unsupported arguments before any graph work is done.
    if weight is not None:
        raise NotImplementedError("weighted implementation of betweenness "
                                  "centrality not currently supported")
    supported_dtypes = (np.float32, np.float64)
    if result_dtype not in supported_dtypes:
        raise TypeError("result type can only be np.float32 or np.float64")

    G, from_networkx = cugraph.utilities.check_nx_graph(G)
    sources = _initialize_vertices(G, k, seed)

    ebc = edge_betweenness_centrality_wrapper.edge_betweenness_centrality(
        G, normalized, weight, sources, result_dtype)

    # Map internal vertex ids back to the caller's ids, source column first.
    if G.renumbered:
        for endpoint in ("src", "dst"):
            ebc = G.unrenumber(ebc, endpoint)

    # Exact type check on purpose: only plain Graph instances take this path.
    if type(G) is cugraph.Graph:
        swap = ebc['src'] >= ebc['dst']
        # NOTE(review): chained-indexing assignment; whether it updates `ebc`
        # in place depends on cudf's copy semantics — confirm.
        ebc[["src", "dst"]][swap] = ebc[["dst", "src"]][swap]
        # Merge rows that now share the same (src, dst) pair.
        ebc = ebc.groupby(by=["src", "dst"]).sum().reset_index()

    if from_networkx is True:
        # NOTE(review): the score column is read as 'betweenness_centrality'
        # here while the docstring names 'edge_betweenness_centrality' —
        # confirm against the wrapper's output.
        return df_edge_score_to_dictionary(ebc, 'betweenness_centrality')
    return ebc