示例#1
0
def from_cudf_edgelist(df,
                       source='source',
                       destination='destination',
                       edge_attr=None,
                       create_using=Graph,
                       renumber=True):
    """
    Build and return a new graph from an edge list stored in a
    cudf.DataFrame.

    This function exists for NetworkX compatibility: it is the RAPIDS
    counterpart of NetworkX's from_pandas_edgelist(). Multiple source or
    destination columns are not supported, but renumbering is.

    Parameters
    ----------
    df : cudf.DataFrame
        DataFrame whose columns hold the edge source vertices, the
        destination (or "target" in NetworkX's terminology) vertices, and
        (optionally) the edge weights.
    source : string or integer
        Name used to index the source column.
    destination : string or integer
        Name used to index the destination (or "target" in NetworkX's
        terminology) column.
    edge_attr : string or integer, optional
        Name used to index the weight column. May be ``None``.
    create_using : cuGraph.Graph
        The graph class to instantiate. Only ``Graph`` and ``DiGraph`` are
        accepted. Default is cugraph.Graph.
    renumber : bool
        Should be True when the source and destination indices are not in
        the range 0 to V, where V is the number of vertices.

    Returns
    -------
    G : cugraph.Graph or cugraph.DiGraph
        A new instance of ``create_using`` populated from ``df``.

    Raises
    ------
    Exception
        If ``create_using`` is neither ``Graph`` nor ``DiGraph``.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.from_cudf_edgelist(M, source='0', destination='1',
    ...                                edge_attr='2')

    """
    # Only the two concrete graph classes themselves are accepted (identity
    # comparison), so anything else is rejected up front.
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")

    new_graph = create_using()
    new_graph.from_cudf_edgelist(
        df,
        source=source,
        destination=destination,
        edge_attr=edge_attr,
        renumber=renumber,
    )
    return new_graph
示例#2
0
def _minimum_spanning_tree_subgraph(G):
    """
    Return a new Graph holding the minimum spanning tree edges of G.

    Parameters
    ----------
    G : cuGraph.Graph
        The input graph. Its type must be exactly ``Graph``.

    Returns
    -------
    cuGraph.Graph
        Graph built from the "src", "dst" and "weight" columns of the edge
        list produced by the minimum spanning tree wrapper.

    Raises
    ------
    Exception
        If ``type(G)`` is not ``Graph``.
    """
    tree_graph = Graph()
    if type(G) is not Graph:
        raise Exception("input graph must be undirected")

    tree_edges = minimum_spanning_tree_wrapper.minimum_spanning_tree(G)

    # Map both endpoint columns back to the caller's vertex ids when the
    # input graph was renumbered.
    if G.renumbered:
        for endpoint in ("src", "dst"):
            tree_edges = G.unrenumber(tree_edges, endpoint)

    tree_graph.from_cudf_edgelist(
        tree_edges, source="src", destination="dst", edge_attr="weight"
    )
    return tree_graph
示例#3
0
def from_cudf_edgelist(df,
                       source='source',
                       destination='destination',
                       edge_attr=None,
                       create_using=Graph,
                       renumber=True):
    """
    Return a new graph created from the edge list representation.

    This function is added for NetworkX compatibility (it is a RAPIDS
    version of NetworkX's from_pandas_edgelist()).

    Parameters
    ----------
    df : cudf.DataFrame
        This cudf.DataFrame contains columns storing edge source vertices,
        destination (or target following NetworkX's terminology) vertices,
        and (optional) weights.
    source : string or integer
        This is used to index the source column.
    destination : string or integer
        This is used to index the destination (or target following
        NetworkX's terminology) column.
    edge_attr : string or integer, optional
        This can be ``None``. If not, it is used to index the weight
        column.
    create_using : cuGraph.Graph
        The graph class to instantiate; must be ``Graph`` or ``DiGraph``.
    renumber : bool
        Forwarded unchanged to the new graph's ``from_cudf_edgelist``
        method.

    Returns
    -------
    G : cugraph.Graph or cugraph.DiGraph
        A new instance of ``create_using`` populated from ``df``.

    Raises
    ------
    Exception
        If ``create_using`` is neither ``Graph`` nor ``DiGraph``.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.from_cudf_edgelist(M, source='0', destination='1',
    ...                                edge_attr='2')
    """
    # Reject anything that is not one of the two supported classes, then
    # instantiate whichever class was requested.
    if create_using is not Graph and create_using is not DiGraph:
        raise Exception("create_using supports Graph and DiGraph")
    built = create_using()

    edgelist_options = {
        "source": source,
        "destination": destination,
        "edge_attr": edge_attr,
        "renumber": renumber,
    }
    built.from_cudf_edgelist(df, **edgelist_options)

    return built
示例#4
0
def _maximum_spanning_tree_subgraph(G):
    """
    Return a new Graph holding the maximum spanning tree edges of G.

    The maximum spanning tree is obtained by negating the adjacency-list
    weights of G, running the minimum spanning tree routine, and negating
    the resulting "weight" column again.

    G is modified temporarily while the routine runs. Its original weights
    are always put back, even if the routine raises.

    Parameters
    ----------
    G : cuGraph.Graph
        The input graph. Its type must be exactly ``Graph``.

    Returns
    -------
    cuGraph.Graph
        Graph built from the "src", "dst" and "weight" columns of the
        resulting edge list.

    Raises
    ------
    Exception
        If ``type(G)`` is not ``Graph``.
    """
    mst_subgraph = Graph()
    if type(G) is not Graph:
        raise Exception("input graph must be undirected")

    # Keep a reference to the caller's weights so they can be restored
    # exactly, rather than reconstructed through a second negation.
    original_weights = G.adjlist.weights
    has_weights = original_weights is not None

    if has_weights:
        G.adjlist.weights = original_weights.mul(-1)

    try:
        mst_df = minimum_spanning_tree_wrapper.minimum_spanning_tree(G)
    finally:
        # Revert to the original weights on every exit path so a failure
        # in the MST routine cannot leave the input graph negated.
        if has_weights:
            G.adjlist.weights = original_weights

    if has_weights:
        # The tree was computed on negated weights; flip the sign back.
        mst_df["weight"] = mst_df["weight"].mul(-1)

    if G.renumbered:
        mst_df = G.unrenumber(mst_df, "src")
        mst_df = G.unrenumber(mst_df, "dst")

    mst_subgraph.from_cudf_edgelist(mst_df,
                                    source="src",
                                    destination="dst",
                                    edge_attr="weight")
    return mst_subgraph
示例#5
0
def ktruss_subgraph(G, k, use_weights=True):
    """
    Returns the K-Truss subgraph of a graph for a specific k.

    The k-truss of a graph is a subgraph where each edge is part of at least
    (k-2) triangles. K-trusses are used for finding tightly knit groups of
    vertices in a graph. A k-truss is a relaxation of a k-clique in the graph
    and was defined in [1]. Finding cliques is computationally demanding and
    finding the maximal k-clique is known to be NP-Hard.

    In contrast, finding a k-truss is computationally tractable as its
    key building block, namely triangle counting, can be executed
    in polynomial time. Typically, it takes many iterations of triangle
    counting to find the k-truss of a graph. Yet these iterations operate
    on a weakly monotonically shrinking graph.
    Therefore, finding the k-truss of a graph can be done in a fairly
    reasonable amount of time. The solution in cuGraph is based on a
    GPU algorithm first shown in [2] and uses the triangle counting algorithm
    from [3].

    [1] Cohen, J.,
    "Trusses: Cohesive subgraphs for social network analysis"
    National security agency technical report, 2008

    [2] O. Green, J. Fox, E. Kim, F. Busato, et al.
    "Quickly Finding a Truss in a Haystack"
    IEEE High Performance Extreme Computing Conference (HPEC), 2017
    https://doi.org/10.1109/HPEC.2017.8091038

    [3] O. Green, P. Yalamanchili, L.M. Munguia,
    "Fast Triangle Counting on GPU"
    Irregular Applications: Architectures and Algorithms (IA3), 2014


    Parameters
    ----------
    G : cuGraph.Graph
        cuGraph graph descriptor with connectivity information. k-Trusses are
        defined for only undirected graphs as they are defined for
        undirected triangles in a graph.

    k : int
        The desired k to be used for extracting the k-truss subgraph.

    use_weights : Bool
        whether the output should contain the edge weights if G has them

    Returns
    -------
    G_truss : cuGraph.Graph
        A cugraph graph descriptor with the k-truss subgraph for the given k.

    Raises
    ------
    Exception
        If ``type(G)`` is not ``Graph``.

    Examples
    --------
    >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
    ...                   dtype=['int32', 'int32', 'float32'], header=None)
    >>> G = cugraph.Graph()
    >>> G.from_cudf_edgelist(M, source='0', destination='1')
    >>> k_subgraph = cugraph.ktruss_subgraph(G, 3)
    """

    KTrussSubgraph = Graph()
    if type(G) is not Graph:
        raise Exception("input graph must be undirected")

    subgraph_df = ktruss_subgraph_wrapper.ktruss_subgraph(G, k, use_weights)
    if G.renumbered:
        subgraph_df = G.unrenumber(subgraph_df, "src")
        subgraph_df = G.unrenumber(subgraph_df, "dst")

    # Attach weights only when G has them AND the caller asked for them.
    # Previously use_weights=False on a weighted graph still requested the
    # "weight" column, contradicting the documented contract.
    if G.edgelist.weights and use_weights:
        KTrussSubgraph.from_cudf_edgelist(
            subgraph_df, source="src", destination="dst", edge_attr="weight"
        )
    else:
        KTrussSubgraph.from_cudf_edgelist(
            subgraph_df, source="src", destination="dst"
        )

    return KTrussSubgraph