Example #1
def test_dask_mg_degree(client_connection):
    """Verify that in_degree() on a dask (multi-GPU) DiGraph matches the
    in_degree() of the same edge list loaded into a single-GPU DiGraph.
    """
    gc.collect()

    csv_path = r"../datasets/karate.csv"

    # Partition size used by dask_cudf when splitting the CSV.
    blocksize = cugraph.dask.get_chunksize(csv_path)

    edges_ddf = dask_cudf.read_csv(csv_path,
                                   chunksize=blocksize,
                                   delimiter=" ",
                                   names=["src", "dst", "value"],
                                   dtype=["int32", "int32", "float32"])

    edges_df = cudf.read_csv(csv_path,
                             delimiter=" ",
                             names=["src", "dst", "value"],
                             dtype=["int32", "int32", "float32"])

    mg_graph = cugraph.DiGraph()
    mg_graph.from_dask_cudf_edgelist(edges_ddf, "src", "dst")

    sg_graph = cugraph.DiGraph()
    sg_graph.from_cudf_edgelist(edges_df, "src", "dst")

    # Join both per-vertex in-degree tables and materialize the result.
    merged = mg_graph.in_degree().merge(sg_graph.in_degree(),
                                        on="vertex",
                                        suffixes=["_dg", "_g"]).compute()

    assert merged["degree_dg"].equals(merged["degree_g"])
Example #2
def test_dask_mg_degree(client_connection):
    """Duplicate of the MG degree check: dask-based in_degree must agree
    with the single-GPU computation on the karate dataset."""
    gc.collect()

    path = r"../datasets/karate.csv"

    chunk = cugraph.dask.get_chunksize(path)

    distributed_edges = dask_cudf.read_csv(
        path,
        chunksize=chunk,
        delimiter=' ',
        names=['src', 'dst', 'value'],
        dtype=['int32', 'int32', 'float32'],
    )

    local_edges = cudf.read_csv(
        path,
        delimiter=' ',
        names=['src', 'dst', 'value'],
        dtype=['int32', 'int32', 'float32'],
    )

    distributed_graph = cugraph.DiGraph()
    distributed_graph.from_dask_cudf_edgelist(distributed_edges, 'src', 'dst')

    local_graph = cugraph.DiGraph()
    local_graph.from_cudf_edgelist(local_edges, 'src', 'dst')

    # Compare per-vertex in-degrees after joining on the vertex id.
    in_deg_mg = distributed_graph.in_degree()
    in_deg_sg = local_graph.in_degree()
    comparison = in_deg_mg.merge(in_deg_sg,
                                 on="vertex",
                                 suffixes=['_dg', '_g']).compute()

    assert comparison['degree_dg'].equals(comparison['degree_g'])
Example #3
def test_katz_centrality_multi_column(graph_file):
    """Katz centrality on a multi-column-vertex graph must rank vertices
    the same as on the equivalent single-column graph."""
    gc.collect()

    edges = utils.read_csv_file(graph_file)
    edges.rename(columns={'0': 'src_0', '1': 'dst_0'}, inplace=True)
    # The second vertex column is the first shifted by a constant offset.
    edges['src_1'] = edges['src_0'] + 1000
    edges['dst_1'] = edges['dst_0'] + 1000

    multi_col_graph = cugraph.DiGraph()
    multi_col_graph.from_cudf_edgelist(edges,
                                       source=["src_0", "src_1"],
                                       destination=["dst_0", "dst_1"])

    single_col_graph = cugraph.DiGraph()
    single_col_graph.from_cudf_edgelist(edges, source="src_0",
                                        destination="dst_0")

    expected = cugraph.katz_centrality(single_col_graph, alpha=None,
                                       max_iter=1000)
    expected = expected.sort_values("vertex").reset_index(drop=True)

    # Seed the multi-column run with the single-column scores.
    nstart = cudf.DataFrame()
    nstart['vertex_0'] = expected['vertex']
    nstart['vertex_1'] = nstart['vertex_0'] + 1000
    nstart['values'] = expected['katz_centrality']

    result = cugraph.katz_centrality(multi_col_graph, nstart=nstart,
                                     alpha=None, max_iter=1000)
    result = result.sort_values("0_vertex").reset_index(drop=True)
    result.rename(columns={'0_vertex': 'vertex'}, inplace=True)

    # Only the top-k ranking needs to agree.
    top_res = topKVertices(result, "katz_centrality", 10)
    top_exp = topKVertices(expected, "katz_centrality", 10)

    assert top_res.equals(top_exp)
def test_cugraph_edge_map_to_cugraph_edge_set():
    """Translating a weighted CuGraphEdgeMap to a CuGraphEdgeSet must drop
    the weights while preserving the edge structure.

    Edges (weight): 0->1 (9), 0->2 (7), 1->2 (6), 2->0 (8), 3->2 (5).
    """
    dpr = mg.resolver
    src_list = [0, 0, 1, 2, 3]
    dst_list = [1, 2, 2, 0, 2]
    wgt_list = [9, 7, 6, 8, 5]
    gdf = cudf.DataFrame(
        {"Source": src_list, "Destination": dst_list, "Weight": wgt_list}
    )

    weighted = cugraph.DiGraph()
    weighted.from_cudf_edgelist(
        gdf, source="Source", destination="Destination", edge_attr="Weight"
    )
    edge_map = dpr.wrappers.EdgeMap.CuGraphEdgeMap(weighted)

    # Expected result: same edges, no weight attribute.
    unweighted = cugraph.DiGraph()
    unweighted.from_cudf_edgelist(gdf, source="Source",
                                  destination="Destination")
    expected_set = dpr.wrappers.EdgeSet.CuGraphEdgeSet(unweighted)

    translated = dpr.translate(edge_map, CuGraphEdgeSet)
    dpr.assert_equal(translated, expected_set)
    # A single-hop translation path should exist.
    assert len(dpr.plan.translate(edge_map, CuGraphEdgeSet)) == 1
Example #5
def test_multi_column_unrenumbering(managed, pool, graph_file):
    """Pagerank on a two-column-vertex graph must unrenumber back to both
    original vertex columns, matching the single-column result."""
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert rmm.is_initialized()

    offset = 100
    cu_M = utils.read_csv_file(graph_file)
    # Derive the second vertex columns by shifting the first ones.
    cu_M['00'] = cu_M['0'] + offset
    cu_M['11'] = cu_M['1'] + offset

    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, ['0', '00'], ['1', '11'])
    result_multi = (cugraph.pagerank(G)
                    .sort_values(by='0')
                    .reset_index(drop=True))

    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, '0', '1')
    result_single = cugraph.pagerank(G)

    # Build the expected multi-column frame from the single-column result.
    expected = cudf.DataFrame()
    expected['0'] = result_single['vertex']
    expected['1'] = result_single['vertex'] + offset
    expected['pagerank'] = result_single['pagerank']

    assert result_multi.equals(expected)
Example #6
def test_modularity_clustering_with_edgevals(graph_file, partitions):
    """Spectral modularity clustering must beat random assignment on both an
    adjacency-list-built and an edge-list-built DiGraph.
    """
    # Read in the graph and get a cugraph object
    M = utils.read_csv_for_nx(graph_file, read_weights_in_sp=False)
    M = M.tocsr().sorted_indices()
    cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False)

    # CSR components for the adjacency-list construction path.
    row_offsets = cudf.Series(M.indptr)
    col_indices = cudf.Series(M.indices)
    val = cudf.Series(M.data)

    G_adj = cugraph.DiGraph()
    G_adj.from_cudf_adjlist(row_offsets, col_indices, val)

    G_edge = cugraph.DiGraph()
    # BUG FIX: from_cudf_edgelist takes ``destination=``, not ``target=``
    # (``target`` is networkx's keyword); the original call raised TypeError.
    # Every other from_cudf_edgelist call in this file uses ``destination=``.
    G_edge.from_cudf_edgelist(cu_M, source='0', destination='1',
                              edge_attr='2')

    # Get the modularity score for partitioning versus random assignment
    cu_vid, cu_score = cugraph_call(G_adj, partitions)
    rand_vid, rand_score = random_call(G_adj, partitions)

    # Assert that the partitioning has better modularity than the random
    # assignment (lower score is better here).
    assert cu_score < rand_score

    # Repeat the comparison on the edge-list-built graph.
    cu_vid, cu_score = cugraph_call(G_edge, partitions)
    rand_vid, rand_score = random_call(G_edge, partitions)

    assert cu_score < rand_score
Example #7
def test_dask_katz_centrality(client_connection):
    """Multi-GPU Katz centrality must match the networkx reference within
    tolerance."""
    gc.collect()

    path = r"../datasets/karate.csv"
    chunk = dcg.get_chunksize(path)

    ddf = dask_cudf.read_csv(path,
                             chunksize=chunk,
                             delimiter=" ",
                             names=["src", "dst", "value"],
                             dtype=["int32", "int32", "float32"])

    df = cudf.read_csv(path,
                       delimiter=" ",
                       names=["src", "dst", "value"],
                       dtype=["int32", "int32", "float32"])

    g = cugraph.DiGraph()
    g.from_cudf_edgelist(df, "src", "dst")

    dg = cugraph.DiGraph()
    dg.from_dask_cudf_edgelist(ddf, "src", "dst")

    # alpha = 1 / (max out-degree + 1): the conventional convergent choice.
    largest = g.degrees().nlargest(n=1, columns="out_degree")
    largest = largest["out_degree"].iloc[0]
    katz_alpha = 1 / (largest + 1)

    mg_res = dcg.katz_centrality(dg, alpha=katz_alpha, tol=1e-6).compute()

    # Reference computation via networkx on the same dataset.
    import networkx as nx
    from cugraph.tests import utils
    NM = utils.read_csv_for_nx(path)
    Gnx = nx.from_pandas_edgelist(NM, create_using=nx.DiGraph(),
                                  source="0", target="1")
    nk = nx.katz_centrality(Gnx, alpha=katz_alpha)
    import pandas as pd
    exp_res = cudf.DataFrame(
        pd.DataFrame(nk.items(), columns=['vertex', 'katz_centrality'])
    )

    tol = 1.0e-05
    merged = exp_res.merge(mg_res, on="vertex",
                           suffixes=["_local", "_dask"])

    # Count entries whose difference exceeds the slightly relaxed tolerance.
    mismatches = sum(
        1 for i in range(len(merged))
        if abs(merged["katz_centrality_local"].iloc[i]
               - merged["katz_centrality_dask"].iloc[i]) > tol * 1.1
    )
    assert mismatches == 0
Example #8
def test_dask_bfs(dask_client):
    """Distributed BFS distances must match single-GPU BFS on a dataset
    augmented with a disconnected, shifted copy of itself."""
    gc.collect()

    path = (RAPIDS_DATASET_ROOT_DIR_PATH /
            "netscience.csv").as_posix()

    print(f"dataset={path}")
    chunk = dcg.get_chunksize(path)

    ddf = dask_cudf.read_csv(path,
                             chunksize=chunk,
                             delimiter=" ",
                             names=["src", "dst", "value"],
                             dtype=["int32", "int32", "float32"])

    def modify_dataset(df):
        # Append a second copy of the graph with all ids shifted by +1000.
        shifted = cudf.DataFrame()
        shifted['src'] = df['src'] + 1000
        shifted['dst'] = df['dst'] + 1000
        shifted['value'] = df['value']
        return cudf.concat([df, shifted])

    ddf = ddf.map_partitions(modify_dataset, meta=ddf._meta)

    df = cudf.read_csv(path,
                       delimiter=" ",
                       names=["src", "dst", "value"],
                       dtype=["int32", "int32", "float32"])
    df = modify_dataset(df)

    g = cugraph.DiGraph()
    g.from_cudf_edgelist(df, "src", "dst")

    dg = cugraph.DiGraph()
    dg.from_dask_cudf_edgelist(ddf, "src", "dst")

    # BFS from one source in each of the two components.
    expected_dist = cugraph.bfs(g, [0, 1000])
    result_dist = dcg.bfs(dg, [0, 1000]).compute()

    merged = expected_dist.merge(result_dist,
                                 on="vertex",
                                 suffixes=["_local", "_dask"])

    mismatches = sum(
        1 for i in range(len(merged))
        if merged["distance_local"].iloc[i] != merged["distance_dask"].iloc[i]
    )
    assert mismatches == 0
Example #9
def test_dask_pagerank(client_connection, personalization_perc):
    """MG pagerank (optionally personalized) must match SG pagerank."""
    gc.collect()

    path = r"../datasets/karate.csv"
    chunk = dcg.get_chunksize(path)

    ddf = dask_cudf.read_csv(path,
                             chunksize=chunk,
                             delimiter=" ",
                             names=["src", "dst", "value"],
                             dtype=["int32", "int32", "float32"])

    df = cudf.read_csv(path,
                       delimiter=" ",
                       names=["src", "dst", "value"],
                       dtype=["int32", "int32", "float32"])

    g = cugraph.DiGraph()
    g.from_cudf_edgelist(df, "src", "dst")

    dg = cugraph.DiGraph()
    dg.from_dask_cudf_edgelist(ddf, "src", "dst")

    # Build a personalization vector only when requested; the MG graph's
    # local data must be precomputed first.
    personalization = None
    if personalization_perc != 0:
        dg.compute_local_data(by="dst")
        personalization = personalize(dg.number_of_vertices(),
                                      personalization_perc)

    expected_pr = cugraph.pagerank(g,
                                   personalization=personalization,
                                   tol=1e-6)
    result_pr = dcg.pagerank(dg, personalization=personalization,
                             tol=1e-6).compute()

    tol = 1.0e-05
    assert len(expected_pr) == len(result_pr)

    merged = expected_pr.merge(result_pr,
                               on="vertex",
                               suffixes=["_local", "_dask"])

    mismatches = sum(
        1 for i in range(len(merged))
        if abs(merged["pagerank_local"].iloc[i] -
               merged["pagerank_dask"].iloc[i]) > tol * 1.1
    )
    assert mismatches == 0
Example #10
def test_dask_pagerank(client_connection, personalization_perc):
    """MG pagerank with optional node-based personalization must match SG
    pagerank within tolerance."""
    gc.collect()

    # FIXME: update this to allow dataset to be parameterized and have dataset
    # part of test param id (see other tests)
    path = r"../datasets/karate.csv"
    print(f"dataset={path}")
    chunk = dcg.get_chunksize(path)

    ddf = dask_cudf.read_csv(path,
                             chunksize=chunk,
                             delimiter=" ",
                             names=["src", "dst", "value"],
                             dtype=["int32", "int32", "float32"])

    df = cudf.read_csv(path,
                       delimiter=" ",
                       names=["src", "dst", "value"],
                       dtype=["int32", "int32", "float32"])

    g = cugraph.DiGraph()
    g.from_cudf_edgelist(df, "src", "dst")

    dg = cugraph.DiGraph()
    dg.from_dask_cudf_edgelist(ddf, "src", "dst")

    personalization = None
    if personalization_perc != 0:
        personalization, p = personalize(g.nodes(), personalization_perc)

    expected_pr = cugraph.pagerank(g,
                                   personalization=personalization,
                                   tol=1e-6)
    result_pr = dcg.pagerank(dg, personalization=personalization,
                             tol=1e-6).compute()

    tol = 1.0e-05
    assert len(expected_pr) == len(result_pr)

    merged = expected_pr.merge(result_pr,
                               on="vertex",
                               suffixes=["_local", "_dask"])

    mismatches = sum(
        1 for i in range(len(merged))
        if abs(merged["pagerank_local"].iloc[i] -
               merged["pagerank_dask"].iloc[i]) > tol * 1.1
    )
    assert mismatches == 0
Example #11
def test_dask_bfs_multi_column_depthlimit(dask_client):
    """Depth-limited BFS on a multi-column-vertex MG graph must match the
    single-GPU result for every vertex reached within the depth limit."""
    gc.collect()

    input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH /
                       "netscience.csv").as_posix()
    print(f"dataset={input_data_path}")
    chunksize = dcg.get_chunksize(input_data_path)

    ddf = dask_cudf.read_csv(
        input_data_path,
        chunksize=chunksize,
        delimiter=" ",
        names=["src_a", "dst_a", "value"],
        dtype=["int32", "int32", "float32"],
    )
    # Second vertex column is derived from the first by a constant shift.
    ddf['src_b'] = ddf['src_a'] + 1000
    ddf['dst_b'] = ddf['dst_a'] + 1000

    df = cudf.read_csv(
        input_data_path,
        delimiter=" ",
        names=["src_a", "dst_a", "value"],
        dtype=["int32", "int32", "float32"],
    )
    df['src_b'] = df['src_a'] + 1000
    df['dst_b'] = df['dst_a'] + 1000

    g = cugraph.DiGraph()
    g.from_cudf_edgelist(df, ["src_a", "src_b"], ["dst_a", "dst_b"])

    dg = cugraph.DiGraph()
    dg.from_dask_cudf_edgelist(ddf, ["src_a", "src_b"], ["dst_a", "dst_b"])

    # Multi-column start vertex: one value per vertex column.
    start = cudf.DataFrame()
    start['a'] = [0]
    start['b'] = [1000]

    depth_limit = 18
    expected_dist = cugraph.bfs(g, start, depth_limit=depth_limit)
    result_dist = dcg.bfs(dg, start, depth_limit=depth_limit)
    result_dist = result_dist.compute()

    compare_dist = expected_dist.merge(result_dist,
                                       on=["0_vertex", "1_vertex"],
                                       suffixes=["_local", "_dask"])

    err = 0
    # Only rows where BOTH runs reached the vertex within the depth limit are
    # comparable; differing distances among those count as errors.
    for i in range(len(compare_dist)):
        if (compare_dist["distance_local"].iloc[i] <= depth_limit
                and compare_dist["distance_dask"].iloc[i] <= depth_limit
                and compare_dist["distance_local"].iloc[i] !=
                compare_dist["distance_dask"].iloc[i]):
            err = err + 1
    assert err == 0
Example #12
def test_dask_pagerank(dask_client, personalization_perc):
    """MG pagerank with optional node-based personalization must match SG
    pagerank within tolerance (karate dataset)."""
    gc.collect()

    path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix()
    print(f"dataset={path}")
    chunk = dcg.get_chunksize(path)

    ddf = dask_cudf.read_csv(path,
                             chunksize=chunk,
                             delimiter=" ",
                             names=["src", "dst", "value"],
                             dtype=["int32", "int32", "float32"])

    df = cudf.read_csv(path,
                       delimiter=" ",
                       names=["src", "dst", "value"],
                       dtype=["int32", "int32", "float32"])

    g = cugraph.DiGraph()
    g.from_cudf_edgelist(df, "src", "dst")

    dg = cugraph.DiGraph()
    dg.from_dask_cudf_edgelist(ddf, "src", "dst")

    personalization = None
    if personalization_perc != 0:
        personalization, p = personalize(g.nodes(), personalization_perc)

    expected_pr = cugraph.pagerank(g,
                                   personalization=personalization,
                                   tol=1e-6)
    result_pr = dcg.pagerank(dg, personalization=personalization,
                             tol=1e-6).compute()

    tol = 1.0e-05
    assert len(expected_pr) == len(result_pr)

    merged = expected_pr.merge(result_pr,
                               on="vertex",
                               suffixes=["_local", "_dask"])

    mismatches = sum(
        1 for i in range(len(merged))
        if abs(merged["pagerank_local"].iloc[i] -
               merged["pagerank_dask"].iloc[i]) > tol * 1.1
    )
    assert mismatches == 0
Example #13
def test_dask_pagerank(client_connection):
    """Default-parameter MG pagerank must match SG pagerank."""
    gc.collect()

    pandas.set_option("display.max_rows", 10000)

    path = r"../datasets/karate.csv"
    chunk = dcg.get_chunksize(path)

    ddf = dask_cudf.read_csv(path,
                             chunksize=chunk,
                             delimiter=" ",
                             names=["src", "dst", "value"],
                             dtype=["int32", "int32", "float32"])

    df = cudf.read_csv(path,
                       delimiter=" ",
                       names=["src", "dst", "value"],
                       dtype=["int32", "int32", "float32"])

    g = cugraph.DiGraph()
    g.from_cudf_edgelist(df, "src", "dst")

    dg = cugraph.DiGraph()
    dg.from_dask_cudf_edgelist(ddf, "src", "dst")

    # Pre compute local data
    # dg.compute_local_data(by='dst')

    expected_pr = cugraph.pagerank(g)
    result_pr = dcg.pagerank(dg)

    tol = 1.0e-05
    assert len(expected_pr) == len(result_pr)

    merged = expected_pr.merge(result_pr,
                               on="vertex",
                               suffixes=["_local", "_dask"])

    mismatches = sum(
        1 for i in range(len(merged))
        if abs(merged["pagerank_local"].iloc[i] -
               merged["pagerank_dask"].iloc[i]) > tol * 1.1
    )
    print("Mismatches:", mismatches)
    assert mismatches == 0
Example #14
def construct_graph(dask_dataframe, symmetric=False):
    """Build a cugraph DiGraph from a dask_cudf edge list.

    Edges are read from the 'src'/'dst' columns, plus a 'weight' column when
    the frame has more than two columns.  When ``symmetric`` is True the edge
    list is symmetrized (via ``symmetrize_ddf``) before construction.
    """
    G = cugraph.DiGraph()
    weighted = len(dask_dataframe.columns) > 2

    if weighted:
        if symmetric:
            # Symmetrize, carrying the weight column along.
            dask_dataframe = symmetrize_ddf(dask_dataframe, 'src', 'dst',
                                            'weight')
        G.from_dask_cudf_edgelist(dask_dataframe,
                                  source="src",
                                  destination="dst",
                                  edge_attr="weight")
    else:
        if symmetric:
            dask_dataframe = symmetrize_ddf(dask_dataframe, 'src', 'dst')
        G.from_dask_cudf_edgelist(dask_dataframe,
                                  source="src",
                                  destination="dst")

    return G
Example #15
def test_to_undirected(graph_file):
    """DiGraph.to_undirected() must mirror networkx's to_undirected()."""
    # Keep only one direction per edge so the input is genuinely directed.
    cu_M = utils.read_csv_file(graph_file)
    cu_M = cu_M[cu_M["0"] <= cu_M["1"]].reset_index(drop=True)

    M = utils.read_csv_for_nx(graph_file)
    M = M[M["0"] <= M["1"]]
    assert len(cu_M) == len(M)

    DiG = cugraph.DiGraph()
    DiG.from_cudf_edgelist(cu_M, source="0", destination="1")

    DiGnx = nx.from_pandas_edgelist(M, source="0", target="1",
                                    create_using=nx.DiGraph())

    # The directed graph carries each edge in exactly one direction.
    for _, row in cu_M.to_pandas().iterrows():
        assert DiG.has_edge(row['0'], row['1'])
        assert not DiG.has_edge(row['1'], row['0'])

    G = DiG.to_undirected()
    Gnx = DiGnx.to_undirected()

    assert G.number_of_nodes() == Gnx.number_of_nodes()
    assert G.number_of_edges() == Gnx.number_of_edges()

    # The undirected graph reports both directions for every edge.
    for _, row in cu_M.to_pandas().iterrows():
        assert G.has_edge(row['0'], row['1'])
        assert G.has_edge(row['1'], row['0'])
Example #16
def test_add_edge_or_adj_list_after_add_edge_or_adj_list(graph_file):
    """A Graph already holding one representation (edge list or adjacency
    list) must reject loading another until the first is deleted."""
    Mnx = utils.read_csv_for_nx(graph_file)
    df = cudf.DataFrame()
    df["src"] = cudf.Series(Mnx["0"])
    df["dst"] = cudf.Series(Mnx["1"])

    # Vertex count for the CSR shape: largest id in either column, plus one.
    N = max(max(Mnx["0"]), max(Mnx["1"])) + 1
    Mcsr = scipy.sparse.csr_matrix(
        (Mnx.weight, (Mnx["0"], Mnx["1"])), shape=(N, N)
    )

    offsets = cudf.Series(Mcsr.indptr)
    indices = cudf.Series(Mcsr.indices)

    G = cugraph.DiGraph()

    # If cugraph has at least one graph representation, adding a new graph
    # should fail to prevent a single graph object storing two different
    # graphs.

    # If cugraph has a graph edge list, adding a new graph should fail.
    G.from_cudf_edgelist(df, source="src", destination="dst")
    with pytest.raises(Exception):
        G.from_cudf_edgelist(df, source="src", destination="dst")
    with pytest.raises(Exception):
        G.from_cudf_adjlist(offsets, indices, None)
    G.delete_edge_list()

    # If cugraph has a graph adjacency list, adding a new graph should fail.
    G.from_cudf_adjlist(offsets, indices, None)
    with pytest.raises(Exception):
        G.from_cudf_edgelist(df, source="src", destination="dst")
    with pytest.raises(Exception):
        G.from_cudf_adjlist(offsets, indices, None)
    G.delete_adj_list()
Example #17
def test_filter_unreachable(graph_file, source):
    """filter_unreachable() must drop every vertex whose SSSP distance is the
    dtype's max sentinel (i.e. every unreachable vertex)."""
    gc.collect()

    cu_M = utils.read_csv_file(graph_file)

    print("sources size = " + str(len(cu_M)))
    print("destinations size = " + str(len(cu_M)))

    # cugraph Pagerank Call
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2")

    print("cugraph Solving... ")
    start_t = time.time()
    df = cugraph.sssp(G, source)
    print("Time : " + str(time.time() - start_t))

    reachable_df = cugraph.filter_unreachable(df)

    # Unreachable vertices carry the max value of the distance dtype.
    if np.issubdtype(df["distance"].dtype, np.integer):
        inf = np.iinfo(reachable_df["distance"].dtype).max  # noqa: F841
        assert len(reachable_df.query("distance == @inf")) == 0
    elif np.issubdtype(df["distance"].dtype, np.inexact):
        inf = np.finfo(reachable_df["distance"].dtype).max  # noqa: F841
        assert len(reachable_df.query("distance == @inf")) == 0

    # The source itself is always reachable, so the frame is never empty.
    assert len(reachable_df) != 0
Example #18
def test_unweighted_directed_networkx_to_cugraph():
    """A networkx DiGraph must translate into the equivalent cugraph graph.

    Edges: 0->1, 0->2, 2->0, 1->2, 3->2.
    """
    dpr = mg.resolver
    edge_list = [
        (0, 1),
        (0, 2),
        (2, 0),
        (1, 2),
        (3, 2),
    ]
    nx_graph = nx.DiGraph()
    nx_graph.add_edges_from(edge_list)
    x = dpr.wrappers.Graph.NetworkXGraph(nx_graph)

    # Expected result: the same edges in a cugraph DiGraph wrapper.
    cdf = cudf.DataFrame({"source": [0, 0, 1, 2, 3],
                          "destination": [1, 2, 2, 0, 2]})
    g = cugraph.DiGraph()
    g.from_cudf_edgelist(cdf, source="source", destination="destination")
    expected = dpr.wrappers.Graph.CuGraph(g, None)

    translated = dpr.translate(x, CuGraph)
    dpr.assert_equal(translated, expected)
    # A single-hop translation path should exist.
    assert len(dpr.plan.translate(x, CuGraph)) == 1
Example #19
def test_pandas_edge_set_to_cugraph_edge_set():
    """A PandasEdgeSet must translate into the equivalent CuGraphEdgeSet.

    Edges: 0->1, 0->2, 2->0, 1->2, 3->2.
    """
    dpr = mg.resolver
    pdf = pd.DataFrame({"src": (0, 0, 2, 1, 3), "dst": (1, 2, 0, 2, 2)})
    x = dpr.wrappers.EdgeSet.PandasEdgeSet(pdf,
                                           src_label="src",
                                           dst_label="dst",
                                           is_directed=True)

    # Expected result: the same edges wrapped as a CuGraphEdgeSet.
    cdf = cudf.DataFrame({"source": [0, 0, 1, 2, 3],
                          "destination": [1, 2, 2, 0, 2]})
    g = cugraph.DiGraph()
    g.from_cudf_edgelist(cdf, source="source", destination="destination")
    expected = dpr.wrappers.EdgeSet.CuGraphEdgeSet(g)

    translated = dpr.translate(x, CuGraphEdgeSet)
    dpr.assert_equal(translated, expected)
    # A single-hop translation path should exist.
    assert len(dpr.plan.translate(x, CuGraphEdgeSet)) == 1
Example #20
def test_scipy_edge_set_to_cugraph_edge_set():
    """A ScipyEdgeSet built from an adjacency matrix must translate into the
    equivalent CuGraphEdgeSet.

    Edges: 0->1, 0->2, 1->2, 2->0, 3->2.
    """
    dpr = mg.resolver
    adjacency = ss.csr_matrix(
        np.array([
            [0, 1, 1, 0],
            [0, 0, 1, 0],
            [1, 0, 0, 0],
            [0, 0, 1, 0],
        ]))
    x = dpr.wrappers.EdgeSet.ScipyEdgeSet(adjacency)

    # Expected result: the same edges wrapped as a CuGraphEdgeSet.
    cdf = cudf.DataFrame({"Source": [0, 0, 1, 2, 3],
                          "Destination": [1, 2, 2, 0, 2]})
    g = cugraph.DiGraph()
    g.from_cudf_edgelist(cdf, source="Source", destination="Destination")
    expected = dpr.wrappers.EdgeSet.CuGraphEdgeSet(g)

    translated = dpr.translate(x, CuGraphEdgeSet)
    dpr.assert_equal(translated, expected)
    # A single-hop translation path should exist.
    assert len(dpr.plan.translate(x, CuGraphEdgeSet)) == 1
Example #21
def cugraph_call(cu_M, source, edgevals=False):
    """Run single-source shortest path with cugraph.

    Returns ``({vertex: (distance, predecessor)}, unreachable_sentinel)``
    where the sentinel is the max value of the distance dtype.
    """
    G = cugraph.DiGraph()
    if edgevals is True:
        G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='2')
    else:
        G.from_cudf_edgelist(cu_M, source='0', destination='1')
    print('sources size = ' + str(len(cu_M['0'])))
    print('destinations size = ' + str(len(cu_M['1'])))

    print('cugraph Solving... ')
    start = time.time()
    df = cugraph.sssp(G, source)
    print('Cugraph Time : ' + str(time.time() - start))

    # Unreachable vertices are reported as the dtype's max value.
    dist_dtype = df['distance'].dtype
    if (np.issubdtype(dist_dtype, np.integer)):
        max_val = np.iinfo(dist_dtype).max
    else:
        max_val = np.finfo(dist_dtype).max

    pairs = zip(df['distance'].to_array(), df['predecessor'].to_array())
    result = dict(zip(df['vertex'].to_array(), pairs))
    return result, max_val
Example #22
def cugraph_call(cu_M, max_iter, tol, alpha, personalization, nstart):
    """Run cugraph pagerank and return ``[(index, score), ...]`` with rows
    ordered by vertex id."""
    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source="0", destination="1")
    start = time.time()
    df = cugraph.pagerank(G,
                          alpha=alpha,
                          max_iter=max_iter,
                          tol=tol,
                          personalization=personalization,
                          nstart=nstart)
    print("Cugraph Time : " + str(time.time() - start))

    # Order by vertex id; row position then serves as the vertex index.
    df = df.sort_values("vertex").reset_index(drop=True)
    return list(enumerate(df["pagerank"].to_array()))
Example #23
def test_compute_local_data(client_connection):
    """compute_local_data(by='dst') must record per-partition edge/vertex
    counts whose totals match the whole graph."""

    gc.collect()

    input_data_path = r"../datasets/karate.csv"
    chunksize = dcg.get_chunksize(input_data_path)
    ddf = dask_cudf.read_csv(
        input_data_path,
        chunksize=chunksize,
        delimiter=" ",
        names=["src", "dst", "value"],
        dtype=["int32", "int32", "float32"],
    )

    dg = cugraph.DiGraph()
    dg.from_dask_cudf_edgelist(ddf,
                               source="src",
                               destination="dst",
                               edge_attr="value")

    # Compute_local_data
    dg.compute_local_data(by="dst")
    data = dg.local_data["data"]
    by = dg.local_data["by"]

    assert by == "dst"
    # The communicator must have been initialized by the MG workflow.
    assert Comms.is_initialized()

    # Per-partition counts must sum to the global totals.
    global_num_edges = data.local_data["edges"].sum()
    assert global_num_edges == dg.number_of_edges()
    global_num_verts = data.local_data["verts"].sum()
    assert global_num_verts == dg.number_of_nodes()
Example #24
def test_to_undirected(graph_file):
    """to_undirected() must produce a graph whose every edge exists in the
    equivalent networkx undirected graph."""
    gc.collect()

    # Keep only one direction per edge so the input is genuinely directed.
    cu_M = utils.read_csv_file(graph_file)
    cu_M = cu_M[cu_M["0"] <= cu_M["1"]].reset_index(drop=True)
    M = utils.read_csv_for_nx(graph_file)
    M = M[M["0"] <= M["1"]]
    assert len(cu_M) == len(M)

    DiG = cugraph.DiGraph()
    DiG.from_cudf_edgelist(cu_M, source="0", destination="1")
    DiGnx = nx.from_pandas_edgelist(M, source="0", target="1",
                                    create_using=nx.DiGraph())

    G = DiG.to_undirected()
    Gnx = DiGnx.to_undirected()

    assert G.number_of_nodes() == Gnx.number_of_nodes()
    assert G.number_of_edges() == Gnx.number_of_edges()

    edges = G.edgelist.edgelist_df
    # Every cugraph edge must be present in the networkx graph.
    for i in range(len(edges)):
        assert Gnx.has_edge(edges.iloc[i]["src"], edges.iloc[i]["dst"])
Example #25
def test_delete_edge_list_delete_adj_list(graph_file):
    """After deleting a representation (edge or adjacency list), viewing it
    must raise."""
    gc.collect()

    Mnx = utils.read_csv_for_nx(graph_file)
    df = cudf.DataFrame()
    df["src"] = cudf.Series(Mnx["0"])
    df["dst"] = cudf.Series(Mnx["1"])

    # Vertex count for the CSR shape: largest id in either column, plus one.
    n_verts = max(max(Mnx["0"]), max(Mnx["1"])) + 1
    Mcsr = scipy.sparse.csr_matrix((Mnx.weight, (Mnx["0"], Mnx["1"])),
                                   shape=(n_verts, n_verts))
    offsets = cudf.Series(Mcsr.indptr)
    indices = cudf.Series(Mcsr.indices)

    G = cugraph.DiGraph()
    G.from_cudf_edgelist(df, source="src", destination="dst")
    G.delete_edge_list()
    # No adjacency view can be built once the edge list is gone.
    with pytest.raises(Exception):
        G.view_adj_list()

    G.from_cudf_adjlist(offsets, indices, None)
    G.delete_adj_list()
    # Likewise no edge-list view after the adjacency list is deleted.
    with pytest.raises(Exception):
        G.view_edge_list()
Example #26
def test_degrees_functionality(managed, pool, graph_file):
    """G.degrees() must agree with networkx in_degree()/out_degree().

    Runs under the requested RMM configuration (managed memory / pool).
    """
    gc.collect()

    rmm.reinitialize(managed_memory=managed,
                     pool_allocator=pool,
                     initial_pool_size=2 << 27)

    assert rmm.is_initialized()

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)

    G = cugraph.DiGraph()
    # BUG FIX: from_cudf_edgelist takes ``destination=``; the original used
    # networkx's ``target=`` keyword, which raises TypeError (every other
    # from_cudf_edgelist call in this file uses ``destination=``).
    G.from_cudf_edgelist(cu_M, source='0', destination='1', edge_attr='2')

    Gnx = nx.DiGraph(M)

    df = G.degrees()

    nx_in_degree = Gnx.in_degree()
    nx_out_degree = Gnx.out_degree()

    err_in_degree = 0
    err_out_degree = 0

    # NOTE(review): indexing the nx degree views by row position ``i``
    # assumes row i of ``df`` corresponds to vertex i — confirm against the
    # renumbering behavior (the sibling test below looks up by
    # df["vertex"][i] instead).
    for i in range(len(df)):
        if (df['in_degree'][i] != nx_in_degree[i]):
            err_in_degree = err_in_degree + 1
        if (df['out_degree'][i] != nx_out_degree[i]):
            err_out_degree = err_out_degree + 1

    assert err_in_degree == 0
    assert err_out_degree == 0
Example #27
def test_degrees_functionality(graph_file):
    """G.degrees() must match networkx's per-vertex in/out degrees."""
    gc.collect()

    M = utils.read_csv_for_nx(graph_file)
    cu_M = utils.read_csv_file(graph_file)

    G = cugraph.DiGraph()
    G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2")

    Gnx = nx.from_pandas_edgelist(M, source="0", target="1",
                                  create_using=nx.DiGraph())

    df = G.degrees()

    nx_in = Gnx.in_degree()
    nx_out = Gnx.out_degree()

    # Look up networkx degrees by the vertex id carried in the result frame.
    bad_in = sum(1 for i in range(len(df))
                 if df["in_degree"][i] != nx_in[df["vertex"][i]])
    bad_out = sum(1 for i in range(len(df))
                  if df["out_degree"][i] != nx_out[df["vertex"][i]])

    assert bad_in == 0
    assert bad_out == 0
Example #28
def test_from_edgelist(client_connection):
    """cugraph.from_edgelist on a dask_cudf frame must be equivalent to
    building the DiGraph via from_dask_cudf_edgelist directly."""
    # FIXME: update this to allow dataset to be parameterized and have dataset
    # part of test param id (see other tests)
    path = r"../datasets/karate.csv"
    print(f"dataset={path}")
    chunk = dcg.get_chunksize(path)
    ddf = dask_cudf.read_csv(path,
                             chunksize=chunk,
                             delimiter=" ",
                             names=["src", "dst", "value"],
                             dtype=["int32", "int32", "float32"])

    via_factory = cugraph.from_edgelist(ddf,
                                        source="src",
                                        destination="dst",
                                        edge_attr="value",
                                        create_using=cugraph.DiGraph)

    direct = cugraph.DiGraph()
    direct.from_dask_cudf_edgelist(ddf,
                                   source="src",
                                   destination="dst",
                                   edge_attr="value")

    assert via_factory.EdgeList == direct.EdgeList
Example #29
def test_Graph_from_MultiGraph(graph_file):
    """Graph/DiGraph built from a cugraph MultiGraph/MultiDiGraph must have
    the same edge count as the equivalent networkx conversion."""
    # FIXME: Migrate to new test fixtures for Graph setup once available
    cuM = utils.read_csv_file(graph_file)
    GM = cugraph.MultiGraph()
    GM.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2")
    nxM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True)
    GnxM = nx.from_pandas_edgelist(
        nxM,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.MultiGraph(),
    )

    # Undirected case: MultiGraph -> Graph on both sides.
    G = cugraph.Graph(GM)
    Gnx = nx.Graph(GnxM)
    assert Gnx.number_of_edges() == G.number_of_edges()

    # Directed case: MultiDiGraph -> DiGraph on both sides.
    GdM = cugraph.MultiDiGraph()
    GdM.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2")
    # BUG FIX: the directed reference must be a MultiDiGraph; the original
    # built an undirected nx.MultiGraph here, and nx.DiGraph(MultiGraph)
    # materializes both directions of every edge while the cugraph side
    # keeps only the stored direction, skewing the edge-count comparison.
    GnxdM = nx.from_pandas_edgelist(
        nxM,
        source="0",
        target="1",
        edge_attr="weight",
        create_using=nx.MultiDiGraph(),
    )
    Gd = cugraph.DiGraph(GdM)
    Gnxd = nx.DiGraph(GnxdM)
    assert Gnxd.number_of_edges() == Gd.number_of_edges()
Example #30
def test_from_edgelist(dask_client):
    """cugraph.from_edgelist on a dask_cudf frame must equal building the
    DiGraph via from_dask_cudf_edgelist directly."""
    path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix()
    print(f"dataset={path}")
    chunk = dcg.get_chunksize(path)
    ddf = dask_cudf.read_csv(path,
                             chunksize=chunk,
                             delimiter=" ",
                             names=["src", "dst", "value"],
                             dtype=["int32", "int32", "float32"])

    via_factory = cugraph.from_edgelist(ddf,
                                        source="src",
                                        destination="dst",
                                        edge_attr="value",
                                        create_using=cugraph.DiGraph)

    direct = cugraph.DiGraph()
    direct.from_dask_cudf_edgelist(ddf,
                                   source="src",
                                   destination="dst",
                                   edge_attr="value")

    assert via_factory.EdgeList == direct.EdgeList