Example #1
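All four examples below come from cuGraph's multi-GPU (Dask) BFS test suite and share a common preamble. A sketch of the imports they assume (exact module paths vary across cugraph versions; RAPIDS_DATASET_ROOT_DIR_PATH comes from cugraph's test utilities):

import gc

import cudf
import dask_cudf
import cugraph
import cugraph.dask as dcg
from dask.distributed import Client
from dask_cuda import LocalCUDACluster
from cugraph.dask.comms import comms as Comms
# Location varies by version, e.g.:
# from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH
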
def test_dask_bfs(dask_client):
    gc.collect()

    input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH /
                       "netscience.csv").as_posix()

    print(f"dataset={input_data_path}")
    chunksize = dcg.get_chunksize(input_data_path)

    ddf = dask_cudf.read_csv(
        input_data_path,
        chunksize=chunksize,
        delimiter=" ",
        names=["src", "dst", "value"],
        dtype=["int32", "int32", "float32"],
    )

    def modify_dataset(df):
        # Duplicate every edge with both endpoints shifted by 1000 and
        # append the copy: the graph becomes two disconnected components,
        # and the BFS seeds [0, 1000] below start one traversal in each.
        temp_df = cudf.DataFrame()
        temp_df['src'] = df['src'] + 1000
        temp_df['dst'] = df['dst'] + 1000
        temp_df['value'] = df['value']
        return cudf.concat([df, temp_df])

    meta = ddf._meta
    ddf = ddf.map_partitions(modify_dataset, meta=meta)

    df = cudf.read_csv(
        input_data_path,
        delimiter=" ",
        names=["src", "dst", "value"],
        dtype=["int32", "int32", "float32"],
    )

    df = modify_dataset(df)

    g = cugraph.DiGraph()
    g.from_cudf_edgelist(df, "src", "dst")

    dg = cugraph.DiGraph()
    dg.from_dask_cudf_edgelist(ddf, "src", "dst")

    expected_dist = cugraph.bfs(g, [0, 1000])
    result_dist = dcg.bfs(dg, [0, 1000])
    result_dist = result_dist.compute()

    compare_dist = expected_dist.merge(result_dist,
                                       on="vertex",
                                       suffixes=["_local", "_dask"])

    err = 0

    for i in range(len(compare_dist)):
        if (compare_dist["distance_local"].iloc[i] !=
                compare_dist["distance_dask"].iloc[i]):
            err = err + 1
    assert err == 0
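
Since compare_dist is a cudf DataFrame, the row-by-row loop above can be replaced by a vectorized check; a minimal equivalent sketch:

assert (compare_dist["distance_local"] ==
        compare_dist["distance_dask"]).all()
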
Example #2
def test_dask_bfs_multi_column_depthlimit(dask_client):
    gc.collect()

    input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH /
                       "netscience.csv").as_posix()
    print(f"dataset={input_data_path}")
    chunksize = dcg.get_chunksize(input_data_path)

    ddf = dask_cudf.read_csv(
        input_data_path,
        chunksize=chunksize,
        delimiter=" ",
        names=["src_a", "dst_a", "value"],
        dtype=["int32", "int32", "float32"],
    )
    # Key each vertex by the (a, b) column pair to exercise cugraph's
    # multi-column vertex support.
    ddf['src_b'] = ddf['src_a'] + 1000
    ddf['dst_b'] = ddf['dst_a'] + 1000

    df = cudf.read_csv(
        input_data_path,
        delimiter=" ",
        names=["src_a", "dst_a", "value"],
        dtype=["int32", "int32", "float32"],
    )
    df['src_b'] = df['src_a'] + 1000
    df['dst_b'] = df['dst_a'] + 1000

    g = cugraph.DiGraph()
    g.from_cudf_edgelist(df, ["src_a", "src_b"], ["dst_a", "dst_b"])

    dg = cugraph.DiGraph()
    dg.from_dask_cudf_edgelist(ddf, ["src_a", "src_b"], ["dst_a", "dst_b"])

    start = cudf.DataFrame()
    start['a'] = [0]
    start['b'] = [1000]

    depth_limit = 18
    expected_dist = cugraph.bfs(g, start, depth_limit=depth_limit)
    result_dist = dcg.bfs(dg, start, depth_limit=depth_limit)
    result_dist = result_dist.compute()

    compare_dist = expected_dist.merge(result_dist,
                                       on=["0_vertex", "1_vertex"],
                                       suffixes=["_local", "_dask"])

    err = 0
    for i in range(len(compare_dist)):
        if (compare_dist["distance_local"].iloc[i] <= depth_limit
                and compare_dist["distance_dask"].iloc[i] <= depth_limit
                and compare_dist["distance_local"].iloc[i] !=
                compare_dist["distance_dask"].iloc[i]):
            err = err + 1
    assert err == 0
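
The same vectorization applies here, with the depth_limit mask folded in; a sketch equivalent to the loop above:

within = ((compare_dist["distance_local"] <= depth_limit) &
          (compare_dist["distance_dask"] <= depth_limit))
assert (compare_dist["distance_local"][within] ==
        compare_dist["distance_dask"][within]).all()
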
Example #3
def test_dask_bfs(client_connection):
    gc.collect()

    # FIXME: update this to allow dataset to be parameterized and have dataset
    # part of test param id (see other tests); a possible shape is sketched
    # after this example
    input_data_path = r"../datasets/netscience.csv"
    print(f"dataset={input_data_path}")
    chunksize = dcg.get_chunksize(input_data_path)

    ddf = dask_cudf.read_csv(
        input_data_path,
        chunksize=chunksize,
        delimiter=" ",
        names=["src", "dst", "value"],
        dtype=["int32", "int32", "float32"],
    )

    df = cudf.read_csv(
        input_data_path,
        delimiter=" ",
        names=["src", "dst", "value"],
        dtype=["int32", "int32", "float32"],
    )

    g = cugraph.DiGraph()
    g.from_cudf_edgelist(df, "src", "dst", renumber=True)

    dg = cugraph.DiGraph()
    dg.from_dask_cudf_edgelist(ddf, "src", "dst")

    expected_dist = cugraph.bfs(g, 0)
    # The third positional argument is return_distances in this older
    # dcg.bfs API.
    result_dist = dcg.bfs(dg, 0, True)
    result_dist = result_dist.compute()

    compare_dist = expected_dist.merge(result_dist,
                                       on="vertex",
                                       suffixes=["_local", "_dask"])

    err = 0

    for i in range(len(compare_dist)):
        if (compare_dist["distance_local"].iloc[i] !=
                compare_dist["distance_dask"].iloc[i]):
            err = err + 1
    assert err == 0
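
The FIXME in this example asks for the dataset path to be parameterized; one possible shape, as a sketch (the dataset list is illustrative, not from the original suite):

import pytest

DATASETS = ["../datasets/netscience.csv",
            "../datasets/karate.csv"]  # hypothetical second dataset

@pytest.mark.parametrize("input_data_path", DATASETS,
                         ids=lambda p: p.split("/")[-1])
def test_dask_bfs(client_connection, input_data_path):
    ...  # body as in Example #3, minus the hard-coded path
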
Example #4
def test_dask_bfs():
    gc.collect()
    cluster = LocalCUDACluster()
    client = Client(cluster)
    Comms.initialize()

    input_data_path = r"../datasets/netscience.csv"
    chunksize = dcg.get_chunksize(input_data_path)

    ddf = dask_cudf.read_csv(input_data_path,
                             chunksize=chunksize,
                             delimiter=' ',
                             names=['src', 'dst', 'value'],
                             dtype=['int32', 'int32', 'float32'])

    df = cudf.read_csv(input_data_path,
                       delimiter=' ',
                       names=['src', 'dst', 'value'],
                       dtype=['int32', 'int32', 'float32'])

    g = cugraph.DiGraph()
    g.from_cudf_edgelist(df, 'src', 'dst', renumber=True)

    dg = cugraph.DiGraph()
    dg.from_dask_cudf_edgelist(ddf, source='src', destination='dst',
                               renumber=True)

    expected_dist = cugraph.bfs(g, 0)
    result_dist = dcg.bfs(dg, 0, True)
    # Materialize the distributed result before merging with the local one,
    # as the other examples do.
    result_dist = result_dist.compute()

    compare_dist = expected_dist.merge(result_dist,
                                       on="vertex",
                                       suffixes=['_local', '_dask'])

    err = 0

    for i in range(len(compare_dist)):
        if (compare_dist['distance_local'].iloc[i] !=
                compare_dist['distance_dask'].iloc[i]):
            err = err + 1
    assert err == 0

    Comms.destroy()
    client.close()
    cluster.close()
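
Example #4 manages the LocalCUDACluster, Client, and Comms lifecycle by hand; the dask_client / client_connection fixtures used in Examples #1-#3 wrap the same setup and teardown. A minimal fixture sketch, assuming the same Comms API (the p2p flag is an assumption; older versions take no arguments):

import pytest

@pytest.fixture
def dask_client():
    cluster = LocalCUDACluster()
    client = Client(cluster)
    Comms.initialize(p2p=True)
    yield client
    # Tear down in reverse order, mirroring Example #4.
    Comms.destroy()
    client.close()
    cluster.close()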