def test_mg_edge_betweenness_centrality(
    graph_file,
    directed,
    subset_size,
    normalized,
    weight,
    subset_seed,
    result_dtype,
    mg_device_count,
):
    """Compute edge betweenness centrality under an MGContext and check it.

    ``calc_edge_betweenness_centrality`` is called with
    ``multi_gpu_batch=True`` inside an ``MGContext`` created for
    ``mg_device_count`` devices with ``p2p=True``. The frame it returns is
    handed to ``compare_scores`` with the ``cu_bc`` / ``ref_bc`` keys and
    ``DEFAULT_EPSILON``.
    """
    prepare_test()
    skip_if_not_enough_devices(mg_device_count)

    # Keyword arguments forwarded unchanged to the computation helper.
    bc_options = dict(
        directed=directed,
        normalized=normalized,
        k=subset_size,
        weight=weight,
        seed=subset_seed,
        result_dtype=result_dtype,
        multi_gpu_batch=True,
    )
    with MGContext(number_of_devices=mg_device_count, p2p=True):
        scores = calc_edge_betweenness_centrality(graph_file, **bc_options)

    # The comparison is performed once the context has been exited.
    compare_scores(
        scores,
        first_key="cu_bc",
        second_key="ref_bc",
        epsilon=DEFAULT_EPSILON,
    )
Пример #2
0
def test_enable_batch_adjlist_replication_weights(graph_file, directed,
                                                  mg_device_count):
    """Check every batch replica of a weighted adjacency list.

    The graph is built from a space-delimited ``src dst value`` edge list.
    Inside an ``MGContext`` batch mode is enabled and the adjacency list view
    is requested; each entry of ``G.batch_adjlists`` is then compared with
    the offsets, indices and weights of ``G.adjlist``.
    """
    skip_if_not_enough_devices(mg_device_count)
    edges = cudf.read_csv(graph_file,
                          delimiter=' ',
                          names=['src', 'dst', 'value'],
                          dtype=['int32', 'int32', 'float32'])
    if directed:
        G = cugraph.DiGraph()
    else:
        G = cugraph.Graph()
    G.from_cudf_edgelist(edges,
                         source='src',
                         destination='dst',
                         edge_attr='value')
    with MGContext(mg_device_count):
        G.enable_batch()
        G.view_adj_list()
        reference = G.adjlist
        ref_offsets = reference.offsets
        ref_indices = reference.indices
        ref_weights = reference.weights
        for worker in G.batch_adjlists:
            offsets_fut, indices_fut, weights_fut = G.batch_adjlists[worker]
            checks = (
                (ref_offsets, offsets_fut,
                 "Replication of adjlist offsets failed"),
                (ref_indices, indices_fut,
                 "Replication of adjlist indices failed"),
                (ref_weights, weights_fut,
                 "Replication of adjlist weights failed"),
            )
            for expected, replica, message in checks:
                # .result() is only called once its own check is reached,
                # so a failing check stops before the later ones.
                assert expected.equals(replica.result()), message
Пример #3
0
def test_enable_batch_adjlist_replication_no_weights(graph_file, directed,
                                                     mg_device_count):
    """Check every batch replica of an unweighted adjacency list.

    The graph is built from a space-delimited ``src dst`` edge list without
    an edge attribute. Inside an ``MGContext`` (``p2p=True``) each entry of
    ``G.batch_adjlists`` must match the offsets and indices of ``G.adjlist``,
    and both the local and the replicated weights must be ``None``.
    """
    gc.collect()
    skip_if_not_enough_devices(mg_device_count)
    edge_df = cudf.read_csv(
        graph_file,
        delimiter=" ",
        names=["src", "dst"],
        dtype=["int32", "int32"],
    )
    graph_cls = cugraph.DiGraph if directed else cugraph.Graph
    G = graph_cls()
    G.from_cudf_edgelist(edge_df, source="src", destination="dst")
    with MGContext(number_of_devices=mg_device_count, p2p=True):
        G.enable_batch()
        G.view_adj_list()
        local_adjlist = G.adjlist
        expected_offsets = local_adjlist.offsets
        expected_indices = local_adjlist.indices
        expected_weights = local_adjlist.weights
        for worker in G.batch_adjlists:
            offsets_fut, indices_fut, weights_fut = G.batch_adjlists[worker]

            replicated_offsets = offsets_fut.result()
            assert expected_offsets.equals(replicated_offsets), (
                "Replication of adjlist offsets failed")

            replicated_indices = indices_fut.result()
            assert expected_indices.equals(replicated_indices), (
                "Replication of adjlist indices failed")

            # No edge attribute was supplied, so no weights on either side.
            assert expected_weights is None and weights_fut is None
Пример #4
0
def test_enable_batch_edgelist_replication(graph_file, directed,
                                           mg_device_count):
    """Check that each batch replica of the edge list equals the original.

    After ``G.enable_batch()`` inside an ``MGContext``, the result of every
    entry of ``G.batch_edgelists`` is compared with
    ``G.edgelist.edgelist_df``.
    """
    skip_if_not_enough_devices(mg_device_count)
    G = utils.generate_cugraph_graph_from_file(graph_file, directed)
    with MGContext(mg_device_count):
        G.enable_batch()
        local_edges = G.edgelist.edgelist_df
        for worker in G.batch_edgelists:
            replica = G.batch_edgelists[worker].result()
            matches = local_edges.equals(replica)
            assert matches, "Replication of edgelist failed"
Пример #5
0
def test_replicate_cudf_dataframe_no_weights(input_data_path, mg_device_count):
    """Check that every replica of a two-column dataframe equals the source.

    The dataframe is read from a space-delimited ``src dst`` file and passed
    to ``replication.replicate_cudf_dataframe`` inside an ``MGContext``; the
    result stored for each worker is compared with the source dataframe.
    """
    skip_if_not_enough_devices(mg_device_count)
    csv_options = dict(delimiter=' ',
                       names=['src', 'dst'],
                       dtype=['int32', 'int32'])
    source_df = cudf.read_csv(input_data_path, **csv_options)
    failure = "There is a mismatch in one of the replications"
    with MGContext(mg_device_count):
        replicas = replication.replicate_cudf_dataframe(source_df)
        for worker in replicas:
            assert source_df.equals(replicas[worker].result()), failure
Пример #6
0
def test_enable_batch_context_no_context_views(graph_file, directed,
                                               mg_device_count):
    """Enable batch mode in a context, then request views outside of it.

    Inside the ``MGContext`` the test checks that ``G.batch_enabled`` flips
    from ``False`` to ``True`` on ``G.enable_batch()`` and that
    ``G.batch_edgelists`` is set. The edge list, adjacency list and
    transposed adjacency list views are requested after the ``with`` block
    has ended.
    """
    skip_if_not_enough_devices(mg_device_count)
    G = utils.generate_cugraph_graph_from_file(graph_file, directed)

    disabled_msg = "Internal property should be False"
    enabled_msg = "Internal property should be True"
    edgelist_msg = "The graph should have been created with an edgelist"

    with MGContext(mg_device_count):
        assert G.batch_enabled is False, disabled_msg
        G.enable_batch()
        assert G.batch_enabled is True, enabled_msg
        assert G.batch_edgelists is not None, edgelist_msg

    # These calls intentionally happen after the context has been exited.
    G.view_edge_list()
    G.view_adj_list()
    G.view_transposed_adj_list()
Пример #7
0
def test_replicate_cudf_dataframe_no_weights(input_data_path, mg_device_count):
    """Check that every replica of a two-column dataframe equals the source.

    After a ``gc.collect()``, the dataframe is read from a space-delimited
    ``src dst`` file and passed to ``replication.replicate_cudf_dataframe``
    inside an ``MGContext`` created with ``p2p=True``. The result stored for
    each worker is compared with the source dataframe.
    """
    gc.collect()
    skip_if_not_enough_devices(mg_device_count)
    column_names = ["src", "dst"]
    column_types = ["int32", "int32"]
    edge_df = cudf.read_csv(
        input_data_path,
        delimiter=" ",
        names=column_names,
        dtype=column_types,
    )
    with MGContext(number_of_devices=mg_device_count, p2p=True):
        futures_by_worker = replication.replicate_cudf_dataframe(edge_df)
        for worker in futures_by_worker:
            copy_df = futures_by_worker[worker].result()
            is_same = edge_df.equals(copy_df)
            assert is_same, "There is a mismatch in one of the replications"
Пример #8
0
def test_replicate_cudf_series(input_data_path, mg_device_count):
    """Check that every replica of each column series equals the source.

    The ``src``, ``dst`` and ``value`` columns of a space-delimited file are
    replicated one at a time with ``replication.replicate_cudf_series``
    inside an ``MGContext``; the result stored for each worker is compared
    with the source series.
    """
    skip_if_not_enough_devices(mg_device_count)
    frame = cudf.read_csv(input_data_path,
                          delimiter=' ',
                          names=['src', 'dst', 'value'],
                          dtype=['int32', 'int32', 'float32'])
    with MGContext(mg_device_count):
        for column in frame.columns.values:
            original = frame[column]
            replicas = replication.replicate_cudf_series(original)
            for worker in replicas:
                replicated = replicas[worker].result()
                assert original.equals(replicated), (
                    "There is a mismatch in one of the replications")
            # FIXME: If we do not clear this dictionary, when comparing
            # results for the 2nd column, one of the workers still
            # has a value from the 1st column
            replicas = {}
Пример #9
0
def test_enable_batch_context_then_views(graph_file, directed,
                                         mg_device_count):
    """Enable batch mode, then request the views inside the same context.

    Within the ``MGContext`` (``p2p=True``) the test checks that
    ``G.batch_enabled`` flips from ``False`` to ``True`` on
    ``G.enable_batch()`` and that ``G.batch_edgelists`` is set. It then
    checks that ``G.batch_adjlists`` and ``G.batch_transposed_adjlists`` are
    ``None`` before their respective view call and not ``None`` after it.
    """
    gc.collect()
    skip_if_not_enough_devices(mg_device_count)
    G = utils.generate_cugraph_graph_from_file(graph_file, directed)

    edgelist_msg = "The graph should have been created with an edgelist"

    with MGContext(number_of_devices=mg_device_count, p2p=True):
        assert G.batch_enabled is False, "Internal property should be False"
        G.enable_batch()
        assert G.batch_enabled is True, "Internal property should be True"
        assert G.batch_edgelists is not None, edgelist_msg

        # Adjacency list replicas are expected only after the view call.
        assert G.batch_adjlists is None
        G.view_adj_list()
        assert G.batch_adjlists is not None

        # Same expectation for the transposed adjacency list.
        assert G.batch_transposed_adjlists is None
        G.view_transposed_adj_list()
        assert G.batch_transposed_adjlists is not None
Пример #10
0
def test_replicate_cudf_series(input_data_path, mg_device_count):
    """Check that every replica of each column series equals the source.

    After a ``gc.collect()``, the ``src``, ``dst`` and ``value`` columns of a
    space-delimited file are replicated one at a time with
    ``replication.replicate_cudf_series`` inside an ``MGContext`` created
    with ``p2p=True``. The result stored for each worker is compared with the
    source series.
    """
    gc.collect()
    skip_if_not_enough_devices(mg_device_count)
    column_names = ["src", "dst", "value"]
    column_types = ["int32", "int32", "float32"]
    table = cudf.read_csv(
        input_data_path,
        delimiter=" ",
        names=column_names,
        dtype=column_types,
    )
    mismatch_msg = "There is a mismatch in one of the replications"
    with MGContext(number_of_devices=mg_device_count, p2p=True):
        for column_name in table.columns.values:
            source_series = table[column_name]
            futures_by_worker = replication.replicate_cudf_series(
                source_series)
            for worker in futures_by_worker:
                copy_series = futures_by_worker[worker].result()
                assert source_series.equals(copy_series), mismatch_msg
            # FIXME: If we do not clear this dictionary, when comparing
            # results for the 2nd column, one of the workers still
            # has a value from the 1st column
            futures_by_worker = {}