Пример #1
0
    def _replicate_adjlist(self):
        """Replicate the adjacency list series and store the merged result.

        Fetches the current client and comms handles. When no client is
        available the method returns without touching ``batch_adjlists``.
        Otherwise the ``offsets``, ``indices`` and (when present) ``weights``
        series of ``self.adjlist`` are each handed to
        ``replication.replicate_cudf_series`` using the same client and comms,
        and ``self.batch_adjlists`` is set to a dict mapping every key of the
        offsets result to ``[offsets, indices, weights]``. The weights entry
        is ``None`` for every key when the adjacency list has no weights.
        """
        client = mg_utils.get_client()
        comms = Comms.get_comms()

        # FIXME: There might be a better way to control it
        if client is None:
            return

        offsets_futures = replication.replicate_cudf_series(
            self.adjlist.offsets,
            client=client,
            comms=comms)
        indices_futures = replication.replicate_cudf_series(
            self.adjlist.indices,
            client=client,
            comms=comms)

        if self.adjlist.weights is not None:
            # Use the same client/comms handles as for offsets and indices so
            # that all three series are replicated through one session.
            weights = replication.replicate_cudf_series(
                self.adjlist.weights,
                client=client,
                comms=comms)
        else:
            # No weights: keep the per-worker layout uniform with placeholders.
            weights = {worker: None for worker in offsets_futures}

        # The offsets result drives the key set; indices and weights are
        # looked up under the same keys.
        merged_futures = {worker: [offsets_futures[worker],
                                   indices_futures[worker], weights[worker]]
                          for worker in offsets_futures}
        self.batch_adjlists = merged_futures
Пример #2
0
def test_replicate_cudf_series(input_data_path, mg_device_count):
    """Check that every replicated copy of each column equals the original.

    The test is skipped through ``skip_if_not_enough_devices`` when
    ``mg_device_count`` cannot be satisfied. The space-delimited file at
    ``input_data_path`` is read into three columns (``src``, ``dst``,
    ``value``); inside an ``MGContext`` each column is replicated and every
    per-worker result is compared with the source series.
    """
    skip_if_not_enough_devices(mg_device_count)

    column_names = ['src', 'dst', 'value']
    column_dtypes = ['int32', 'int32', 'float32']
    edgelist = cudf.read_csv(input_data_path,
                             delimiter=' ',
                             names=column_names,
                             dtype=column_dtypes)
    mismatch_message = "There is a mismatch in one of the replications"

    with MGContext(mg_device_count):
        for column_name in edgelist.columns.values:
            expected = edgelist[column_name]
            futures_by_worker = replication.replicate_cudf_series(expected)
            # Index by key rather than iterating values so that no extra
            # local keeps a future referenced after the loop.
            for worker_key in futures_by_worker:
                actual = futures_by_worker[worker_key].result()
                assert expected.equals(actual), mismatch_message
            # FIXME: If we do not clear this dictionary, when comparing
            # results for the 2nd column, one of the workers still
            # has a value from the 1st column
            futures_by_worker = {}
Пример #3
0
def test_replicate_cudf_series(input_data_path, dask_client):
    """Check that every replicated copy of each column equals the original.

    The space-delimited file at ``input_data_path`` is read into three
    columns (``src``, ``dst``, ``value``); each column is replicated and
    every per-worker result is compared with the source series via
    ``assert_series_equal`` (series names are not compared).

    ``dask_client`` is not referenced in the body; presumably it is a
    fixture requested for its setup side effects -- confirm against the
    test configuration.
    """
    gc.collect()

    column_names = ["src", "dst", "value"]
    column_dtypes = ["int32", "int32", "float32"]
    edgelist = cudf.read_csv(
        input_data_path,
        delimiter=" ",
        names=column_names,
        dtype=column_dtypes,
    )

    for column_name in edgelist.columns.values:
        expected = edgelist[column_name]
        futures_by_worker = replication.replicate_cudf_series(expected)
        # Index by key rather than iterating values so that no extra
        # local keeps a future referenced after the loop.
        for worker_key in futures_by_worker:
            actual = futures_by_worker[worker_key].result()
            assert_series_equal(expected, actual, check_names=False)
        # FIXME: If we do not clear this dictionary, when comparing
        # results for the 2nd column, one of the workers still
        # has a value from the 1st column
        futures_by_worker = {}
Пример #4
0
def test_replicate_cudf_series(input_data_path, mg_device_count):
    """Check that every replicated copy of each column equals the original.

    The test is skipped through ``skip_if_not_enough_devices`` when
    ``mg_device_count`` cannot be satisfied. The space-delimited file at
    ``input_data_path`` is read into three columns (``src``, ``dst``,
    ``value``); inside an ``MGContext`` created with ``p2p=True`` each
    column is replicated and every per-worker result is compared with the
    source series.
    """
    gc.collect()
    skip_if_not_enough_devices(mg_device_count)

    column_names = ["src", "dst", "value"]
    column_dtypes = ["int32", "int32", "float32"]
    edgelist = cudf.read_csv(
        input_data_path,
        delimiter=" ",
        names=column_names,
        dtype=column_dtypes,
    )
    mismatch_message = "There is a mismatch in one of the replications"

    with MGContext(number_of_devices=mg_device_count, p2p=True):
        for column_name in edgelist.columns.values:
            expected = edgelist[column_name]
            futures_by_worker = replication.replicate_cudf_series(expected)
            # Index by key rather than iterating values so that no extra
            # local keeps a future referenced after the loop.
            for worker_key in futures_by_worker:
                actual = futures_by_worker[worker_key].result()
                assert expected.equals(actual), mismatch_message
            # FIXME: If we do not clear this dictionary, when comparing
            # results for the 2nd column, one of the workers still
            # has a value from the 1st column
            futures_by_worker = {}