def _replicate_adjlist(self):
    """Replicate the adjacency list arrays and cache them per worker.

    ``self.adjlist.offsets``, ``self.adjlist.indices`` and, when present,
    ``self.adjlist.weights`` are each passed to
    ``replication.replicate_cudf_series``. The results are merged into
    ``self.batch_adjlists``, a dictionary that maps every key of the offsets
    replication result to the list ``[offsets, indices, weights]`` for that
    key. The weights entry is ``None`` for every key when the adjacency
    list has no weights.

    Returns
    -------
    None
        The method returns early, leaving ``self.batch_adjlists`` untouched,
        when ``mg_utils.get_client()`` yields ``None``.
    """
    client = mg_utils.get_client()
    comms = Comms.get_comms()

    # FIXME: There might be a better way to control it
    if client is None:
        return

    offsets_futures = replication.replicate_cudf_series(
        self.adjlist.offsets, client=client, comms=comms
    )
    indices_futures = replication.replicate_cudf_series(
        self.adjlist.indices, client=client, comms=comms
    )

    if self.adjlist.weights is not None:
        # Use the same client/comms handles as the offsets and indices so
        # that all three replications go through the same session.
        weights_futures = replication.replicate_cudf_series(
            self.adjlist.weights, client=client, comms=comms
        )
    else:
        # Keep the per-worker layout uniform even without weights.
        weights_futures = {worker: None for worker in offsets_futures}

    self.batch_adjlists = {
        worker: [
            offsets_futures[worker],
            indices_futures[worker],
            weights_futures[worker],
        ]
        for worker in offsets_futures
    }
def test_replicate_cudf_series(input_data_path, mg_device_count):
    """Verify that replicating a cudf series yields equal copies.

    The CSV at ``input_data_path`` is read into three columns
    (``src``, ``dst``, ``value``). Inside an ``MGContext`` each column is
    passed to ``replication.replicate_cudf_series`` and the result of every
    returned future is asserted equal to the original series.
    """
    skip_if_not_enough_devices(mg_device_count)
    edge_list = cudf.read_csv(
        input_data_path,
        delimiter=' ',
        names=['src', 'dst', 'value'],
        dtype=['int32', 'int32', 'float32'],
    )
    mismatch_msg = "There is a mismatch in one of the replications"

    with MGContext(mg_device_count):
        for column_name in edge_list.columns.values:
            expected = edge_list[column_name]
            futures_by_worker = replication.replicate_cudf_series(expected)
            for worker in futures_by_worker:
                # Look the future up by key instead of binding it to a
                # name, so no extra reference outlives the reset below.
                replica = futures_by_worker[worker].result()
                assert expected.equals(replica), mismatch_msg
            # FIXME: If we do not clear this dictionary, when comparing
            # results for the 2nd column, one of the workers still
            # has a value from the 1st column
            futures_by_worker = {}
def test_replicate_cudf_series(input_data_path, dask_client):
    """Verify that replicating a cudf series yields equal copies.

    The CSV at ``input_data_path`` is read into three columns
    (``src``, ``dst``, ``value``). Each column is passed to
    ``replication.replicate_cudf_series`` and the result of every returned
    future is compared with the original series through
    ``assert_series_equal`` (series names are not compared).
    """
    gc.collect()

    edge_list = cudf.read_csv(
        input_data_path,
        delimiter=" ",
        names=["src", "dst", "value"],
        dtype=["int32", "int32", "float32"],
    )

    for column_name in edge_list.columns.values:
        expected = edge_list[column_name]
        futures_by_worker = replication.replicate_cudf_series(expected)
        for worker in futures_by_worker:
            # Look the future up by key instead of binding it to a name,
            # so no extra reference outlives the reset below.
            replica = futures_by_worker[worker].result()
            assert_series_equal(expected, replica, check_names=False)
        # FIXME: If we do not clear this dictionary, when comparing
        # results for the 2nd column, one of the workers still
        # has a value from the 1st column
        futures_by_worker = {}
def test_replicate_cudf_series(input_data_path, mg_device_count):
    """Verify that replicating a cudf series yields equal copies.

    The CSV at ``input_data_path`` is read into three columns
    (``src``, ``dst``, ``value``). Inside an ``MGContext`` created with
    ``p2p=True`` each column is passed to
    ``replication.replicate_cudf_series`` and the result of every returned
    future is asserted equal to the original series.
    """
    gc.collect()
    skip_if_not_enough_devices(mg_device_count)

    edge_list = cudf.read_csv(
        input_data_path,
        delimiter=" ",
        names=["src", "dst", "value"],
        dtype=["int32", "int32", "float32"],
    )
    mismatch_msg = "There is a mismatch in one of the replications"

    with MGContext(number_of_devices=mg_device_count, p2p=True):
        for column_name in edge_list.columns.values:
            expected = edge_list[column_name]
            futures_by_worker = replication.replicate_cudf_series(expected)
            for worker in futures_by_worker:
                # Look the future up by key instead of binding it to a
                # name, so no extra reference outlives the reset below.
                replica = futures_by_worker[worker].result()
                assert expected.equals(replica), mismatch_msg
            # FIXME: If we do not clear this dictionary, when comparing
            # results for the 2nd column, one of the workers still
            # has a value from the 1st column
            futures_by_worker = {}