def test_send_recv_cu(cuda_obj_generator, enable_rmm):
    """Run one server process and one client process per CUDA device.

    ``cuda_obj_generator`` is pickled with cloudpickle and handed to the
    ``server`` and ``client`` targets, each started from a "spawn"
    multiprocessing context. The test passes when every spawned process
    exits with code 0.

    Parameters
    ----------
    cuda_obj_generator : callable
        Function that is serialized and passed to the server and clients.
    enable_rmm : bool
        Forwarded unchanged to the ``server`` and ``client`` targets.
    """
    # Remember the caller's values so they can be put back afterwards.
    cuda_visible_devices_base = os.environ.get("CUDA_VISIBLE_DEVICES")
    ucx_net_devices_base = os.environ.get("UCX_NET_DEVICES")

    # One client is spawned for every entry returned here.
    # NOTE(review): the original comment said "grab first two devices";
    # whether get_cuda_devices() limits the list is not visible here - confirm.
    cuda_visible_devices = get_cuda_devices()
    num_workers = len(cuda_visible_devices)

    port = random.randint(13000, 15500)
    # serialize function and send to the client and server
    # server will use the return value of the contents,
    # serialize the values, then send serialized values to client.
    # client will compare return values of the deserialized
    # data sent from the server

    server_env = get_environment_variables(cuda_visible_devices[0])
    func = cloudpickle.dumps(cuda_obj_generator)
    ctx = multiprocessing.get_context("spawn")

    client_processes = []
    # Everything that runs while os.environ is modified lives inside the
    # try block, so the original values are restored even if the server
    # handshake assertion fails or a process fails to start.
    try:
        os.environ.update(server_env)
        parent_conn, child_conn = multiprocessing.Pipe()
        server_process = ctx.Process(
            name="server",
            target=server,
            args=[server_env, port, func, enable_rmm, num_workers, child_conn],
        )
        server_process.start()
        # Block until the server reports over the pipe that it is ready.
        server_msg = parent_conn.recv()
        assert server_msg == "initialized"

        print(cuda_visible_devices)
        for i, device in enumerate(cuda_visible_devices):
            # cudf will ping the driver for validity of device
            # this will influence device on which a cuda context is created.
            # work around is to update env with new CVD before spawning
            client_env = get_environment_variables(device)
            os.environ.update(client_env)

            proc = ctx.Process(
                name="client_" + str(i),
                target=client,
                args=[client_env, port, func, enable_rmm],
            )
            proc.start()
            client_processes.append(proc)
    finally:
        restore_environment_variables(cuda_visible_devices_base, ucx_net_devices_base)

    # Join every process before checking exit codes, so one failing client
    # does not leave the remaining clients unjoined.
    server_process.join()
    for proc in client_processes:
        proc.join()

    for proc in client_processes:
        assert proc.exitcode == 0
    assert server_process.exitcode == 0
def empty_dataframe(): import cudf return cudf.DataFrame({"a": [1.0], "b": [1.0]}).head(0) def cupy_obj(): import cupy size = 10**8 return cupy.arange(size) @pytest.mark.skipif(len(get_cuda_devices()) < 2, reason="A minimum of two GPUs is required") @pytest.mark.parametrize("cuda_obj_generator", [dataframe, empty_dataframe, series, cupy_obj]) @pytest.mark.parametrize("enable_rmm", [True, False]) def test_send_recv_cu(cuda_obj_generator, enable_rmm): cuda_visible_devices_base = os.environ.get("CUDA_VISIBLE_DEVICES") ucx_net_devices_base = os.environ.get("UCX_NET_DEVICES") # grab first two devices cuda_visible_devices = get_cuda_devices() num_workers = len(cuda_visible_devices) port = random.randint(13000, 15500) # serialize function and send to the client and server # server will use the return value of the contents,