def worker(rank):
    """Join the process group as ``rank`` and round-trip a value via the client.

    ``port`` and ``world_size`` are taken from the enclosing scope. ``rank`` is
    passed twice to ``dist.init_process_group`` (presumably as both the process
    rank and the device index -- confirm against its signature).
    """
    dist.init_process_group("localhost", port, world_size, rank, rank)

    key, value = "foo", 1
    # set in race condition
    dist.get_client().user_set(key, value)
    # get in race condition
    fetched = dist.get_client().user_get(key)
    assert fetched == value
def grad_fr_next_gpu():
    """Receive a gradient tensor from the process whose rank is this rank + 1.

    The shape and dtype of the incoming tensor are looked up in the client
    store under the ``grad_shape_of_src<rank>`` / ``grad_dtype_of_src<rank>``
    keys of the sending rank, then handed to ``remote_recv``.

    Returns:
        Whatever ``F.distributed.remote_recv`` returns for that source rank.
    """
    next_rank = dist.get_rank() + 1
    client = dist.get_client()

    grad_shape = client.user_get(f"grad_shape_of_src{next_rank}")
    grad_dtype = client.user_get(f"grad_dtype_of_src{next_rank}")

    return F.distributed.remote_recv(
        src_rank=next_rank,
        shape=grad_shape,
        dtype=grad_dtype,
    )
def worker(rank):
    """Initialise the process group as ``rank`` and verify the reported state.

    ``port``, ``world_size`` and ``backend`` are taken from the enclosing
    scope. After initialisation the function asserts that the ``dist`` getters
    echo back the values that were passed in, that both server addresses are
    on ``"localhost"``, and that the client is a ``dist.Client`` instance.

    Args:
        rank: Rank for this process; also passed as the fifth positional
            argument to ``dist.init_process_group`` (presumably the device
            index -- confirm against its signature).
    """
    dist.init_process_group("localhost", port, world_size, rank, rank, backend)

    # Plain truthiness check instead of comparing a boolean to True with `==`.
    assert dist.is_distributed()
    assert dist.get_rank() == rank
    assert dist.get_world_size() == world_size
    assert dist.get_backend() == backend

    # The python-side server must be reachable on exactly the requested port.
    py_server_addr = dist.get_py_server_addr()
    assert py_server_addr[0] == "localhost"
    assert py_server_addr[1] == port

    # Only a positive port is required for the mm server; the exact value is
    # not checked here.
    mm_server_addr = dist.get_mm_server_addr()
    assert mm_server_addr[0] == "localhost"
    assert mm_server_addr[1] > 0

    assert isinstance(dist.get_client(), dist.Client)
def worker():
    """Write ``"foo"`` to the client store, read it back, and check the value."""
    key, value = "foo", 1

    # set in race condition
    dist.get_client().user_set(key, value)

    # get in race condition
    fetched = dist.get_client().user_get(key)
    assert fetched == value
def send_to_next_gpu(tensor):
    """Publish ``tensor``'s shape/dtype and send it to the process at rank + 1.

    The shape and the NumPy dtype name are stored in the client store under
    ``shape_of_src<rank>`` / ``dtype_of_src<rank>`` (keyed by this process's
    rank) before the tensor is sent.

    Args:
        tensor: Object exposing ``shape`` and ``dtype``; ``dtype`` must be
            accepted by ``np.dtype``.

    Returns:
        Whatever ``F.distributed.remote_send`` returns.
    """
    # Compute both pieces of metadata before anything is published.
    tensor_shape = tensor.shape
    dtype_name = np.dtype(tensor.dtype).name

    dist.get_client().user_set(f"shape_of_src{dist.get_rank()}", tensor_shape)
    dist.get_client().user_set(f"dtype_of_src{dist.get_rank()}", dtype_name)

    next_rank = dist.get_rank() + 1
    return F.distributed.remote_send(tensor, dest_rank=next_rank)
def grad_to_prev_gpu(tensor):
    """Publish ``tensor``'s shape/dtype and send it to the process at rank - 1.

    The shape and the NumPy dtype name are stored in the client store under
    ``grad_shape_of_src<rank>`` / ``grad_dtype_of_src<rank>`` (keyed by this
    process's rank) before the tensor is sent.

    Args:
        tensor: Object exposing ``shape`` and ``dtype``; ``dtype`` must be
            accepted by ``np.dtype``.

    Returns:
        Whatever ``F.distributed.remote_send`` returns.
    """
    # Compute both pieces of metadata before anything is published.
    grad_shape = tensor.shape
    grad_dtype_name = np.dtype(tensor.dtype).name

    dist.get_client().user_set(f"grad_shape_of_src{dist.get_rank()}", grad_shape)
    dist.get_client().user_set(f"grad_dtype_of_src{dist.get_rank()}", grad_dtype_name)

    prev_rank = dist.get_rank() - 1
    return F.distributed.remote_send(tensor, dest_rank=prev_rank)
def recv_fr_prev_gpu():
    """Receive a tensor from the process whose rank is this rank - 1.

    The shape and dtype of the incoming tensor are looked up in the client
    store under the ``shape_of_src<rank>`` / ``dtype_of_src<rank>`` keys of the
    sending rank, then handed to ``remote_recv``.

    Returns:
        Whatever ``F.distributed.remote_recv`` returns for that source rank.
    """
    prev_rank = dist.get_rank() - 1
    client = dist.get_client()

    src_shape = client.user_get(f"shape_of_src{prev_rank}")
    src_dtype = client.user_get(f"dtype_of_src{prev_rank}")

    return F.distributed.remote_recv(
        src_rank=prev_rank,
        shape=src_shape,
        dtype=src_dtype,
    )
def worker():
    """Write ``"foo"`` to the client store; on rank 1, pop it and check the value."""
    key, value = "foo", 1

    # set in race condition
    dist.get_client().user_set(key, value)

    # Only rank 1 pops the key; every other rank is done after the write.
    if dist.get_rank() != 1:
        return

    popped = dist.get_client().user_pop(key)
    assert popped == value