Пример #1
0
def _test_collect_states(rank, world_size):
    """Check that ``_collect_states_on_rank_zero`` returns one state per rank.

    Sets ``MASTER_ADDR`` to localhost, selects the CUDA device whose index is
    ``rank``, joins an NCCL process group, and then compares the collected
    result against a ``rank -> state`` mapping.

    Args:
        rank: Index of this process; also used as the CUDA device index and
            as the payload of the local state tensor.
        world_size: Number of processes taking part in the group.

    NOTE(review): the expected mapping only lists ranks 0 and 1, so this
    presumably runs with ``world_size == 2`` -- confirm against the launcher.
    """
    os.environ["MASTER_ADDR"] = "localhost"
    torch.cuda.set_device(f"cuda:{rank}")

    # The process group has to exist before any collective call is issued.
    torch.distributed.init_process_group("nccl", rank=rank, world_size=world_size)

    local_state = {"something": torch.tensor([rank])}
    gathered = _collect_states_on_rank_zero(local_state)

    # Every rank contributes a one-element tensor holding its own rank.
    expected = {peer: {"something": torch.tensor([peer])} for peer in (1, 0)}
    assert gathered == expected
Пример #2
0
 def fn(state: Dict):
     """Collect ``state`` if it holds ``key``; otherwise descend into its values.

     ``key`` is read from the enclosing scope (not visible in this excerpt).
     When it is present in ``state`` the whole dict is handed to
     ``_collect_states_on_rank_zero`` and that result is returned. Otherwise a
     new dict with the same keys is returned, where each value has been passed
     through ``apply_to_collection(value, Dict, fn)``.
     """
     if key not in state:
         # Not a collectable dict itself: rebuild it from its processed values.
         processed = {}
         for name, value in state.items():
             processed[name] = apply_to_collection(value, Dict, fn)
         return processed
     return _collect_states_on_rank_zero(state)