def test_horovod_broadcast_inplace(self):
    """Test that the in-place broadcast correctly broadcasts 1D, 2D, 3D tensors.

    For every combination of dtype, dimensionality and root rank, each worker
    builds a tensor filled with its own rank, copies it, and broadcasts the
    copy in place with ``hvd.broadcast_``. The copy must then equal a tensor
    filled with the root rank, and on non-root workers the source tensor must
    still differ from it (i.e. the broadcast did not touch the source).
    """
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()

    # This test does not apply if there is only one worker.
    if size == 1:
        return

    dtypes = ['int32', 'int64', 'float32', 'float64']
    dims = [1, 2, 3]
    ctx = self._current_context()
    count = 0
    # Indexed by dimensionality; slot 0 is unused because dims starts at 1.
    shapes = [(), (17,), (17, 17), (17, 17, 17)]
    root_ranks = list(range(size))

    for dtype, dim, root_rank in itertools.product(dtypes, dims, root_ranks):
        tensor = mx.nd.ones(shapes[dim], ctx=ctx) * rank
        root_tensor = mx.nd.ones(shapes[dim], ctx=ctx) * root_rank
        tensor = tensor.astype(dtype)
        root_tensor = root_tensor.astype(dtype)

        # Broadcast into a copy so the source tensor can afterwards be
        # checked for accidental modification.
        broadcast_tensor = tensor.copy()
        hvd.broadcast_(broadcast_tensor, root_rank=root_rank, name=str(count))

        if rank != root_rank:
            # The source holds `rank`, which differs from `root_rank` here;
            # equality would mean the broadcast overwrote the source.
            if same(tensor.asnumpy(), root_tensor.asnumpy()):
                print("broadcast", count, dtype, dim,
                      mx.nd.max(tensor == root_tensor))
                print("tensor", hvd.rank(), tensor)
                print("root_tensor", hvd.rank(), root_tensor)
                print("comparison", hvd.rank(), tensor == root_tensor)
            assert not same(tensor.asnumpy(), root_tensor.asnumpy()), \
                'hvd.broadcast modifies source tensor'

        # Dump diagnostics before the assertion fires so a failure is
        # debuggable from the worker's log.
        if not same(broadcast_tensor.asnumpy(), root_tensor.asnumpy()):
            print("broadcast", count, dtype, dim)
            print("broadcast_tensor", hvd.rank(), broadcast_tensor)
            print("root_tensor", hvd.rank(), root_tensor)
            print("comparison", hvd.rank(), broadcast_tensor == root_tensor)
        broadcast_tensor.wait_to_read()
        tensor.wait_to_read()
        assert same(broadcast_tensor.asnumpy(), root_tensor.asnumpy()), \
            'hvd.broadcast produces incorrect broadcasted tensor'

        # Advance the counter so each broadcast gets its own operation name
        # and the diagnostics above identify the failing combination. The
        # loop order is deterministic, so all workers agree on the names.
        count += 1
def set_seed_distributed(local_seed):
    """Seed the Python, NumPy and MXNet RNGs with a per-worker seed.

    When more than one Horovod worker is running, the seed of rank 0 is
    broadcast to every worker; each worker then offsets it by its own rank
    (modulo 2**31). The resulting seed is logged via ``log_event`` under
    ``mlperf_constants.SEED`` and applied to ``random``, ``np.random`` and
    ``mx.random``.

    Args:
        local_seed: Seed proposed by this worker; it is stored in an int32
            tensor, and only rank 0's value is used when there are several
            workers.

    Returns:
        The seed actually applied on this worker, as a built-in ``int``.
    """
    # Single-element tensor with the local seed in it.
    rank_0_seed = nd.full((1,), local_seed, dtype=np.int32)
    if hvd.size() > 1:
        rank_0_seed = hvd.broadcast_(tensor=rank_0_seed, root_rank=0,
                                     name="broadcast_the_seed")
    nd.ndarray.waitall()

    # asscalar() returns a NumPy scalar rather than a built-in int. Convert
    # before the arithmetic so the addition cannot wrap in int32 and so the
    # result is a plain int, which random.seed() requires on Python 3.11+.
    local_seed = (int(rank_0_seed[0].asscalar()) + hvd.rank()) % 2**31

    log_event(key=mlperf_constants.SEED, value=local_seed)
    random.seed(local_seed)
    np.random.seed(local_seed)
    mx.random.seed(local_seed)
    return local_seed