def test_native_distrib_single_node_spawn_nccl():
    """Call ``idist.spawn`` with the "nccl" backend and ``_test_distrib_config``.

    The number of processes per node is ``torch.cuda.device_count()``; the
    same value is forwarded to the spawned function inside ``args`` together
    with the backend name and the "cuda" device string.
    """
    backend = "nccl"
    num_gpus = torch.cuda.device_count()
    fn_args = (backend, num_gpus, "cuda")
    idist.spawn(backend, _test_distrib_config, args=fn_args, nproc_per_node=num_gpus)
def test_hvd_distrib_single_node_spawn():
    """Call ``idist.spawn`` with the "horovod" backend on CPU with 4 processes.

    ``_test_distrib_config`` receives the backend name, the process count (4)
    and the "cpu" device string through ``args``.
    """
    backend = "horovod"
    nprocs = 4
    fn_args = (backend, nprocs, "cpu")
    idist.spawn(backend, _test_distrib_config, args=fn_args, nproc_per_node=nprocs)
def test_hvd_distrib_single_node_spawn_cuda():
    """Call ``idist.spawn`` with the "horovod" backend on CUDA devices.

    The number of processes per node is ``torch.cuda.device_count()``; that
    value is also forwarded to ``_test_distrib_config`` through ``args``
    along with the backend name and the "cuda" device string.
    """
    backend = "horovod"
    num_gpus = torch.cuda.device_count()
    fn_args = (backend, num_gpus, "cuda")
    idist.spawn(backend, _test_distrib_config, args=fn_args, nproc_per_node=num_gpus)
def test_hvd_distrib_multi_node_spawn_raise_error():
    """Expect ``idist.spawn`` to raise ``RuntimeError`` for horovod with ``nnodes=2``.

    The error message must match the pattern asking the caller to set the
    'hosts' argument for multi-node configurations.
    """
    backend = "horovod"
    nprocs = 4
    expected_message = r"For multi-node configuration, please set 'hosts' argument instead"

    with pytest.raises(RuntimeError, match=expected_message):
        idist.spawn(
            backend,
            _test_distrib_config,
            args=(backend, nprocs, "cpu"),
            nproc_per_node=nprocs,
            nnodes=2,
        )
def test_xla_distrib_single_node_spawn_one_proc():
    """Call ``idist.spawn`` with the "xla-tpu" backend and a single process.

    ``_test_distrib_config`` receives the backend name, the process count (1)
    and the "xla" device string through ``args``. A ``SystemExit`` raised by
    the spawn call is ignored.
    """
    backend = "xla-tpu"
    fn_args = (backend, 1, "xla")
    try:
        idist.spawn(backend, _test_distrib_config, args=fn_args, nproc_per_node=1)
    except SystemExit:
        # Deliberately ignored: a SystemExit escaping the spawn call must not
        # fail this test.
        pass
def test_xla_distrib_single_node_spawn_n_procs():
    """Call ``idist.spawn`` with the "xla-tpu" backend and several processes.

    The process count is read from the ``NUM_TPU_WORKERS`` environment
    variable (a ``KeyError`` is raised if it is not set) and forwarded to
    ``_test_distrib_config`` through ``args``. A ``SystemExit`` raised by the
    spawn call is ignored.
    """
    backend = "xla-tpu"
    num_workers = int(os.environ["NUM_TPU_WORKERS"])
    fn_args = (backend, num_workers, "xla")
    try:
        idist.spawn(backend, _test_distrib_config, args=fn_args, nproc_per_node=num_workers)
    except SystemExit:
        # Deliberately ignored: a SystemExit escaping the spawn call must not
        # fail this test.
        pass
def test_native_distrib_single_node_spawn_gloo():
    """Call ``idist.spawn`` with the "gloo" backend on CPU with 4 processes.

    A 20-second ``timedelta`` is passed to ``idist.spawn`` as the ``timeout``
    keyword argument.
    """
    from datetime import timedelta

    backend = "gloo"
    nprocs = 4
    spawn_timeout = timedelta(seconds=20)

    idist.spawn(
        backend,
        _test_distrib_config,
        args=(backend, nprocs, "cpu"),
        nproc_per_node=nprocs,
        timeout=spawn_timeout,
    )
def _test_native_distrib_single_node_spawn(init_method, backend, device, **kwargs):
    """Shared helper: call ``idist.spawn`` for ``backend`` with an ``init_method``.

    Args:
        init_method: value forwarded to ``idist.spawn`` as ``init_method``.
        backend: backend name, used both for ``idist.spawn`` and as the first
            element of the ``args`` given to ``_test_distrib_config``.
        device: device string; "cpu" selects 4 processes, any other value
            selects ``torch.cuda.device_count()`` processes.
        **kwargs: extra keyword arguments forwarded unchanged to ``idist.spawn``.
    """
    # Only query CUDA when the device is not "cpu".
    if device == "cpu":
        nprocs = 4
    else:
        nprocs = torch.cuda.device_count()

    fn_args = (backend, nprocs, device)
    idist.spawn(
        backend,
        _test_distrib_config,
        args=fn_args,
        nproc_per_node=nprocs,
        init_method=init_method,
        **kwargs,
    )
def test_hvd_distrib_spawn_no_hvd_support():
    """Expect ``idist.spawn`` to raise ``ValueError`` for the "horovod" backend.

    The error message must match "Backend should be one of".
    """
    backend = "horovod"
    expected_message = r"Backend should be one of"

    with pytest.raises(ValueError, match=expected_message):
        idist.spawn(backend, _test_distrib_config, args=(backend, 1, "cpu"), nproc_per_node=1)
def test_xla_distrib_spawn_no_xla_support():
    """Expect ``idist.spawn`` to raise ``ValueError`` for the "xla-tpu" backend.

    The error message must match "Backend should be one of".
    """
    backend = "xla-tpu"
    expected_message = r"Backend should be one of"

    with pytest.raises(ValueError, match=expected_message):
        idist.spawn(backend, _test_distrib_config, args=(backend, 1, "xla"), nproc_per_node=1)