def test_start(ray_start_2_cpus):
    """Calling run() before start() raises; after start() the worker group is populated."""
    config = TestConfig()
    executor = BackendExecutor(config, num_workers=2)

    # The executor must reject work until it has been started.
    with pytest.raises(InactiveWorkerGroupError):
        executor.run(lambda: 1)

    executor.start()
    assert len(executor.worker_group) == 2
def test_execute_worker_failure(ray_start_2_cpus):
    """A worker actor dying mid-run surfaces as a RuntimeError from run()."""
    config = TestConfig()
    executor = BackendExecutor(config, num_workers=2)
    executor.start()

    def train_fail():
        # Simulate a hard worker failure by killing the actor from inside.
        ray.actor.exit_actor()

    # Swap in an execute_async that triggers the failure path.
    failing_execute = gen_execute_special(train_fail)
    with patch.object(WorkerGroup, "execute_async", failing_execute):
        with pytest.raises(RuntimeError):
            executor.run(lambda: 1)
def test_torch_start_shutdown(ray_start_2_cpus, init_method):
    """Torch process group is up on all workers after start and torn down on shutdown."""
    torch_config = TorchConfig(backend="gloo", init_method=init_method)
    executor = BackendExecutor(torch_config, num_workers=2)
    executor.start()

    def check_process_group():
        import torch
        initialized = torch.distributed.is_initialized()
        return initialized and torch.distributed.get_world_size() == 2

    # Every worker should report an initialized 2-way process group.
    assert all(executor.run(check_process_group))

    executor._backend.on_shutdown(executor.worker_group,
                                  executor._backend_config)

    # After shutdown, no worker should still see a process group.
    assert not any(executor.run(check_process_group))
def test_initialization_hook(ray_start_2_cpus):
    """An initialization_hook passed to start() runs on every worker."""
    config = TestConfig()
    executor = BackendExecutor(config, num_workers=2)

    def init_hook():
        import os
        os.environ["TEST"] = "1"

    executor.start(initialization_hook=init_hook)

    def check():
        import os
        return os.getenv("TEST", "0")

    # Both workers should observe the env var set by the hook.
    assert executor.run(check) == ["1", "1"]
def test_execute(ray_start_2_cpus):
    """run() executes the given function on each worker and collects the results."""
    config = TestConfig()
    executor = BackendExecutor(config, num_workers=2)
    executor.start()
    assert executor.run(lambda: 1) == [1, 1]