from typing import Callable, Mapping, Tuple


def _dist_worker_task_fn(backend: str, fn: Callable, args: Tuple, kwargs_dict: Mapping) -> None:
    from ignite.distributed.utils import _set_model, finalize

    # Build the Horovod computation model for this worker, register it as the
    # current global model, run the user function, then shut Horovod down.
    model = _HorovodDistModel.create_from_backend(backend)
    _set_model(model)
    fn(model.get_local_rank(), *args, **kwargs_dict)
    finalize()

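# Hedged usage sketch (added for illustration, not from the original source):
# in Ignite this worker fn is launched once per process by the Horovod runner
# via _HorovodDistModel.spawn; a single-process call is enough to see the
# flow. `_example_hvd_usage` and `training_fn` are hypothetical names.
def _example_hvd_usage() -> None:
    def training_fn(local_rank: int, a: int, b: int = 1) -> None:
        print(f"worker {local_rank}: a={a}, b={b}")

    # With one process, Horovod init/shutdown happens inside the task fn.
    _dist_worker_task_fn("horovod", training_fn, args=(10,), kwargs_dict={"b": 2})
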
import os
from typing import Any, Callable, Mapping, Tuple


def _dist_worker_task_fn(
    local_rank: int,
    backend: str,
    fn: Callable,
    args: Tuple,
    kw_dict: Mapping,
    world_size: int,
    nprocs_per_node: int,
    node_rank: int,
    master_addr: str,
    master_port: str,
    kw: Any,
) -> None:
    from ignite.distributed.utils import _set_model, finalize

    # Snapshot the environment so it can be restored after the worker is done.
    copy_env_vars = os.environ.copy()

    os.environ["LOCAL_RANK"] = str(local_rank)
    os.environ["RANK"] = str(node_rank * nprocs_per_node + local_rank)
    os.environ["WORLD_SIZE"] = str(world_size)
    os.environ["MASTER_ADDR"] = str(master_addr)
    os.environ["MASTER_PORT"] = str(master_port)

    model = _NativeDistModel.create_from_backend(backend, **kw)
    _set_model(model)
    fn(local_rank, *args, **kw_dict)
    finalize()

    os.environ.clear()
    os.environ.update(copy_env_vars)

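# Hedged usage sketch (added for illustration, not from the original source):
# the worker's first parameter matches torch.multiprocessing.spawn's
# convention of passing the process index first, so a 4-process single-node
# gloo launch could look like this. `_example_training_fn` and
# `_example_native_spawn` are hypothetical names; the training fn lives at
# module level so it stays picklable under the "spawn" start method.
def _example_training_fn(local_rank: int, a: int) -> None:
    print(f"worker {local_rank}: a={a}")


def _example_native_spawn() -> None:
    import torch.multiprocessing as mp

    mp.spawn(
        _dist_worker_task_fn,
        # (backend, fn, args, kw_dict, world_size, nprocs_per_node,
        #  node_rank, master_addr, master_port, kw)
        args=("gloo", _example_training_fn, (10,), {}, 4, 4, 0, "127.0.0.1", "29500", {}),
        nprocs=4,
    )
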
def _test_idist_methods_in_hvd_context(backend, device):
    # We explicitly set _model as _SerialModel
    # then call idist.* methods and check that they give correct values
    import horovod.torch as hvd
    import torch

    from ignite.distributed.utils import _SerialModel, _set_model

    hvd.init()

    _set_model(_SerialModel())

    ws = hvd.size()
    rank = hvd.rank()
    local_rank = hvd.local_rank()

    if torch.cuda.is_available():
        torch.cuda.set_device(local_rank)

    _test_distrib_config(local_rank, backend=backend, ws=ws, true_device=device, rank=rank)

    hvd.shutdown()

def _destroy_mnodes_dist_context():
    import torch.distributed as dist

    dist.barrier()
    dist.destroy_process_group()

    from ignite.distributed.utils import _SerialModel, _set_model

    # We need to set synced model to initial state
    _set_model(_SerialModel())

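# Hedged sketch of a matching setup helper (assumed, not from the original
# source): _destroy_mnodes_dist_context above undoes an initialization along
# these lines, where each process joins a TCP-initialized group.
# `_example_create_mnodes_dist_context` and the address are hypothetical.
def _example_create_mnodes_dist_context(backend: str, rank: int, world_size: int) -> None:
    import torch.distributed as dist

    dist.init_process_group(
        backend, init_method="tcp://127.0.0.1:29500", rank=rank, world_size=world_size
    )
    dist.barrier()
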
def test_idist_methods_in_xla_context():
    # We explicitly set _model as _SerialModel
    # then call idist.* methods and check that they give correct values
    from ignite.distributed.utils import _SerialModel, _set_model

    _set_model(_SerialModel())

    _test_distrib_config(local_rank=0, backend="xla-tpu", ws=1, true_device="xla", rank=0)

def _test_sync(cls):
    from ignite.distributed.utils import _SerialModel, _set_model, sync

    _set_model(_SerialModel())

    sync()

    from ignite.distributed.utils import _model

    assert isinstance(_model, cls), f"{type(_model)} vs {cls}"

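# Hedged usage sketch (illustrative): outside any distributed context, sync()
# should resolve back to the serial model, so the helper can be exercised in a
# plain process like this. `_example_sync_serial` is a hypothetical name.
def _example_sync_serial() -> None:
    from ignite.distributed.utils import _SerialModel

    _test_sync(_SerialModel)
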
def _test_idist_methods_in_native_context(backend, device, local_rank):
    # We explicitly set _model as _SerialModel
    # then call idist.* methods and check that they give correct values
    import torch.distributed as dist

    from ignite.distributed.utils import _SerialModel, _set_model

    _set_model(_SerialModel())

    ws = dist.get_world_size()
    rank = dist.get_rank()
    _test_distrib_config(local_rank, backend=backend, ws=ws, true_device=device, rank=rank)

def _test_idist_methods_in_xla_context_in_child_proc(index):
    # We explicitly set _model as _SerialModel
    # then call idist.* methods and check that they give correct values
    from ignite.distributed.utils import _SerialModel, _set_model

    _set_model(_SerialModel())

    import torch_xla.core.xla_model as xm

    _test_distrib_config(
        local_rank=index, backend="xla-tpu", ws=xm.xrt_world_size(), true_device="xla", rank=xm.get_ordinal()
    )

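# Hedged usage sketch (illustrative): `index` matches the process index that
# torch_xla's multiprocessing spawner passes to the target, so the child-proc
# test can be launched on 8 TPU cores like this. `_example_xla_spawn` is a
# hypothetical name.
def _example_xla_spawn() -> None:
    import torch_xla.distributed.xla_multiprocessing as xmp

    xmp.spawn(_test_idist_methods_in_xla_context_in_child_proc, args=(), nprocs=8)
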
def _test_idist_methods_in_native_context_set_local_rank(backend, device, local_rank):
    # We explicitly set _model as _SerialModel
    # then call idist.* methods and check that they give correct values
    import os

    import torch.distributed as dist

    import ignite.distributed as idist
    from ignite.distributed.utils import _SerialModel, _set_model

    _set_model(_SerialModel())

    # Hide LOCAL_RANK so that the value set via idist.set_local_rank is used.
    lrank = int(os.environ["LOCAL_RANK"])
    del os.environ["LOCAL_RANK"]

    ws = dist.get_world_size()
    rank = dist.get_rank()

    idist.set_local_rank(local_rank)

    _test_distrib_config(local_rank=local_rank, backend=backend, ws=ws, true_device=device, rank=rank)

    os.environ["LOCAL_RANK"] = str(lrank)

import os
from typing import Any, Callable, Mapping, Optional, Tuple


def _dist_worker_task_fn(
    local_rank: int,
    backend: str,
    fn: Callable,
    args: Tuple,
    kw_dict: Mapping,
    world_size: int,
    nprocs_per_node: int,
    node_rank: int,
    master_addr: Optional[str],
    master_port: Optional[str],
    init_method: str,
    kw: Any,
) -> None:
    from ignite.distributed.utils import _set_model, finalize

    # Snapshot the environment so it can be restored after the worker is done.
    copy_env_vars = os.environ.copy()

    rank = node_rank * nprocs_per_node + local_rank

    os.environ["LOCAL_RANK"] = str(local_rank)
    os.environ["RANK"] = str(rank)
    os.environ["WORLD_SIZE"] = str(world_size)

    arg_world_size = world_size  # type: Optional[int]
    arg_rank = rank  # type: Optional[int]
    if init_method == "env://":
        # MASTER_ADDR/MASTER_PORT are only needed for env:// initialization;
        # in that case world_size and rank are read from the environment.
        os.environ["MASTER_ADDR"] = str(master_addr)
        os.environ["MASTER_PORT"] = str(master_port)
        arg_world_size = None
        arg_rank = None

    model = _NativeDistModel.create_from_backend(
        backend, init_method=init_method, world_size=arg_world_size, rank=arg_rank, **kw
    )
    _set_model(model)
    fn(local_rank, *args, **kw_dict)
    finalize()

    os.environ.clear()
    os.environ.update(copy_env_vars)

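# Hedged usage sketch (illustrative): with a non-env init_method, world_size
# and rank are passed explicitly to create_from_backend and MASTER_ADDR /
# MASTER_PORT may be None. Reuses the hypothetical `_example_training_fn`
# defined earlier; `_example_tcp_init_spawn` is also a hypothetical name.
def _example_tcp_init_spawn() -> None:
    import torch.multiprocessing as mp

    mp.spawn(
        _dist_worker_task_fn,
        # (backend, fn, args, kw_dict, world_size, nprocs_per_node, node_rank,
        #  master_addr, master_port, init_method, kw)
        args=("gloo", _example_training_fn, (10,), {}, 4, 4, 0, None, None, "tcp://127.0.0.1:29500", {}),
        nprocs=4,
    )
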