def _worker_setup(self, process_idx: int):
    """Prepare this worker for distributed execution.

    Calls ``reset_seed()``, assigns the world ranks from ``process_idx``,
    publishes the resulting global rank on ``rank_zero_only`` and finally
    calls ``init_dist_connection``.

    Args:
        process_idx: Process index, forwarded unchanged to
            ``set_world_ranks``.
    """
    reset_seed()
    self.set_world_ranks(process_idx)

    # Read the rank only after set_world_ranks() has run, then reuse it for
    # both the rank_zero_only attribute and the connection call.
    rank = self.global_rank
    rank_zero_only.rank = rank

    init_dist_connection(
        self.cluster_environment,
        self.torch_distributed_backend,
        rank,
        self.world_size,
    )
def _worker_setup(self, process_idx: int):
    """Prepare this worker for distributed execution.

    Calls ``reset_seed()``, assigns the world ranks from ``process_idx``,
    publishes the global rank on ``rank_zero_only``, stores the backend
    returned by ``_get_process_group_backend()`` on the instance, and then
    calls ``init_dist_connection`` with that backend.

    Args:
        process_idx: Process index, forwarded unchanged to
            ``set_world_ranks``.
    """
    reset_seed()
    self.set_world_ranks(process_idx)

    # Read the rank only after set_world_ranks() has run.
    rank = self.global_rank
    rank_zero_only.rank = rank

    # Keep the resolved backend on the instance as well as passing it on.
    backend = self._get_process_group_backend()
    self._process_group_backend = backend

    init_dist_connection(
        self.cluster_environment,
        backend,
        rank,
        self.world_size,
    )
def setup_environment(self) -> None:
    """Open the distributed connection, then run the parent's setup.

    Calls ``reset_seed()``, publishes the global rank on ``rank_zero_only``,
    stores the backend returned by ``_get_process_group_backend()`` on the
    instance, calls ``init_dist_connection`` with the cluster environment and
    that backend, and finally delegates to ``super().setup_environment()``.

    Raises:
        AssertionError: If ``self.cluster_environment`` is ``None`` (only
            when assertions are enabled).
    """
    reset_seed()

    # Rank used by rank_zero_only (the "warning rank" in the original note).
    rank_zero_only.rank = self.global_rank

    backend = self._get_process_group_backend()
    self._process_group_backend = backend

    cluster_env = self.cluster_environment
    assert cluster_env is not None
    init_dist_connection(cluster_env, backend)

    super().setup_environment()
def setup_distributed(self):
    """Set up the distributed connection for this process.

    Emits a detail-level log line, calls ``reset_seed()``, assigns the world
    ranks, publishes the global rank on ``rank_zero_only``, stores the
    backend returned by ``_get_process_group_backend()`` on the instance, and
    calls ``init_dist_connection`` with the cluster environment and that
    backend.
    """
    log.detail(f"{self.__class__.__name__}: setting up distributed...")
    reset_seed()

    # Work out which process this is and how large the world is.
    self.set_world_ranks()

    # Rank used by rank_zero_only (the "warning rank" in the original note).
    rank_zero_only.rank = self.global_rank

    backend = self._get_process_group_backend()
    self._process_group_backend = backend
    init_dist_connection(self.cluster_environment, backend)
def setup_distributed(self):
    """Set up the distributed connection for this process.

    Calls ``reset_seed()``, assigns the world ranks, publishes the global
    rank on ``rank_zero_only`` and calls ``init_dist_connection`` with the
    cluster environment and the torch distributed backend.
    """
    reset_seed()

    # Work out which process this is and how large the world is.
    self.set_world_ranks()

    # Rank used by rank_zero_only (the "warning rank" in the original note).
    rank_zero_only.rank = self.global_rank

    # NOTE(review): earlier comments here mentioned using proc 0's IP address,
    # retrying up to 20 times when ports are taken, and an ip_table store.
    # None of that is visible in this function; presumably it lives inside
    # init_dist_connection or was removed. Confirm before relying on it.
    cluster_env = self.cluster_environment
    init_dist_connection(cluster_env, self.torch_distributed_backend)