def __sync_buffers(self):
    """
    Sync all the param buffers across all ranks (e.g. batch norm statistics).
    """
    # Broadcast every registered buffer from the global root rank on the calculation stream.
    for buffer in self._layer.buffers(include_sublayers=True):
        collective.broadcast(buffer,
                             self._global_root_rank,
                             self._group,
                             use_calc_stream=True)
def _broadcast_params(self):
    """Broadcast the parameters of the current rank to each rank."""
    # Exchange all the shards with the other ranks
    for dtype_per_rank in self.param_storages.values():
        for dst_rank, internal_storage in dtype_per_rank.items():
            broadcast(tensor=internal_storage.buffer,
                      src=self._group.ranks[dst_rank],
                      group=self._group,
                      use_calc_stream=True)
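For context, a minimal sketch of the shard-exchange pattern that _broadcast_params implements: every rank owns one flat buffer of parameters and broadcasts it, so that after the loop each rank holds every shard. The helper name, the exchange_shards signature, and the assumed {dtype: {owner_index: flat_buffer}} layout are illustrative assumptions; only paddle.distributed.broadcast and the group's ranks list come from the snippet above.

import paddle.distributed as dist

def exchange_shards(param_storages, group):
    """Hypothetical helper mirroring the shard exchange above.

    param_storages is assumed to be laid out as
    {dtype: {owner_index: flat_buffer_tensor}}, where owner_index selects
    the rank (within `group`) that owns that flat parameter buffer.
    """
    for per_rank in param_storages.values():
        for owner_index, flat_buffer in per_rank.items():
            # The owning rank is the broadcast source; every other rank in
            # the group overwrites its copy with the owner's shard.
            dist.broadcast(tensor=flat_buffer,
                           src=group.ranks[owner_index],
                           group=group)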
def _sync_params_and_buffers(self):
    """
    Sync all model states for all ranks.
    """
    for p in self._local_params:
        broadcast(p,
                  src=self._global_root_rank,
                  group=self._group,
                  use_calc_stream=True)
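A self-contained sketch of the broadcast-from-root pattern used by __sync_buffers and _sync_params_and_buffers above: the root rank sends its model state and every other rank overwrites its local copies, so all ranks start from identical parameters and buffers. The helper name and the use of the default group are assumptions for illustration; Layer.parameters, Layer.buffers, init_parallel_env, and paddle.distributed.broadcast are standard Paddle APIs.

import paddle
import paddle.distributed as dist

def sync_model_state_from_root(layer, root_rank=0, group=None):
    """Broadcast every parameter and buffer of `layer` from `root_rank`."""
    tensors = list(layer.parameters()) + list(layer.buffers(include_sublayers=True))
    for tensor in tensors:
        # Root rank sends; all other ranks receive in place.
        dist.broadcast(tensor, src=root_rank, group=group)

# Hypothetical usage under `python -m paddle.distributed.launch train.py`:
# dist.init_parallel_env()
# model = paddle.nn.Linear(8, 8)
# sync_model_state_from_root(model, root_rank=0)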
def __sync_buffers(self):
    """
    Sync all the param buffers across all ranks (e.g. batch norm statistics).
    """
    for buffer in self._layer.buffers(include_sublayers=True):
        dist.broadcast(buffer,
                       self._global_root_rank,
                       self._group,
                       use_calc_stream=True)

        # Multi stream operation will be supported later
        dist.wait(tensor=buffer, group=self._group, use_calc_stream=True)
def _sync_params_and_buffers(self):
    """
    Sync all model states for all ranks.
    """
    for p in self._local_params:
        broadcast(p,
                  src=self._global_root_rank,
                  group=self.group,
                  use_calc_stream=True)

        # Multi stream operation will be supported later
        wait(tensor=p, group=self.group, use_calc_stream=True)
def _broadcast_params(self):
    """Broadcast the parameters of the current rank to each rank."""
    assert self._default_device == "gpu", "Only the gpu device is supported"

    # Exchange all the shards with the other ranks
    for dtype_per_rank in self.param_storages.values():
        for dst_rank, internal_storage in dtype_per_rank.items():
            broadcast(tensor=internal_storage.buffer,
                      src=self.group.ranks[dst_rank],
                      group=self.group,
                      use_calc_stream=True)

            # Multi stream operation will be supported later
            wait(tensor=internal_storage.buffer,
                 group=self.group,
                 use_calc_stream=True)
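The "Multi stream operation will be supported later" comments pair each broadcast issued with use_calc_stream=True with an explicit wait on the same tensor, so the result is safe to read before the next compute op. In recent Paddle releases the use_calc_stream argument was superseded by sync_op; a hedged sketch of the equivalent pattern, assuming a Paddle version where sync_op=False returns a task object, might look like:

import paddle.distributed as dist

def broadcast_then_wait(tensor, src_rank, group=None):
    # Issue the broadcast asynchronously (newer-style API: sync_op replaces
    # use_calc_stream), then block until the communication has finished so
    # the tensor is safe to read on the default stream. Assumes a Paddle
    # release where async collectives return a waitable task.
    task = dist.broadcast(tensor, src=src_rank, group=group, sync_op=False)
    task.wait()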