Exemplo n.º 1
0
 def save_checkpoint(self, filename, extra_state):
     """Persist the trainer's state to ``filename``.

     Only the process for which ``distributed_utils.is_master(self.args)``
     is true writes anything; every other process returns without touching
     ``extra_state``.

     Args:
         filename: passed through to ``utils.save_state`` as its first argument.
         extra_state: dict-like object of additional values to store. It is
             modified in place: ``self.meters`` is stored under the
             ``'train_meters'`` key before saving.
     """
     if not distributed_utils.is_master(self.args):
         return  # a single checkpoint is enough

     extra_state['train_meters'] = self.meters
     utils.save_state(
         filename,
         self.args,
         self.model,
         self.criterion,
         self.optimizer,
         self.lr_scheduler,
         self._num_updates,
         self._optim_history,
         extra_state,
     )
Exemplo n.º 2
0
 def save_checkpoint(self, filename, extra_state):
     """Write a checkpoint containing the current training state.

     Non-master processes (as decided by ``distributed_utils.is_master``)
     do nothing. The master attaches ``self.meters`` to ``extra_state``
     under ``'train_meters'`` (mutating the caller's object) and then hands
     everything to ``utils.save_state``.
     """
     is_master = distributed_utils.is_master(self.args)
     if not is_master:
         # Only one process should produce the checkpoint file.
         return

     extra_state['train_meters'] = self.meters
     utils.save_state(
         filename, self.args, self.model, self.criterion,
         self.optimizer, self.lr_scheduler,
         self._num_updates, self._optim_history,
         extra_state,
     )
 def save_checkpoint(self, filename, extra_state):
     """Write a checkpoint containing the current training state.

     When ``self.args.amp`` is set, the AMP state dict and the list of AMP
     master parameters of the wrapped optimizer are stored in
     ``extra_state`` first. This happens on every process, before the
     master check. Only the master process then calls ``utils.save_state``.

     Args:
         filename: passed through to ``utils.save_state``.
         extra_state: dict-like object of extra values; mutated in place
             with the ``'amp_state_dict'`` and ``'amp_master_params'`` keys
             when AMP is enabled.
     """
     if self.args.amp:
         extra_state['amp_state_dict'] = amp.state_dict()
         extra_state['amp_master_params'] = list(
             amp.master_params(self.optimizer.optimizer)
         )

     if not distributed_utils.is_master(self.args):
         return  # a single checkpoint is enough

     utils.save_state(
         filename,
         self.args,
         self.get_model(),
         self.criterion,
         self.optimizer,
         self.lr_scheduler,
         self._num_updates,
         self._optim_history,
         extra_state,
     )
Exemplo n.º 4
0
 def save_checkpoint(self, filename, extra_state):
     """Write a checkpoint containing the current training state.

     Only the master process (per ``distributed_utils.is_master``) saves.
     ``self.meters`` is stored in ``extra_state`` under ``'train_meters'``
     (in-place mutation). The model is saved as its ``state_dict()``.

     With ``self.args.sep_optim`` set, the decoder optimizer, its LR
     scheduler and its optimizer history are saved as well, through
     ``utils.save_state_for_sep_optim``; otherwise the plain
     ``utils.save_state`` is used.
     """
     if not distributed_utils.is_master(self.args):
         return  # a single checkpoint is enough

     extra_state['train_meters'] = self.meters

     if self.args.sep_optim:
         utils.save_state_for_sep_optim(
             filename,
             self.args,
             self.get_model().state_dict(),
             self.criterion,
             self.optimizer,
             self.lr_scheduler,
             self._num_updates,
             self.dec_optimizer,
             self.dec_lr_scheduler,
             self._optim_history,
             self._dec_optim_history,
             extra_state,
         )
         return

     utils.save_state(
         filename,
         self.args,
         self.get_model().state_dict(),
         self.criterion,
         self.optimizer,
         self.lr_scheduler,
         self._num_updates,
         self._optim_history,
         extra_state,
     )
Exemplo n.º 5
0
    def save_checkpoint(self, filename, extra_state):
        """Write a checkpoint containing the current training state.

        Does nothing unless ``distributed_utils.is_master(self.args)`` is
        true. On the master, ``self.meters`` is stored in ``extra_state``
        under ``'train_meters'`` (the caller's object is mutated) and the
        model's ``state_dict()`` is saved together with the criterion,
        optimizer, LR scheduler, update count and optimizer history.
        """
        if not distributed_utils.is_master(self.args):
            return  # a single checkpoint is enough

        extra_state['train_meters'] = self.meters
        utils.save_state(
            filename, self.args, self.get_model().state_dict(),
            self.criterion, self.optimizer, self.lr_scheduler,
            self._num_updates, self._optim_history, extra_state,
        )
Exemplo n.º 6
0
 def save_checkpoint(self, filename, extra_state):
     """Write a checkpoint containing the current training state.

     Only the process whose ``self.args.distributed_rank`` equals 0 saves;
     all other ranks return immediately.
     """
     if self.args.distributed_rank != 0:
         return  # a single checkpoint is enough

     utils.save_state(
         filename,
         self.args,
         self.model,
         self.criterion,
         self.optimizer,
         self.lr_scheduler,
         self._num_updates,
         self._optim_history,
         extra_state,
     )
 def _async_save_checkpoint(self, rank, device_id, filename, extra_state):
     """Forward the trainer's state to ``utils.save_state``.

     ``rank`` and ``device_id`` are accepted but not used in this body.
     NOTE(review): presumably they are supplied by an async dispatch
     mechanism that calls this method; confirm against the caller.
     """
     utils.save_state(
         filename,
         self.args,
         self.model,
         self.criterion,
         self.optimizer,
         self.lr_scheduler,
         self._optim_history,
         extra_state,
     )