Example #1
 def valid(self, force=False):
     """Validate the model every few steps.
     """
     valid_condition = (self._current_step +
                        1) % self._valid_freq == 0 or force
     if valid_condition and self._is_root_node():
         self._model.train(False)
         score_map = self.run_valid()
         is_improved = self.check_improvement(score_map)
         self._scheduler.after_valid(is_improved, score_map)
         self._model.train(True)
         self.log(
             "valid",
             "{}{} (epoch {}, step {})".format(self._dict_str(score_map),
                                               " *" if is_improved else "",
                                               self._current_epoch + 1,
                                               self._global_step + 1))
     # Check new trainer settings when using horovod
     if valid_condition and self._multigpu and self._horovod:
         self.synchronize_learning_rate()
     if (self._current_step +
             1) % 1000 == 0 and self._multigpu and self._horovod:
         import horovod.torch as hvd
         from nmtlab.trainers.hvd_utils import broadcast_optimizer_state
         # Periodically re-broadcast the optimizer state and model weights
         # from the root rank so that all Horovod workers stay in sync.
         broadcast_optimizer_state(self._optimizer, ROOT_RANK)
         hvd.broadcast_parameters(self._model.state_dict(), ROOT_RANK)
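
The periodic re-synchronization at the end of this method can also be expressed with the stock horovod.torch helpers. The sketch below is a minimal standalone example, not nmtlab's code: it assumes Horovod has already been initialized and that ROOT_RANK (used above) is the rank-0 constant; the function name resync_workers is made up for illustration.

    import horovod.torch as hvd
    import torch
    import torch.nn as nn

    ROOT_RANK = 0  # assumed value of the ROOT_RANK constant used above

    def resync_workers(model, optimizer):
        # Push the root worker's weights and optimizer state to every worker,
        # so all processes continue training from an identical state.
        hvd.broadcast_parameters(model.state_dict(), root_rank=ROOT_RANK)
        hvd.broadcast_optimizer_state(optimizer, root_rank=ROOT_RANK)

    if __name__ == "__main__":
        hvd.init()
        model = nn.Linear(8, 8)
        optimizer = torch.optim.Adam(model.parameters())
        resync_workers(model, optimizer)

Run under horovodrun (e.g. horovodrun -np 2 python sync_demo.py); with a single process the broadcasts are simply no-ops.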
Example #2
File: base.py  Project: clercrobin/nmtlab
 def valid(self):
     """Validate the model every few steps.
     """
     if (self._current_step + 1) % self._valid_freq == 0 and self._is_root_node():
         self._model.train(False)
         score_map = self.run_valid()
         is_improved = self.check_improvement(score_map)
         self._scheduler.after_valid(is_improved, score_map)
         self._model.train(True)
         self.log("valid", "{}{} (epoch {}, step {})".format(
             self._dict_str(score_map), " *" if is_improved else "",
             self._current_epoch + 1, self._global_step + 1
         ))
     # Check new trainer settings
     if (self._current_step + 1) % self._valid_freq == 0 and self._multigpu:
         self.synchronize_learning_rate()
     if (self._current_step + 1) % 100 == 0 and self._multigpu:
         from nmtlab.trainers.hvd_utils import broadcast_optimizer_state
         broadcast_optimizer_state(self._optimizer, ROOT_RANK)
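
Both examples call self.synchronize_learning_rate(), which is defined elsewhere in the trainer and not shown here. Purely as an illustration of what such a method could do under Horovod, the sketch below broadcasts the root worker's learning rate to every process; the function body is an assumption, not nmtlab's actual implementation.

    import horovod.torch as hvd
    import torch

    ROOT_RANK = 0  # assumed root rank, matching the usage above

    def synchronize_learning_rate(optimizer):
        # Broadcast the root worker's learning rate as a scalar tensor and
        # apply it to every local parameter group, so a scheduler decision
        # made on the root rank takes effect on all workers.
        lr = torch.tensor(optimizer.param_groups[0]["lr"])
        lr = hvd.broadcast(lr, ROOT_RANK, name="learning_rate")
        for group in optimizer.param_groups:
            group["lr"] = float(lr)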