Example #1
 def __init__(self,
              optimizer: dy.Trainer,
              skip_noisy: bool = False) -> None:
     self.optimizer = optimizer
     self.skip_noisy = skip_noisy
     if skip_noisy:
         # Track gradient-norm statistics only when noisy updates may be skipped
         self.rolling_stats = utils.RollingStatistic()
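All four examples rely on utils.RollingStatistic, which is not shown here. The sketch below is a hypothetical stand-in, assuming a fixed-size window whose average and stddev stay None until the window fills, which matches how Example #4 treats them:

import collections
import numpy as np

class RollingStatistic:
  """Tracks mean and standard deviation over a sliding window of values."""

  def __init__(self, window_size: int = 100) -> None:
    self.values = collections.deque(maxlen=window_size)
    self.window_size = window_size

  def update(self, value: float) -> None:
    self.values.append(value)

  @property
  def average(self):
    # None until the window is full, so callers can skip early checks
    if len(self.values) < self.window_size:
      return None
    return float(np.mean(self.values))

  @property
  def stddev(self):
    if len(self.values) < self.window_size:
      return None
    return float(np.std(self.values))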
Example #2
 def grad_log_norm(self) -> float:
   # Lazily create the rolling statistics tracker on first use
   if getattr(self, "rolling_stats", None) is None:
     self.rolling_stats = utils.RollingStatistic()
   # Accumulate the squared L2 norm of all gradients across subcollections
   sq_norm = 0.0
   for subcol in ParamManager.param_col.subcols.values():
     for param in subcol.parameters_list():
       cur_grads = param.grad_as_array()
       sq_norm += np.sum(np.square(cur_grads))
   # log ||g|| = log(sqrt(sum of squared gradient entries))
   return np.log(np.sqrt(sq_norm))
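As a sanity check on the arithmetic: the returned value is the log of the global L2 gradient norm, and np.log(np.sqrt(sq_norm)) is equivalent to 0.5 * np.log(sq_norm). A minimal standalone check with made-up gradient arrays:

import numpy as np

grads = [np.array([0.1, -0.2]), np.array([[0.3], [0.4]])]  # made-up gradients
sq_norm = sum(np.sum(np.square(g)) for g in grads)
assert np.isclose(np.log(np.sqrt(sq_norm)), 0.5 * np.log(sq_norm))
print(np.log(np.sqrt(sq_norm)))  # log of the global L2 norm, about -0.602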
Example #3
 def grad_log_norm(self) -> float:
   # Lazily create the rolling statistics tracker on first use
   if getattr(self, "rolling_stats", None) is None:
     self.rolling_stats = utils.RollingStatistic()
   # Accumulate the squared L2 norm over parameters that have a gradient
   sq_norm = 0.0
   for subcol in ParamManager.param_col.subcols.values():
     for _, param in subcol.named_parameters():
       if param.grad is not None:
         cur_grads = tt.npvalue(param.grad)
         sq_norm += np.sum(np.square(cur_grads))
   return np.log(np.sqrt(sq_norm))
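Unlike the DyNet version in Example #2, this variant skips parameters whose grad is None. In PyTorch-style autograd (assumed here to be the tensor backend behind tt), parameters that took no part in the last backward pass have no gradient at all, so the guard prevents a crash. A minimal illustration:

import torch

used = torch.nn.Parameter(torch.ones(2))
unused = torch.nn.Parameter(torch.ones(2))
loss = (used * 3.0).sum()  # unused does not enter the graph
loss.backward()
print(used.grad)    # tensor([3., 3.])
print(unused.grad)  # None -- np.square would fail without the guard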
Example #4
 def check_gradients_noisy(self) -> bool:
   # Lazily create the rolling statistics tracker on first use
   if getattr(self, "rolling_stats", None) is None:
     self.rolling_stats = utils.RollingStatistic()
   log_norm = self.grad_log_norm()
   if settings.USE_TENSORBOARD:
     tee.tensorboard_writer.add_scalars(name="grad",
                                        tag_scalar_dict={"norm": np.exp(log_norm)},
                                        global_step=self.global_step)
   self.rolling_stats.update(log_norm)
   if self.rolling_stats.average is None: # too few statistics
     return False
   else:
     # Noisy if the log norm lies more than 4 standard deviations
     # away from the rolling average
     req_min = self.rolling_stats.average - 4*self.rolling_stats.stddev
     req_max = self.rolling_stats.average + 4*self.rolling_stats.stddev
     return not (req_min < log_norm < req_max)
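Taken together, these methods suggest how a training step could skip outlier updates. The method below is a hypothetical companion on the same class, not part of the examples above, sketching one way the pieces could be wired up:

 def update(self) -> None:
   # Apply the optimizer step unless the gradient norm is a 4-sigma outlier
   if self.skip_noisy and self.check_gradients_noisy():
     print("skipping noisy update")
   else:
     self.optimizer.update()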