Example #1
def new_update(x, new_x):
    if is_one_of(x, params):
        # Accumulate the learning-rate multiplier from every schedule entry
        # whose key appears in this parameter's name.
        lr_multiplier = 1
        for k, v in self.paramwise_lr_schedule.items():
            if k in x.name:
                lr_multiplier *= v
        # Rescale the step (new_x - x) for this parameter only when needed.
        if lr_multiplier != 1:
            new_x = x + (new_x - x) * lr_multiplier
    return old_update(x, new_x)
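All five snippets test membership with an is_one_of helper rather than `x in params`. Its definition is not shown here; a plausible minimal sketch (an assumption, not taken from the code above) compares by object identity, because `==` on tensors builds a comparison op instead of returning a Python bool:

def is_one_of(x, ys):
    # Identity-based membership test; avoids invoking the overloaded `==`
    # that tensors define.
    return any(x is y for y in ys)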
Example #2
def new_update(x, new_x):
    if is_one_of(x, params) and self._do_layer_adaptation(x):
        dx = new_x - x
        # Divide the learning rate back out to recover the raw step direction.
        lr_t = K.clip(self.learning_rate, K.epsilon(), 1e10)
        x_norm = tf.norm(x)
        g_norm = tf.norm(dx / lr_t)
        # Layer-wise trust ratio ||x|| / ||dx / lr||; fall back to 1 when
        # either norm is zero or numerically negligible.
        ratio = K.switch(
            x_norm > 0.,
            K.switch(g_norm > K.epsilon(), x_norm / g_norm, 1.),
            1.)
        new_x = x + dx * ratio
    return old_update(x, new_x)
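The ratio above behaves like the layer-wise trust ratio used in LARS/LAMB-style optimizers: the step for a parameter block is rescaled so its length tracks that block's own norm. A small standalone NumPy illustration of the same arithmetic (values chosen here purely for illustration):

import numpy as np

lr = 0.001
x = np.array([0.3, -0.4])            # parameter block, ||x|| = 0.5
dx = np.array([-0.05, 0.02])         # proposed step, already multiplied by lr
ratio = np.linalg.norm(x) / np.linalg.norm(dx / lr)
scaled_step = dx * ratio
# ||scaled_step|| ≈ lr * ||x|| = 0.0005: the effective step length becomes
# proportional to the parameter norm, independent of the raw gradient size.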
Example #3
def new_update(x, new_x):
    if is_one_of(x, params) and self._do_lazy_optimization(x):
        g = self.grads[x]
        # Mask of rows whose gradient is non-zero somewhere; rows with an
        # all-zero gradient keep their old values (lazy/sparse update).
        r = K.any(K.not_equal(g, 0.), axis=-1, keepdims=True)
        new_x = x + (new_x - x) * K.cast(r, K.floatx())
    return old_update(x, new_x)
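The row mask r is what makes the update lazy: rows whose gradient is entirely zero (typical for embedding matrices, where only a few rows are touched per batch) are left in place. A small standalone TensorFlow illustration of the same masking (names and values are illustrative only):

import tensorflow as tf

x = tf.constant([[1., 1.], [2., 2.], [3., 3.]])    # current weights
new_x = x - 0.1                                    # proposed update for every row
g = tf.constant([[0., 0.], [0.5, 0.], [0., 0.]])   # only row 1 received a gradient
r = tf.reduce_any(tf.not_equal(g, 0.), axis=-1, keepdims=True)
lazy_new_x = x + (new_x - x) * tf.cast(r, tf.float32)
# Rows 0 and 2 keep their old values; only row 1 moves.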
Example #4
def new_update(x, new_x):
    if is_one_of(x, params):
        # Scale the whole step by a single global learning-rate multiplier.
        new_x = x + (new_x - x) * lr_multiplier
    return old_update(x, new_x)
Example #5
def new_update(x, new_x):
    if is_one_of(x, params) and self._do_weight_decay(x):
        # Decoupled (AdamW-style) weight decay applied on top of the
        # optimizer's own step, scaled by the current learning rate.
        new_x = new_x - self.learning_rate * self.weight_decay_rate * x
    return old_update(x, new_x)
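Each closure wraps an old_update taken from the enclosing scope, so it has to be installed as the function the base optimizer calls when it writes a parameter. One way this is commonly wired up, sketched here under the assumption of a legacy-style Keras optimizer whose get_updates writes parameters through K.update (BaseOptimizer, the class name, and the multiplier value are hypothetical):

from tensorflow.keras import backend as K

class ExtendedOptimizer(BaseOptimizer):  # BaseOptimizer: hypothetical Keras optimizer
    def get_updates(self, loss, params):
        old_update = K.update
        lr_multiplier = 0.1  # global multiplier, as in Example #4

        def new_update(x, new_x):
            # Reuses the is_one_of helper sketched after Example #1.
            if is_one_of(x, params):
                new_x = x + (new_x - x) * lr_multiplier
            return old_update(x, new_x)

        # Route every parameter write made by the base optimizer through the
        # hook, then restore the original backend function.
        K.update = new_update
        updates = super(ExtendedOptimizer, self).get_updates(loss, params)
        K.update = old_update
        return updates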