def new_update(x, new_x):
    # Layer adaptation: rescale the proposed step with a LARS/LAMB-style
    # trust ratio, so the applied step's norm tracks the parameter's norm.
    if is_one_of(x, params) and self._do_layer_adaptation(x):
        dx = new_x - x
        lr_t = K.clip(self.learning_rate, K.epsilon(), 1e10)
        x_norm = tf.norm(x)
        g_norm = tf.norm(dx / lr_t)
        ratio = K.switch(
            x_norm > 0.0,
            K.switch(g_norm > K.epsilon(), x_norm / g_norm, 1.0),
            1.0)
        new_x = x + dx * ratio
    return old_update(x, new_x)
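In formula form (a restatement of the hook above; $\eta$ stands for self.learning_rate after clipping and $\Delta x$ for new_x - x):

$$x \;\leftarrow\; x + \frac{\lVert x\rVert}{\lVert \Delta x / \eta\rVert}\,\Delta x, \qquad \Delta x = \mathrm{new\_x} - x.$$

When both norms are nonzero the applied step therefore has norm exactly $\eta\,\lVert x\rVert$; the ratio falls back to 1 (the step is left unchanged) when $\lVert x\rVert = 0$ or $\lVert \Delta x/\eta\rVert \le \epsilon$.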
def new_update(x, new_x):
    # Lazy update: only slices (e.g. embedding rows) whose gradient has at
    # least one nonzero entry are moved; all-zero-gradient slices stay put.
    if is_one_of(x, params) and self._do_lazy_optimization(x):
        g = self.grads[x]
        r = K.any(K.not_equal(g, 0.0), axis=-1, keepdims=True)
        new_x = x + (new_x - x) * K.cast(r, K.floatx())
    return old_update(x, new_x)
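A quick NumPy sketch of what the row mask does (illustration only; the array values are made up). Note that even a row with an all-zero gradient can receive a nonzero proposed step from a momentum-based optimizer, which is exactly what the mask suppresses:

import numpy as np

g = np.array([[0.0, 0.0],      # row 0: gradient entirely zero
              [0.3, -0.1]])    # row 1: gradient has nonzero entries
r = np.any(g != 0.0, axis=-1, keepdims=True)   # row mask: [[False], [True]]

x = np.array([[1.0, 1.0],
              [1.0, 1.0]])                     # current weights
new_x = np.array([[0.9, 0.9],
                  [0.9, 0.9]])                 # step proposed by the base optimizer
lazy_x = x + (new_x - x) * r.astype(x.dtype)   # row 0 unchanged, row 1 updated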
def new_update(x, new_x):
    # Learning-rate multiplier: rescale the proposed step, which acts as a
    # per-parameter learning-rate factor for the selected parameters.
    if is_one_of(x, params):
        new_x = x + (new_x - x) * lr_multiplier
    return old_update(x, new_x)
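With $\lambda$ denoting lr_multiplier, the hook applies $x \leftarrow x + \lambda\,(\mathrm{new\_x} - x)$: the step already computed by the wrapped optimizer is simply shrunk or amplified by $\lambda$.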
def new_update(x, new_x):
    # Decoupled weight decay: shrink the parameter towards zero on top of
    # whatever step the base optimizer proposed (AdamW-style decay).
    if is_one_of(x, params) and self._do_weight_decay(x):
        new_x = new_x - self.learning_rate * self.weight_decay_rate * x
    return old_update(x, new_x)
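All four hooks rely on the same wiring: old_update is the real K.update, and new_update temporarily replaces it while the wrapped optimizer builds its update ops, so every parameter assignment passes through the hook. Below is a minimal sketch of that wiring for the learning-rate-multiplier case, assuming a Keras-1-style base optimizer whose get_updates() creates its assignments via K.update; extend_with_lr_multiplier and the module-level is_one_of here are illustrative helpers, not an exact reproduction of any library's API:

from tensorflow.keras import backend as K

def is_one_of(x, xs):
    # Identity-based membership test; `x in xs` would go through tensor __eq__.
    return any(x is y for y in xs)

def extend_with_lr_multiplier(BaseOptimizer, lr_multiplier):
    """Return a subclass of BaseOptimizer whose K.update calls are routed
    through a new_update hook that rescales every proposed parameter step."""
    class NewOptimizer(BaseOptimizer):
        def get_updates(self, loss, params):
            old_update = K.update          # keep a reference to the real assignment op

            def new_update(x, new_x):
                if is_one_of(x, params):
                    new_x = x + (new_x - x) * lr_multiplier
                return old_update(x, new_x)

            K.update = new_update          # route assignments through the hook
            updates = super(NewOptimizer, self).get_updates(loss, params)
            K.update = old_update          # restore the real K.update
            return updates
    return NewOptimizer

The other extensions differ only in the body of new_update and in the extra attributes (weight_decay_rate, the _do_*() predicates, self.grads) that their constructors would have to store on the wrapper.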