def get_gradients(self, loss, params): """Returns gradients of `loss` with respect to `params`. Arguments: loss: Loss tensor. params: List of variables. Returns: List of gradient tensors. Raises: ValueError: In case any gradient cannot be computed (e.g. if gradient function not implemented). """ grads = K.gradients(loss, params) if None in grads: raise ValueError('An operation has `None` for gradient. ' 'Please make sure that all of your ops have a ' 'gradient defined (i.e. are differentiable). ' 'Common ops without gradient: ' 'K.argmax, K.round, K.eval.') if hasattr(self, 'clipnorm') and self.clipnorm > 0: norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads])) grads = [clip_norm(g, self.clipnorm, norm) for g in grads] if hasattr(self, 'clipvalue') and self.clipvalue > 0: grads = [K.clip(g, -self.clipvalue, self.clipvalue) for g in grads] return grads
def __call__(self, w):
  norms = K.sqrt(
      math_ops.reduce_sum(math_ops.square(w), axis=self.axis, keepdims=True))
  desired = (
      self.rate * K.clip(norms, self.min_value, self.max_value) +
      (1 - self.rate) * norms)
  return w * (desired / (K.epsilon() + norms))
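This `__call__` matches the behaviour of the built-in `MinMaxNorm` weight constraint: per-axis L2 norms are pulled toward the range [min_value, max_value], with `rate` controlling how far they move each update. A short usage sketch (layer size and constraint parameters are arbitrary):

from tensorflow.keras import layers, constraints

# Constrain each unit's incoming-weight norm to [0.5, 2.0]; rate=0.9 moves
# out-of-range norms only 90% of the way toward the allowed interval per step.
dense = layers.Dense(
    64,
    kernel_constraint=constraints.MinMaxNorm(
        min_value=0.5, max_value=2.0, rate=0.9, axis=0))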
def get_gradients(self, loss, params):
  grads = K.gradients(loss, params)
  if hasattr(self, 'clipnorm') and self.clipnorm > 0:
    norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads]))
    grads = [clip_norm(g, self.clipnorm, norm) for g in grads]
  if hasattr(self, 'clipvalue') and self.clipvalue > 0:
    grads = [K.clip(g, -self.clipvalue, self.clipvalue) for g in grads]
  return grads
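The `clipnorm` and `clipvalue` attributes checked above are set through the optimizer constructor; when present, `get_gradients` applies them as shown. A brief usage sketch with arbitrary values (tf.keras shown):

from tensorflow import keras
from tensorflow.keras import layers, optimizers

model = keras.Sequential([layers.Dense(1, input_shape=(8,))])

# Rescale gradients whose global L2 norm exceeds 1.0 ...
model.compile(optimizer=optimizers.SGD(learning_rate=0.01, clipnorm=1.0),
              loss='mse')

# ... or clip each gradient element to [-0.5, 0.5] instead:
# optimizers.SGD(learning_rate=0.01, clipvalue=0.5)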
def __call__(self, w):
  # First apply DivideByMax.
  maximum_weight = K.max(K.abs(w))
  w = w / (K.epsilon() + maximum_weight)  # On [-1, 1].
  # Then apply MinMaxNorm.
  norms = K.sqrt(
      math_ops.reduce_sum(math_ops.square(w), axis=self.axis, keepdims=True))
  desired = (self.rate * K.clip(norms, self.min_value, self.max_value) +
             (1 - self.rate) * norms)
  return w * (desired / (K.epsilon() + norms))
def mean_squared_logarithmic_error(y_true, y_pred):
  first_log = math_ops.log(K.clip(y_pred, K.epsilon(), None) + 1.)
  second_log = math_ops.log(K.clip(y_true, K.epsilon(), None) + 1.)
  return K.mean(math_ops.square(first_log - second_log), axis=-1)
def mean_absolute_percentage_error(y_true, y_pred):
  diff = math_ops.abs(
      (y_true - y_pred) / K.clip(math_ops.abs(y_true), K.epsilon(), None))
  return 100. * K.mean(diff, axis=-1)
def kullback_leibler_divergence(y_true, y_pred):
  y_true = K.clip(y_true, K.epsilon(), 1)
  y_pred = K.clip(y_pred, K.epsilon(), 1)
  return math_ops.reduce_sum(y_true * math_ops.log(y_true / y_pred), axis=-1)
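A quick NumPy check of the same formula; both distributions are clipped away from zero before the log, and the epsilon value mirrors the default `K.epsilon()`:

import numpy as np

def kl_divergence_np(y_true, y_pred, eps=1e-7):
  # Clip to avoid log(0), then sum y_true * log(y_true / y_pred) over the last axis.
  y_true = np.clip(y_true, eps, 1.0)
  y_pred = np.clip(y_pred, eps, 1.0)
  return np.sum(y_true * np.log(y_true / y_pred), axis=-1)

p = np.array([0.1, 0.4, 0.5])
q = np.array([0.2, 0.3, 0.5])
print(kl_divergence_np(p, q))  # ~0.0458 nats; exactly 0 when p == q.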
def kullback_leibler_divergence(y_true, y_pred):
  y_true = K.clip(y_true, K.epsilon(), 1)
  y_pred = K.clip(y_pred, K.epsilon(), 1)
  return K.sum(y_true * K.log(y_true / y_pred), axis=-1)
def mean_squared_logarithmic_error(y_true, y_pred):
  first_log = K.log(K.clip(y_pred, K.epsilon(), None) + 1.)
  second_log = K.log(K.clip(y_true, K.epsilon(), None) + 1.)
  return K.mean(K.square(first_log - second_log), axis=-1)
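The same computation in plain NumPy, to make the clip-then-log(1 + x) structure explicit (epsilon mirrors `K.epsilon()`):

import numpy as np

def msle_np(y_true, y_pred, eps=1e-7):
  # Log of (clipped value + 1), then the mean squared difference of the logs.
  first_log = np.log(np.clip(y_pred, eps, None) + 1.0)
  second_log = np.log(np.clip(y_true, eps, None) + 1.0)
  return np.mean(np.square(first_log - second_log), axis=-1)

print(msle_np(np.array([3.0, 5.0]), np.array([2.5, 5.0])))  # ~0.0089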
def __call__(self, w):
  norms = K.sqrt(K.sum(K.square(w), axis=self.axis, keepdims=True))
  desired = K.clip(norms, 0, self.max_value)
  w *= (desired / (K.epsilon() + norms))
  return w
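This is the `MaxNorm` constraint: any per-axis norm above `max_value` is scaled back down to it. In practice it is attached to a layer's kernel, for example:

from tensorflow.keras import layers, constraints

# Cap the L2 norm of each unit's incoming weight vector at 2.0.
dense = layers.Dense(64, kernel_constraint=constraints.MaxNorm(max_value=2.0, axis=0))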
def mean_absolute_percentage_error(y_true, y_pred):
  # Absolute error expressed as a percentage of the (clipped) true values.
  diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), K.epsilon(), None))
  return 100. * K.mean(diff, axis=-1)
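A small numeric check of the percentage form in plain NumPy (epsilon mirrors `K.epsilon()`):

import numpy as np

def mape_np(y_true, y_pred, eps=1e-7):
  # Absolute error divided by the clipped true value, averaged, times 100.
  diff = np.abs((y_true - y_pred) / np.clip(np.abs(y_true), eps, None))
  return 100. * np.mean(diff, axis=-1)

print(mape_np(np.array([100.0, 200.0]), np.array([110.0, 190.0])))  # 7.5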
def __call__(self, w):
  return K.clip(w, self.min_value, self.max_value)
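Keras ships no built-in elementwise value-clipping constraint, so this `__call__` presumably belongs to a custom `Constraint` subclass, roughly along these lines (class name and defaults are illustrative):

from tensorflow.keras import backend as K
from tensorflow.keras import constraints, layers


class ClipValue(constraints.Constraint):
  """Hypothetical constraint that clips each weight to [min_value, max_value]."""

  def __init__(self, min_value=-1.0, max_value=1.0):
    self.min_value = min_value
    self.max_value = max_value

  def __call__(self, w):
    return K.clip(w, self.min_value, self.max_value)

  def get_config(self):
    return {'min_value': self.min_value, 'max_value': self.max_value}


# Usage: keep every kernel entry of this layer inside [-0.5, 0.5].
dense = layers.Dense(32, kernel_constraint=ClipValue(-0.5, 0.5))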