def weighted(y_true, y_pred, weights, mask=None):
  """Wrapper function.

  Arguments:
      y_true: `y_true` argument of `fn`.
      y_pred: `y_pred` argument of `fn`.
      weights: Weights tensor.
      mask: Mask tensor.

  Returns:
      Scalar tensor.
  """
  # `fn` is the elementwise loss function being wrapped, closed over from
  # the enclosing scope.
  # score_array has ndim >= 2
  score_array = fn(y_true, y_pred)
  if mask is not None:
    # Cast the mask to floatX to avoid float64 upcasting in Theano
    mask = K.cast(mask, K.floatx())
    # mask should have the same shape as score_array
    score_array *= mask
    # the loss per batch should be proportional
    # to the number of unmasked samples.
    score_array /= K.mean(mask)

  # apply sample weighting
  if weights is not None:
    # reduce score_array to same ndim as weight array
    ndim = K.ndim(score_array)
    weight_ndim = K.ndim(weights)
    score_array = K.mean(score_array, axis=list(range(weight_ndim, ndim)))
    score_array *= weights
    score_array /= K.mean(K.cast(K.not_equal(weights, 0), K.floatx()))
  return K.mean(score_array)
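# --- Illustrative sketch (not from the original source): how the mask
# normalization above behaves, redone in NumPy on toy data. All names and
# values here are assumptions for illustration only.
import numpy as np

score = np.array([[1.0, 2.0, 3.0, 4.0]])   # per-timestep loss, ndim >= 2
mask = np.array([[1.0, 1.0, 0.0, 0.0]])    # last two timesteps are masked

masked = score * mask
masked /= mask.mean()                      # keeps the batch loss proportional
                                           # to the number of unmasked samples
# mean over unmasked entries only: (1 + 2) / 2 == 1.5
assert np.isclose(masked.mean(), 1.5)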
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = [K.update_add(self.iterations, 1)]

  lr = self.lr
  if self.initial_decay > 0:
    lr *= (1. /
           (1. + self.decay * K.cast(self.iterations, K.dtype(self.decay))))

  t = K.cast(self.iterations, K.floatx()) + 1
  lr_t = lr / (1. - K.pow(self.beta_1, t))

  shapes = [K.int_shape(p) for p in params]
  # zero init of 1st moment
  ms = [K.zeros(shape) for shape in shapes]
  # zero init of exponentially weighted infinity norm
  us = [K.zeros(shape) for shape in shapes]
  self.weights = [self.iterations] + ms + us

  for p, g, m, u in zip(params, grads, ms, us):
    m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
    u_t = K.maximum(self.beta_2 * u, K.abs(g))
    p_t = p - lr_t * m_t / (u_t + self.epsilon)

    self.updates.append(K.update(m, m_t))
    self.updates.append(K.update(u, u_t))
    new_p = p_t

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(K.update(p, new_p))
  return self.updates
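# --- Illustrative sketch (not from the original source): one Adamax step in
# NumPy. The second accumulator is an exponentially weighted infinity norm
# (a running max), not a mean of squares. Values are assumed defaults.
import numpy as np

lr, beta_1, beta_2, epsilon = 0.002, 0.9, 0.999, 1e-7
p, g = np.array([1.0]), np.array([0.5])
m, u = np.zeros_like(p), np.zeros_like(p)

t = 1
lr_t = lr / (1. - beta_1 ** t)              # only the 1st moment is corrected
m = beta_1 * m + (1. - beta_1) * g
u = np.maximum(beta_2 * u, np.abs(g))       # infinity-norm accumulator
p = p - lr_t * m / (u + epsilon)
print(p)  # first step has magnitude ~= lr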
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = [K.update_add(self.iterations, 1)]

  lr = self.lr
  if self.initial_decay > 0:
    lr *= (1. /
           (1. + self.decay * K.cast(self.iterations, K.dtype(self.decay))))

  t = K.cast(self.iterations, K.floatx()) + 1
  lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
               (1. - K.pow(self.beta_1, t)))

  ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  self.weights = [self.iterations] + ms + vs

  for p, g, m, v in zip(params, grads, ms, vs):
    m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
    v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
    p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

    self.updates.append(K.update(m, m_t))
    self.updates.append(K.update(v, v_t))
    new_p = p_t

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(K.update(p, new_p))
  return self.updates
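# --- Illustrative sketch (not from the original source): one bias-corrected
# Adam step in plain NumPy, mirroring the update above. Hyperparameter values
# are assumptions chosen to match common defaults.
import numpy as np

lr, beta_1, beta_2, epsilon = 0.001, 0.9, 0.999, 1e-7
p, g = np.array([1.0]), np.array([0.5])    # parameter and its gradient
m = v = np.zeros_like(p)                   # zero-initialized moments

t = 1
lr_t = lr * np.sqrt(1. - beta_2 ** t) / (1. - beta_1 ** t)  # bias correction
m = beta_1 * m + (1. - beta_1) * g
v = beta_2 * v + (1. - beta_2) * g ** 2
p = p - lr_t * m / (np.sqrt(v) + epsilon)
# With zero-initialized moments, the first step has magnitude ~= lr.
print(p)  # ~[0.999]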
def compute_mask(self, inputs, mask=None):
  if mask is None:
    return None
  if not isinstance(mask, list):
    raise ValueError('`mask` should be a list.')
  if not isinstance(inputs, list):
    raise ValueError('`inputs` should be a list.')
  if len(mask) != len(inputs):
    raise ValueError('The lists `inputs` and `mask` '
                     'should have the same length.')
  if all([m is None for m in mask]):
    return None
  # Make a list of masks while making sure
  # the dimensionality of each mask
  # is the same as the corresponding input.
  masks = []
  for input_i, mask_i in zip(inputs, mask):
    if mask_i is None:
      # Input is unmasked. Append all 1s to masks,
      # but cast it to bool first
      masks.append(K.cast(K.ones_like(input_i), 'bool'))
    elif K.ndim(mask_i) < K.ndim(input_i):
      # Mask is smaller than the input, expand it
      masks.append(K.expand_dims(mask_i))
    else:
      masks.append(mask_i)
  concatenated = K.concatenate(masks, axis=self.axis)
  return K.all(concatenated, axis=-1, keepdims=False)
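# --- Illustrative sketch (not from the original source): the mask merging
# above reduced to NumPy booleans. Two per-timestep masks are stacked along
# the last axis and combined with a logical AND, so a position survives only
# if it is unmasked in every input.
import numpy as np

mask_a = np.array([[True, True, False]])
mask_b = np.array([[True, False, True]])
concatenated = np.stack([mask_a, mask_b], axis=-1)
merged = concatenated.all(axis=-1)
print(merged)  # [[ True False False]]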
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  shapes = [K.int_shape(p) for p in params]
  accumulators = [K.zeros(shape) for shape in shapes]
  delta_accumulators = [K.zeros(shape) for shape in shapes]
  self.weights = accumulators + delta_accumulators
  self.updates = [K.update_add(self.iterations, 1)]

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (1. /  # pylint: disable=g-no-augmented-assignment
               (1. + self.decay * K.cast(self.iterations,
                                         K.dtype(self.decay))))

  for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators):
    # update accumulator
    new_a = self.rho * a + (1. - self.rho) * K.square(g)
    self.updates.append(K.update(a, new_a))

    # use the new accumulator and the *old* delta_accumulator
    update = g * K.sqrt(d_a + self.epsilon) / K.sqrt(new_a + self.epsilon)
    new_p = p - lr * update

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(K.update(p, new_p))

    # update delta_accumulator
    new_d_a = self.rho * d_a + (1 - self.rho) * K.square(update)
    self.updates.append(K.update(d_a, new_d_a))
  return self.updates
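# --- Illustrative sketch (not from the original source): the Adadelta ratio
# of running RMS terms in NumPy. The `rho` and `epsilon` values are assumed
# defaults.
import numpy as np

rho, epsilon = 0.95, 1e-7
g = np.array([0.5])
a = d_a = np.zeros(1)                      # squared-grad / squared-update EMAs

new_a = rho * a + (1. - rho) * g ** 2
# new accumulator with the *old* delta accumulator, as in the code above
update = g * np.sqrt(d_a + epsilon) / np.sqrt(new_a + epsilon)
new_d_a = rho * d_a + (1 - rho) * update ** 2
print(update)  # the RMS ratio makes the step size roughly self-scaling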
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  accumulators = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  self.weights = accumulators
  self.updates = [K.update_add(self.iterations, 1)]

  lr = self.lr
  if self.initial_decay > 0:
    lr *= (1. /
           (1. + self.decay * K.cast(self.iterations, K.dtype(self.decay))))

  for p, g, a in zip(params, grads, accumulators):
    # update accumulator
    new_a = self.rho * a + (1. - self.rho) * K.square(g)
    self.updates.append(K.update(a, new_a))
    new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon)

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(K.update(p, new_p))
  return self.updates
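# --- Illustrative sketch (not from the original source): one RMSprop step in
# NumPy. The hyperparameters are assumed common defaults.
import numpy as np

lr, rho, epsilon = 0.001, 0.9, 1e-7
p, g = np.array([1.0]), np.array([0.5])
a = np.zeros_like(p)                        # EMA of squared gradients

new_a = rho * a + (1. - rho) * g ** 2
p = p - lr * g / (np.sqrt(new_a) + epsilon)
# The step is normalized by the running RMS of the gradient.
print(p)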
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = [K.update_add(self.iterations, 1)]

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (1. /  # pylint: disable=g-no-augmented-assignment
               (1. + self.decay * K.cast(self.iterations,
                                         K.dtype(self.decay))))
  # momentum
  shapes = [K.int_shape(p) for p in params]
  moments = [K.zeros(shape) for shape in shapes]
  self.weights = [self.iterations] + moments
  for p, g, m in zip(params, grads, moments):
    v = self.momentum * m - lr * g  # velocity
    self.updates.append(K.update(m, v))

    if self.nesterov:
      new_p = p + self.momentum * v - lr * g
    else:
      new_p = p + v

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(K.update(p, new_p))
  return self.updates
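# --- Illustrative sketch (not from the original source): plain vs. Nesterov
# momentum in NumPy, matching the two branches above. Values are assumptions.
import numpy as np

lr, momentum = 0.1, 0.9
p, g, m = np.array([1.0]), np.array([0.5]), np.array([0.2])

v = momentum * m - lr * g                   # velocity
plain_p = p + v
nesterov_p = p + momentum * v - lr * g      # looks ahead along the velocity
print(plain_p, nesterov_p)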
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  shapes = [K.int_shape(p) for p in params]
  accumulators = [K.zeros(shape) for shape in shapes]
  self.weights = accumulators
  self.updates = [K.update_add(self.iterations, 1)]

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (1. /  # pylint: disable=g-no-augmented-assignment
               (1. + self.decay * K.cast(self.iterations,
                                         K.dtype(self.decay))))

  for p, g, a in zip(params, grads, accumulators):
    new_a = a + K.square(g)  # update accumulator
    self.updates.append(K.update(a, new_a))
    new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon)

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(K.update(p, new_p))
  return self.updates
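# --- Illustrative sketch (not from the original source): the Adagrad
# accumulator in NumPy. Because the accumulator only ever grows, the
# effective step size for a coordinate shrinks over time.
import numpy as np

lr, epsilon = 0.01, 1e-7
p, a = np.array([1.0]), np.zeros(1)
for g in (np.array([0.5]), np.array([0.5])):
  a = a + g ** 2                            # monotonically growing accumulator
  p = p - lr * g / (np.sqrt(a) + epsilon)
print(p)  # the second step is smaller than the first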
def dropped_inputs(inputs=inputs, rate=self.rate, seed=self.seed):
  # `alpha`, `scale`, and `noise_shape` are closed over from the
  # enclosing scope (`alpha` and `scale` are the SELU constants).
  alpha_p = -alpha * scale
  kept_idx = K.greater_equal(K.random_uniform(noise_shape, seed=seed), rate)
  kept_idx = K.cast(kept_idx, K.floatx())
  a = ((1 - rate) * (1 + rate * alpha_p ** 2)) ** -0.5
  b = -a * alpha_p * rate
  x = inputs * kept_idx + alpha_p * (1 - kept_idx)
  return a * x + b
def _preprocess_symbolic_input(x, data_format, mode):
  """Preprocesses a tensor encoding a batch of images.

  Arguments:
      x: Input tensor, 3D or 4D.
      data_format: Data format of the image tensor.
      mode: One of "caffe", "tf" or "torch".
          - caffe: will convert the images from RGB to BGR,
              then will zero-center each color channel with
              respect to the ImageNet dataset,
              without scaling.
          - tf: will scale pixels between -1 and 1, sample-wise.
          - torch: will scale pixels between 0 and 1 and then
              will normalize each channel with respect to the
              ImageNet dataset.

  Returns:
      Preprocessed tensor.
  """
  global _IMAGENET_MEAN

  if mode == 'tf':
    x /= 127.5
    x -= 1.
    return x

  if mode == 'torch':
    x /= 255.
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
  else:
    if data_format == 'channels_first':
      # 'RGB'->'BGR'
      if K.ndim(x) == 3:
        x = x[::-1, ...]
      else:
        x = x[:, ::-1, ...]
    else:
      # 'RGB'->'BGR'
      x = x[..., ::-1]
    mean = [103.939, 116.779, 123.68]
    std = None

  if _IMAGENET_MEAN is None:
    _IMAGENET_MEAN = K.constant(-np.array(mean))

  # Zero-center by mean pixel
  if K.dtype(x) != K.dtype(_IMAGENET_MEAN):
    x = K.bias_add(x, K.cast(_IMAGENET_MEAN, K.dtype(x)), data_format)
  else:
    x = K.bias_add(x, _IMAGENET_MEAN, data_format)
  if std is not None:
    x /= std
  return x
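# --- Illustrative sketch (not from the original source): the three modes
# above applied to a single channels_last pixel in NumPy. The pixel value is
# an assumption for illustration; 'caffe' also flips RGB to BGR before
# subtracting the (BGR-ordered) ImageNet mean.
import numpy as np

rgb = np.array([200.0, 150.0, 100.0])       # one RGB pixel

tf_mode = rgb / 127.5 - 1.                  # scaled to [-1, 1]
torch_mode = (rgb / 255. - np.array([0.485, 0.456, 0.406])) / np.array(
    [0.229, 0.224, 0.225])                  # [0, 1], then ImageNet-normalized
caffe_mode = rgb[::-1] - np.array([103.939, 116.779, 123.68])  # BGR, centered
print(tf_mode, torch_mode, caffe_mode)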
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = [K.update_add(self.iterations, 1)]

  lr = self.lr
  if self.initial_decay > 0:
    lr = lr * (1. /  # pylint: disable=g-no-augmented-assignment
               (1. + self.decay * K.cast(self.iterations,
                                         K.dtype(self.decay))))

  t = K.cast(self.iterations, K.floatx()) + 1
  lr_t = lr * (
      K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t)))

  ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  if self.amsgrad:
    vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
  else:
    vhats = [K.zeros(1) for _ in params]
  self.weights = [self.iterations] + ms + vs + vhats

  for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
    m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
    v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
    if self.amsgrad:
      vhat_t = K.maximum(vhat, v_t)
      p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
      self.updates.append(K.update(vhat, vhat_t))
    else:
      p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

    self.updates.append(K.update(m, m_t))
    self.updates.append(K.update(v, v_t))
    new_p = p_t

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(K.update(p, new_p))
  return self.updates
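# --- Illustrative sketch (not from the original source): the AMSGrad twist in
# NumPy. Keeping the running maximum of the second moment means the effective
# step size can only shrink when gradients become small again.
import numpy as np

beta_2 = 0.999
v, vhat = np.zeros(1), np.zeros(1)
for g in (np.array([1.0]), np.array([0.01])):
  v = beta_2 * v + (1. - beta_2) * g ** 2
  vhat = np.maximum(vhat, v)                # non-decreasing denominator
print(v, vhat)                              # vhat >= v after the small grad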
def get_updates(self, loss, params):
  grads = self.get_gradients(loss, params)
  self.updates = [K.update_add(self.iterations, 1)]

  t = K.cast(self.iterations, K.floatx()) + 1

  # Due to the recommendations in [2], i.e. warming momentum schedule
  momentum_cache_t = self.beta_1 * (
      1. - 0.5 * (K.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)))
  momentum_cache_t_1 = self.beta_1 * (
      1. - 0.5 *
      (K.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay)))
  m_schedule_new = self.m_schedule * momentum_cache_t
  m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1
  self.updates.append((self.m_schedule, m_schedule_new))

  shapes = [K.int_shape(p) for p in params]
  ms = [K.zeros(shape) for shape in shapes]
  vs = [K.zeros(shape) for shape in shapes]
  self.weights = [self.iterations] + ms + vs

  for p, g, m, v in zip(params, grads, ms, vs):
    # the following equations given in [1]
    g_prime = g / (1. - m_schedule_new)
    m_t = self.beta_1 * m + (1. - self.beta_1) * g
    m_t_prime = m_t / (1. - m_schedule_next)
    v_t = self.beta_2 * v + (1. - self.beta_2) * K.square(g)
    v_t_prime = v_t / (1. - K.pow(self.beta_2, t))
    m_t_bar = (1. - momentum_cache_t) * g_prime + (
        momentum_cache_t_1 * m_t_prime)

    self.updates.append(K.update(m, m_t))
    self.updates.append(K.update(v, v_t))

    p_t = p - self.lr * m_t_bar / (K.sqrt(v_t_prime) + self.epsilon)
    new_p = p_t

    # Apply constraints.
    if getattr(p, 'constraint', None) is not None:
      new_p = p.constraint(new_p)

    self.updates.append(K.update(p, new_p))
  return self.updates
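# --- Illustrative sketch (not from the original source): the warming momentum
# schedule above, evaluated in plain Python for a few step counts. The
# `schedule_decay` value is an assumed default.
beta_1, schedule_decay = 0.9, 0.004
for t in (1, 10, 100, 1000):
  print(t, beta_1 * (1. - 0.5 * 0.96 ** (t * schedule_decay)))
# The effective momentum warms up from ~0.45 toward beta_1 as t grows.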
def dropped_inputs(inputs=inputs, rate=self.rate, seed=self.seed):  # pylint: disable=missing-docstring
  alpha = 1.6732632423543772848170429916717
  scale = 1.0507009873554804934193349852946
  alpha_p = -alpha * scale

  kept_idx = K.greater_equal(K.random_uniform(noise_shape, seed=seed), rate)
  kept_idx = K.cast(kept_idx, K.floatx())

  # Get affine transformation params
  a = ((1 - rate) * (1 + rate * alpha_p**2))**-0.5
  b = -a * alpha_p * rate

  # Apply mask
  x = inputs * kept_idx + alpha_p * (1 - kept_idx)

  # Do affine transformation
  return a * x + b
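# --- Illustrative sketch (not from the original source): a NumPy check that
# the affine correction above approximately preserves zero mean and unit
# variance under alpha dropout. The sample size and seed are assumptions.
import numpy as np

alpha = 1.6732632423543772848170429916717
scale = 1.0507009873554804934193349852946
alpha_p = -alpha * scale
rate = 0.1

rng = np.random.default_rng(0)
x = rng.standard_normal(1_000_000)          # stand-in for SELU activations
kept = (rng.uniform(size=x.shape) >= rate).astype(x.dtype)

a = ((1 - rate) * (1 + rate * alpha_p ** 2)) ** -0.5
b = -a * alpha_p * rate
y = a * (x * kept + alpha_p * (1 - kept)) + b
print(y.mean(), y.var())                    # both close to 0 and 1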
def _preprocess_symbolic_input(x, data_format, mode):
  """Preprocesses a symbolic image tensor.

  Arguments:
      x: symbolic tensor, 3D or 4D.
      data_format: data format of the image tensor.
      mode: One of "caffe", "tf".
          - caffe: will convert the images from RGB to BGR,
              then will zero-center each color channel with
              respect to the ImageNet dataset,
              without scaling.
          - tf: will scale pixels between -1 and 1, sample-wise.

  Returns:
      Preprocessed tensor.
  """
  global _IMAGENET_MEAN

  if mode == 'tf':
    x /= 127.5
    x -= 1.
    return x

  if data_format == 'channels_first':
    # 'RGB'->'BGR'
    if K.ndim(x) == 3:
      x = x[::-1, ...]
    else:
      x = x[:, ::-1, ...]
  else:
    # 'RGB'->'BGR'
    x = x[..., ::-1]

  if _IMAGENET_MEAN is None:
    _IMAGENET_MEAN = K.constant(-np.array([103.939, 116.779, 123.68]))

  # Zero-center by mean pixel
  if K.dtype(x) != K.dtype(_IMAGENET_MEAN):
    x = K.bias_add(x, K.cast(_IMAGENET_MEAN, K.dtype(x)), data_format)
  else:
    x = K.bias_add(x, _IMAGENET_MEAN, data_format)
  return x
def __call__(self, w):
  w *= K.cast(w >= 0., K.floatx())
  return w
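# --- Illustrative sketch (not from the original source): what the non-negative
# constraint above computes, in NumPy. Negative entries are zeroed by the
# 0/1 indicator mask.
import numpy as np

w = np.array([-1.0, 0.0, 2.0])
w = w * (w >= 0.).astype(np.float32)
print(w)  # [0. 0. 2.]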
def _current_learning_rate(y_true, y_pred):
  # `optimizer` is closed over from the enclosing scope; this metric-shaped
  # function reports the decayed learning rate.
  return optimizer.lr / (
      1. + optimizer.decay * K.cast(optimizer.iterations,
                                    K.dtype(optimizer.decay)))
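# --- Illustrative sketch (not from the original source): the decayed learning
# rate the function above would report, evaluated in plain Python. The
# hyperparameter values are assumptions.
lr, decay, iterations = 0.01, 1e-4, 1000
current_lr = lr / (1. + decay * iterations)
print(current_lr)  # 0.01 / 1.1 ~= 0.00909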
def __call__(self, w):
  w *= K.cast(K.greater_equal(w, 0.), K.floatx())
  return w
def call(self, inputs):
  if K.dtype(inputs) != 'int32':
    inputs = K.cast(inputs, 'int32')
  out = K.gather(self.embeddings, inputs)
  return out
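# --- Illustrative sketch (not from the original source): the embedding lookup
# above as a NumPy gather. The table and indices are toy assumptions.
import numpy as np

embeddings = np.array([[0.0, 0.1],
                       [1.0, 1.1],
                       [2.0, 2.1]])          # (vocab_size, output_dim)
inputs = np.array([[2, 0]], dtype=np.int32)  # (batch, sequence)
out = embeddings[inputs]                     # gather -> shape (1, 2, 2)
print(out)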
def __call__(self, w):
  return w * K.cast(K.greater_equal(w, 0.), K.floatx())
def sparse_categorical_accuracy(y_true, y_pred):
  return K.cast(
      K.equal(
          K.max(y_true, axis=-1),
          K.cast(K.argmax(y_pred, axis=-1), K.floatx())), K.floatx())
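# --- Illustrative sketch (not from the original source): the metric above in
# NumPy. Note it compares the *max* over the last axis of `y_true` (i.e. the
# integer label, for labels of shape (batch, 1)) with the argmax of `y_pred`.
import numpy as np

y_true = np.array([[1.], [0.]])              # integer labels, shape (2, 1)
y_pred = np.array([[0.1, 0.9],
                   [0.4, 0.6]])              # per-class scores
acc = y_true.max(axis=-1) == y_pred.argmax(axis=-1).astype(float)
print(acc.astype(float))  # [1. 0.]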
def call(self, inputs, mask=None):
  return inputs * K.cast(inputs > self.theta, K.floatx())
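# --- Illustrative sketch (not from the original source): the thresholded
# activation above in NumPy, with an assumed theta of 1.0. Values at or below
# the threshold are zeroed.
import numpy as np

theta = 1.0
inputs = np.array([-2.0, 0.5, 1.5])
print(inputs * (inputs > theta).astype(np.float32))  # [0.  0.  1.5]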
def sparse_top_k_categorical_accuracy(y_true, y_pred, k=5):
  return K.mean(
      K.in_top_k(y_pred, K.cast(K.max(y_true, axis=-1), 'int32'), k), axis=-1)
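# --- Illustrative sketch (not from the original source): sparse top-k
# accuracy in NumPy for k=2. A prediction counts as correct when the true
# label is among the k highest-scoring classes.
import numpy as np

k = 2
y_true = np.array([2, 0])                    # integer labels
y_pred = np.array([[0.1, 0.3, 0.6, 0.0],
                   [0.4, 0.5, 0.05, 0.05]])
topk = np.argsort(y_pred, axis=-1)[:, -k:]   # indices of the k largest scores
in_top_k = np.array([t in row for t, row in zip(y_true, topk)])
print(in_top_k.mean())  # 1.0: label 2 is top-1, label 0 is top-2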
def call(self, inputs, mask=None):
  return inputs * K.cast(K.greater(inputs, self.theta), K.floatx())
def _model_loss(model, inputs, targets):
  """Calculates the loss for a given model.

  Arguments:
      model: The model on which metrics are being calculated.
      inputs: The inputs of the given model. This is typically the mini batch
        of data that is fed to the model.
      targets: The predictions or targets of the given model.

  Returns:
      Returns the model output, total loss and loss value calculated using the
      specified loss function. The total loss includes regularization losses
      and applies masking and sample weighting to the loss value.
  """
  total_loss = 0
  outs = model(inputs)
  if not isinstance(outs, list):
    outs = [outs]

  if not isinstance(targets, list):
    targets = [targets]

  loss_metrics = []
  with K.name_scope('loss'):
    for i, loss_fn in enumerate(model.loss_functions):
      # compute the loss
      output_loss = _eager_loss_fn(outs[i], targets[i], loss_fn,
                                   model.output_names[i])
      loss_metrics.append(K.mean(output_loss))

      mask = outs[i]._keras_mask
      # adapted from weighted_loss_fn
      if mask is not None:
        # mask should have the same shape as output_loss
        output_loss *= mask
        # the loss per batch should be proportional
        # to the number of unmasked samples.
        output_loss /= K.mean(mask)

      # adapted from weighted_loss_fn
      # apply sample weighting
      if model.sample_weights:
        # reduce score_array to same ndim as weight array
        ndim = K.ndim(output_loss)
        weight_ndim = K.ndim(model.sample_weights)
        output_loss = K.mean(output_loss, axis=list(range(weight_ndim, ndim)))
        output_loss *= model.sample_weights
        output_loss /= K.mean(
            K.cast(K.not_equal(model.sample_weights, 0), K.floatx()))
        output_loss = K.mean(output_loss)

      loss_weight = model.loss_weights_list[i]
      if total_loss is None:
        total_loss = loss_weight * output_loss
      else:
        total_loss += loss_weight * output_loss

    total_loss = K.mean(total_loss)
    # Add regularization losses
    custom_losses = []
    for layer in model.layers:
      if layer.losses:
        custom_losses += layer.losses

    if custom_losses:
      total_loss += sum(custom_losses)

  return outs, total_loss, loss_metrics
def call(self, inputs):
  boolean_mask = K.any(
      K.not_equal(inputs, self.mask_value), axis=-1, keepdims=True)
  return inputs * K.cast(boolean_mask, inputs.dtype)
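# --- Illustrative sketch (not from the original source): the masking above in
# NumPy with an assumed mask_value of 0. A timestep is zeroed out only when
# *all* of its features equal the mask value.
import numpy as np

mask_value = 0.0
inputs = np.array([[[1.0, 2.0],
                    [0.0, 0.0],              # fully masked timestep
                    [0.0, 3.0]]])            # partially zero: kept
boolean_mask = (inputs != mask_value).any(axis=-1, keepdims=True)
print(inputs * boolean_mask.astype(inputs.dtype))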
def _model_loss(model, inputs, targets):
  """Calculates the loss for a given model.

  Arguments:
      model: The model on which metrics are being calculated.
      inputs: The inputs of the given model. This is typically the mini batch
        of data that is fed to the model.
      targets: The predictions or targets of the given model.

  Returns:
      Returns the model output, total loss and loss value calculated using the
      specified loss function. The total loss includes regularization losses
      and applies masking and sample weighting to the loss value.
  """
  total_loss = 0
  if len(inputs) == 1:
    outs = model.call(inputs[0])
  else:
    outs = model.call(inputs)
  if not isinstance(outs, list):
    outs = [outs]

  if not isinstance(targets, list):
    targets = [targets]

  loss_metrics = []
  with K.name_scope('loss'):
    for i, loss_fn in enumerate(model.loss_functions):
      # compute the loss
      output_loss = _eager_loss_fn(outs[i], targets[i], loss_fn,
                                   model.output_names[i])
      loss_metrics.append(K.mean(output_loss))

      mask = outs[i]._keras_mask
      # adapted from weighted_loss_fn
      if mask is not None:
        # mask should have the same shape as output_loss
        output_loss *= mask
        # the loss per batch should be proportional
        # to the number of unmasked samples.
        output_loss /= K.mean(mask)

      # adapted from weighted_loss_fn
      # apply sample weighting
      if model.sample_weights:
        # reduce score_array to same ndim as weight array
        ndim = K.ndim(output_loss)
        weight_ndim = K.ndim(model.sample_weights)
        output_loss = K.mean(output_loss, axis=list(range(weight_ndim, ndim)))
        output_loss *= model.sample_weights
        output_loss /= K.mean(
            K.cast(K.not_equal(model.sample_weights, 0), K.floatx()))
        output_loss = K.mean(output_loss)

      loss_weight = model.loss_weights_list[i]
      if total_loss is None:
        total_loss = loss_weight * output_loss
      else:
        total_loss += loss_weight * output_loss

    total_loss = K.mean(total_loss)
    # Add regularization losses
    custom_losses = []
    for layer in model.layers:
      if layer.losses:
        custom_losses += layer.losses

    if custom_losses:
      total_loss += sum(custom_losses)

  return outs, total_loss, loss_metrics