def gradient_penalty_loss(y_true, y_pred, averaged_samples, gradient_penalty_weight):
    """Calculates the gradient penalty loss for a batch of "averaged" samples.

    In Improved WGANs, the 1-Lipschitz constraint is enforced by adding a term to the
    loss function that penalizes the network if the gradient norm moves away from 1.
    However, it is impossible to evaluate this function at all points in the input
    space. The compromise used in the paper is to choose random points on the lines
    between real and generated samples, and check the gradients at these points. Note
    that it is the gradient w.r.t. the input averaged samples, not the weights of the
    discriminator, that we're penalizing!

    In order to evaluate the gradients, we must first run samples through the generator
    and evaluate the loss. Then we get the gradients of the discriminator w.r.t. the
    input averaged samples. The L2 norm and penalty can then be calculated for this
    gradient.

    Note that this loss function requires the original averaged samples as input, but
    Keras only supports passing y_true and y_pred to loss functions. To get around this,
    we make a partial() of the function with the averaged_samples argument, and use
    that for model training."""
    # first get the gradients:
    #   assuming: - that y_pred has dimensions (batch_size, 1)
    #             - averaged_samples has dimensions (batch_size, nbr_features)
    # gradients afterwards has dimension (batch_size, nbr_features), basically
    # a list of nbr_features-dimensional gradient vectors
    gradients = K.gradients(y_pred, averaged_samples)[0]
    # compute the euclidean norm by squaring ...
    gradients_sqr = K.square(gradients)
    # ... summing over the rows ...
    gradients_sqr_sum = K.sum(gradients_sqr,
                              axis=np.arange(1, len(gradients_sqr.shape)))
    # ... and sqrt
    gradient_l2_norm = K.sqrt(gradients_sqr_sum)
    # compute lambda * (1 - ||grad||)^2 still for each single sample
    gradient_penalty = gradient_penalty_weight * K.square(1 - gradient_l2_norm)
    # return the mean as loss over all the batch samples
    return K.mean(gradient_penalty)
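# Usage sketch (not taken from this file): binding the extra `averaged_samples` argument
# with functools.partial, since Keras loss functions only receive y_true and y_pred.
# The names `averaged_samples`, `wasserstein_loss`, `discriminator_model` and
# GRADIENT_PENALTY_WEIGHT are assumed to exist in the surrounding training script.
from functools import partial

partial_gp_loss = partial(gradient_penalty_loss,
                          averaged_samples=averaged_samples,
                          gradient_penalty_weight=GRADIENT_PENALTY_WEIGHT)
# Keras reports losses by function name; a partial object has none, so set it explicitly.
partial_gp_loss.__name__ = 'gradient_penalty'

discriminator_model.compile(optimizer='adam',
                            loss=[wasserstein_loss,
                                  wasserstein_loss,
                                  partial_gp_loss])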
def margin_loss(y_true, y_pred):
    """
    Margin loss for Eq.(4). This loss should also work when y_true[i, :] contains
    more than one `1` (not tested).
    :param y_true: [None, n_classes]
    :param y_pred: [None, num_capsule]
    :return: a scalar loss value.
    """
    L = y_true * KTF.square(KTF.maximum(0., 0.9 - y_pred)) + \
        0.5 * (1 - y_true) * KTF.square(KTF.maximum(0., y_pred - 0.1))
    return KTF.mean(KTF.sum(L, 1))
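# Usage sketch (assumption, not from this file): margin_loss is typically combined with
# a weighted reconstruction loss when training a two-output CapsNet. `train_model` and
# `lam_recon` are hypothetical names.
train_model.compile(optimizer='adam',
                    loss=[margin_loss, 'mse'],
                    loss_weights=[1., lam_recon])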
def total_variation_loss(x):
    # total variation regularizer: penalizes differences between neighbouring pixels
    assert K.ndim(x) == 4
    if K.image_dim_ordering() == 'th':
        a = K.square(x[:, :, :img_width - 1, :img_height - 1] -
                     x[:, :, 1:, :img_height - 1])
        b = K.square(x[:, :, :img_width - 1, :img_height - 1] -
                     x[:, :, :img_width - 1, 1:])
    else:
        a = K.square(x[:, :img_width - 1, :img_height - 1, :] -
                     x[:, 1:, :img_height - 1, :])
        b = K.square(x[:, :img_width - 1, :img_height - 1, :] -
                     x[:, :img_width - 1, 1:, :])
    return K.sum(K.pow(a + b, 1.25))
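# Usage sketch (assumption): in a neural style transfer script, the total variation term
# is added to the content and style terms to keep the generated image locally smooth.
# `loss`, `total_variation_weight` and `combination_image` are hypothetical names.
loss = loss + total_variation_weight * total_variation_loss(combination_image)
grads = K.gradients(loss, combination_image)[0]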
def test_function(self):
    val = np.random.random((4, 2))
    input_val = np.random.random((4, 2))

    xth = KTH.variable(val)
    xtf = KTF.variable(val)

    yth = KTH.placeholder(ndim=2)
    ytf = KTF.placeholder(ndim=2)

    exp_th = KTH.square(xth) + yth
    exp_tf = KTF.square(xtf) + ytf

    update_th = xth * 2
    update_tf = xtf * 2
    fth = KTH.function([yth], [exp_th], updates=[(xth, update_th)])
    ftf = KTF.function([ytf], [exp_tf], updates=[(xtf, update_tf)])

    function_outputs_th = fth([input_val])[0]
    function_outputs_tf = ftf([input_val])[0]
    assert function_outputs_th.shape == function_outputs_tf.shape
    assert_allclose(function_outputs_th, function_outputs_tf, atol=1e-05)

    new_val_th = KTH.get_value(xth)
    new_val_tf = KTF.get_value(xtf)
    assert new_val_th.shape == new_val_tf.shape
    assert_allclose(new_val_th, new_val_tf, atol=1e-05)
def loss_mse_select_clipped(y_true, y_pred):
    # the last channel of y_true carries a per-element selection weight;
    # the remaining channels are the regression targets
    wei = y_true[:, :, :, :, -1:]
    pred = y_pred[:, :, :, :, :-1]
    true = y_true[:, :, :, :, :-1]
    out = K.square(tf.clip_by_value(pred - true, -5, 5)) * wei
    return tf.reduce_sum(out, axis=None) / (tf.reduce_sum(wei, axis=None) * 4)  # 4 = numPar
def custom_mean_squared_loss(y_true, y_pred):
    print(y_true)
    print(y_pred)
    diff = K.abs(y_true - y_pred)
    # columns 6 onward are angles in degrees: take the wrap-around difference
    angle_diff = K.minimum(diff[:, :, 6:], 360 - diff[:, :, 6:])
    error = tf.concat([diff[:, :, :6], angle_diff], axis=-1)
    return K.mean(K.square(error), axis=-1)
def denseloss(y_true, y_pred, e=1000):
    Le = KTF.mean(KTF.square(y_pred - y_true), axis=-1)
    Lc = get_avgpoolLoss(y_true, y_pred, 1)
    Lc += get_avgpoolLoss(y_true, y_pred, 2)
    Lc += get_avgpoolLoss(y_true, y_pred, 4)
    shp = KTF.get_variable_shape(y_pred)
    Lc = Lc / (shp[1] * shp[2])
    return Le + e * Lc
def euclidean_distance_angles_biwi(y_true, y_pred):
    diff = y_pred - y_true
    weights = theano.shared(
        np.expand_dims(3 * np.array([0.2, 0.35, 0.45]), axis=0))
    weights = T.patternbroadcast(weights, (True, False))
    diff = diff * weights
    return K.sqrt(K.sum(K.square(diff), axis=-1, keepdims=True))
def _compute_cost_huber(self, q, a, r, t, q2):
    preds = slice_tensor_tensor(q, a)
    bootstrap = K.max if not self.use_mean else K.mean
    targets = r + (1 - t) * self.gamma * bootstrap(q2, axis=1)
    err = targets - preds
    cond = K.abs(err) > 1.0
    L2 = 0.5 * K.square(err)   # quadratic branch for small errors
    L1 = K.abs(err) - 0.5      # linear branch for large errors
    # Huber cost: linear when |err| > 1, quadratic otherwise
    cost = tf.where(cond, L1, L2)
    return K.mean(cost)
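# Reference sketch (assumption): the same cost written in plain NumPy-free Python for a
# single error value, handy as a sanity check of the Huber branches above (delta = 1).
def huber_reference(err, delta=1.0):
    if abs(err) <= delta:
        return 0.5 * err ** 2                     # quadratic near zero
    return delta * (abs(err) - 0.5 * delta)       # linear tail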
def _attention_regularizer(self, attention):
    batch_size = K.cast(K.shape(attention)[0], K.floatx())
    input_len = K.shape(attention)[-1]
    # build an (input_len, input_len) identity matrix from index comparisons
    indices = K.expand_dims(K.arange(0, input_len), axis=0)
    diagonal = K.expand_dims(K.arange(0, input_len), axis=-1)
    eye = K.cast(K.equal(indices, diagonal), K.floatx())
    # penalize ||A·Aᵀ − I||² averaged over the batch
    return self.attention_regularizer_weight * K.sum(K.square(
        K.batch_dot(attention, K.permute_dimensions(attention, (0, 2, 1))) - eye)) / batch_size
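# Reference sketch (assumption): the penalty above is the squared Frobenius norm of
# A·Aᵀ − I averaged over the batch, as in "A Structured Self-Attentive Sentence
# Embedding". NumPy equivalent for one sample's attention matrix A of shape
# (seq_len, seq_len):
import numpy as np

def attention_penalty_reference(A, weight):
    eye = np.eye(A.shape[-1])
    return weight * np.sum((A @ A.T - eye) ** 2)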
def content_loss(base, combination):
    channel_dim = 0 if K.image_dim_ordering() == 'th' else -1
    channels = K.shape(base)[channel_dim]
    size = img_width * img_height

    if args.content_loss_type == 1:
        multiplier = 1 / (2.0 * (channels ** 0.5) * (size ** 0.5))
    elif args.content_loss_type == 2:
        multiplier = 1 / (channels * size)
    else:
        multiplier = 1.0

    return multiplier * K.sum(K.square(combination - base))
def style_loss(style, combination, mask_path=None, nb_channels=None):
    assert K.ndim(style) == 3
    assert K.ndim(combination) == 3

    if mask_path is not None:
        style_mask = K.variable(load_mask(mask_path, nb_channels))
        style = style * K.stop_gradient(style_mask)
        combination = combination * K.stop_gradient(style_mask)
        del style_mask

    S = gram_matrix(style)
    C = gram_matrix(combination)
    channels = 3
    size = img_width * img_height
    return K.sum(K.square(S - C)) / (4.0 * (channels ** 2) * (size ** 2))
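# Sketch of the gram_matrix helper that style_loss above relies on (assumption: the
# project defines something equivalent; the dim-ordering handling mirrors the classic
# Keras neural style transfer example).
def gram_matrix(x):
    assert K.ndim(x) == 3
    if K.image_dim_ordering() == 'th':
        features = K.batch_flatten(x)
    else:
        features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1)))
    # inner products between flattened feature maps -> (channels, channels)
    return K.dot(features, K.transpose(features))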
def content_loss(base, combination):
    return KTF.sum(KTF.square(combination - base))
def style_loss(style, combination):
    S = gram_matrix(style)
    C = gram_matrix(combination)
    channels = 3
    size = img_height * img_width
    return KTF.sum(KTF.square(S - C)) / (4. * (channels ** 2) * (size ** 2))
def yolo_loss(self, args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)
    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 \
        else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [
        K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = self.yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape,
            calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] *
                            input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(
            lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(
            object_mask, raw_pred[..., 4:5], from_logits=True) + \
            (1 - object_mask) * K.binary_crossentropy(
                object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [
                loss, xy_loss, wh_loss, confidence_loss, class_loss,
                K.sum(ignore_mask)
            ], message='loss: ')
    return loss
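# Usage sketch (assumption): in keras-yolo3-style training code, a loss method like the
# one above is wrapped in a Lambda layer so the y_true tensors can be fed as extra model
# inputs, and the model is compiled against a pass-through loss. `model_body`,
# `y_true_inputs`, `anchors` and `num_classes` are hypothetical names, and `self` refers
# to the object that owns yolo_loss.
from keras.layers import Lambda
from keras.models import Model

model_loss = Lambda(self.yolo_loss, output_shape=(1,), name='yolo_loss',
                    arguments={'anchors': anchors,
                               'num_classes': num_classes,
                               'ignore_thresh': 0.5})(
    [*model_body.output, *y_true_inputs])
model = Model([model_body.input, *y_true_inputs], model_loss)
model.compile(optimizer='adam',
              loss={'yolo_loss': lambda y_true, y_pred: y_pred})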
def c_loss(ytrue, ypred):
    loss = mae(ytrue, ypred)
    print(type(loss))
    loss += self.loss_weight * K.square(K.mean(ypred) - self.center)
    return loss
def MyReg(weight_matrix):
    # print('lambdaq:', lambdaq)
    return lambdaq * K.sum(K.square(weight_matrix) - 1)
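# Usage sketch (assumption): MyReg reads the module-level `lambdaq` and can be attached
# to a layer as a kernel regularizer. The layer size and the `lambdaq` value below are
# hypothetical.
from keras.layers import Dense

lambdaq = 1e-4
layer = Dense(64, activation='relu', kernel_regularizer=MyReg)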
def root_mean_squared_error(y_true, y_pred):
    return KTF.sqrt(KTF.mean(KTF.square(y_pred - y_true)))
def normalize(self, x):
    # utility function to normalize a tensor by its L2 norm
    return x / (KTF.sqrt(KTF.mean(KTF.square(x)) + 1e-5))
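# Usage sketch (assumption): this is the normalization step typically used in
# convnet-filter visualization by gradient ascent. `loss` and `input_img` are
# hypothetical tensors from such a script.
grads = KTF.gradients(loss, input_img)[0]
grads = self.normalize(grads)  # keep the ascent step size well behaved
iterate = KTF.function([input_img], [loss, grads])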
def total_variation_loss(x):
    a = KTF.square(x[:, :img_height - 1, :img_width - 1, :] -
                   x[:, 1:, :img_width - 1, :])
    b = KTF.square(x[:, :img_height - 1, :img_width - 1, :] -
                   x[:, :img_height - 1, 1:, :])
    return KTF.sum(KTF.pow(a + b, 1.25))
def loss_func(y_true, y_pred):
    return ktf.sqrt(ktf.mean(ktf.square(y_pred - y_true)))
def get_updates(self, params, loss):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                              K.dtype(self.decay))))

    t = K.cast(self.iterations, K.floatx()) + 1
    lr_t = lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (1. - K.pow(self.beta_1, t))

    shapes = [K.get_variable_shape(p) for p in params]
    ms = [K.zeros(shape) for shape in shapes]
    vs = [K.zeros(shape) for shape in shapes]
    self.weights = [self.iterations] + ms + vs

    for p, g, m, v in zip(params, grads, ms, vs):
        # if a weight tensor (len > 1) use weight normalized parameterization
        # this is the only part changed w.r.t. keras.optimizers.Adam
        ps = K.get_variable_shape(p)
        if len(ps) > 1:
            # get weight normalization parameters
            V, V_norm, V_scaler, g_param, grad_g, grad_V = \
                get_weightnorm_params_and_grads(p, g)

            # Adam containers for the 'g' parameter
            V_scaler_shape = K.get_variable_shape(V_scaler)
            m_g = K.zeros(V_scaler_shape)
            v_g = K.zeros(V_scaler_shape)

            # update g parameters
            m_g_t = (self.beta_1 * m_g) + (1. - self.beta_1) * grad_g
            v_g_t = (self.beta_2 * v_g) + (1. - self.beta_2) * K.square(grad_g)
            new_g_param = g_param - lr_t * m_g_t / (K.sqrt(v_g_t) + self.epsilon)
            self.updates.append(K.update(m_g, m_g_t))
            self.updates.append(K.update(v_g, v_g_t))

            # update V parameters
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * grad_V
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(grad_V)
            new_V_param = V - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)
            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            # if there are constraints we apply them to V, not W
            # if p in constraints:
            #     c = constraints[p]
            #     new_V_param = c(new_V_param)

            # wn param updates --> W updates
            add_weightnorm_param_updates(self.updates, new_V_param, new_g_param,
                                         p, V_scaler)
        else:  # do optimization normally
            m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
            p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

            self.updates.append(K.update(m, m_t))
            self.updates.append(K.update(v, v_t))

            new_p = p_t
            # apply constraints
            # if p in constraints:
            #     c = constraints[p]
            #     new_p = c(new_p)
            self.updates.append(K.update(p, new_p))
    return self.updates