# Shared imports assumed by the snippets in this file; individual snippets
# may additionally rely on module- or class-level definitions noted inline.
import numpy as np
import tensorflow as tf
from tensorflow.keras import backend
from tensorflow.keras import backend as K


def focal_loss(y_true, y_pred):
    """Focal loss, as defined here: https://arxiv.org/abs/1708.02002

    Using this provides improved fidelity on unbalanced datasets:
    Tekawade et al. https://doi.org/10.1117/12.2540442

    Parameters
    ----------
    y_true : tensor
        Ground truth tensor of shape (batch_size, n_rows, n_cols, n_channels)
    y_pred : tensor
        Predicted tensor of shape (batch_size, n_rows, n_cols, n_channels)

    Returns
    -------
    loss value
    """
    # `alpha`, `gamma`, `eps` and the `_binary_lossmap` helper are
    # module-level definitions not shown in this excerpt.
    pt_1, pt_0 = _binary_lossmap(y_true, y_pred)
    loss_map = -alpha * K.log(pt_1 + eps) * K.pow(1. - pt_1, gamma) \
        - (1 - alpha) * K.log(1. - pt_0 + eps) * K.pow(pt_0, gamma)
    return tf.reduce_mean(loss_map)

def binary_focal_loss_fixed(self, y_true, y_pred):
    gamma = 2.
    alpha = .25
    y_true = tf.cast(y_true, tf.float32)
    epsilon = K.epsilon()
    y_pred = K.clip(y_pred, epsilon, 1.0 - epsilon)
    p_t = tf.where(K.equal(y_true, 1), y_pred, 1 - y_pred)
    alpha_factor = K.ones_like(y_true) * alpha
    alpha_t = tf.where(K.equal(y_true, 1), alpha_factor, 1 - alpha_factor)
    cross_entropy = -K.log(p_t)
    weight = alpha_t * K.pow((1 - p_t), gamma)
    loss = weight * cross_entropy
    loss = K.mean(K.sum(loss, axis=1))
    return loss

def generalized_loss(y_true, y_pred, alpha=1.0, beta=1.0 / 255.0):
    """Generalized function used to return a large variety of mathematical
    loss functions. The primary benefit is a smooth, differentiable version
    of L1 loss.

    Barron, J. A More General Robust Loss Function
    https://arxiv.org/pdf/1701.03077.pdf

    Parameters:
        alpha: penalty factor. Larger numbers give larger weight to large
            deviations.
        beta: scale factor used to adjust to the input scale
            (i.e. inputs of mean 1e-4 or 256).

    Return:
        a loss value from the results of function(y_pred - y_true)

    Example:
        alpha=1.0, beta=1.0/255.0 gives a smoothly differentiable version
        of L1 / MAE loss.
        alpha=1.999999 (lim as alpha -> 2), beta=1.0/255.0 gives
        L2 / RMSE loss.
    """
    diff = y_pred - y_true
    second = (K.pow(K.pow(diff / beta, 2.) / K.abs(2. - alpha) + 1.,
                    (alpha / 2.)) - 1.)
    loss = (K.abs(2. - alpha) / alpha) * second
    loss = K.mean(loss, axis=-1) * beta
    return loss

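# Quick numerical check (a sketch, not from the original source): alpha
# near 2 makes generalized_loss behave like a scaled L2 penalty, while
# alpha = 1 gives a smooth L1. The toy tensors are illustrative only.
_y_true = tf.zeros((1, 4))
_y_pred = tf.constant([[0.10, -0.20, 0.05, 0.00]])
print(float(generalized_loss(_y_true, _y_pred, alpha=1.0)[0]))       # smooth L1-like
print(float(generalized_loss(_y_true, _y_pred, alpha=1.999999)[0]))  # ~L2-like
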
def total_variation_loss(x):
    # x is a 4D image tensor; penalizes differences between neighbouring
    # pixels, encouraging spatial smoothness. The spatial dims sit at
    # different positions depending on the data format.
    if K.image_data_format() == 'channels_first':
        _, _, img_nrows, img_ncols = x.shape
        a = K.square(
            x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, 1:, :img_ncols - 1])
        b = K.square(
            x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, :img_nrows - 1, 1:])
    else:
        _, img_nrows, img_ncols, _ = x.shape
        a = K.square(
            x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, 1:, :img_ncols - 1, :])
        b = K.square(
            x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, :img_nrows - 1, 1:, :])
    return K.sum(K.pow(a + b, 1.25))

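# Minimal usage sketch (an assumption, not from the original source): total
# variation is usually added as a small regularization term on top of a
# primary reconstruction or style loss.
_img = tf.random.uniform((2, 32, 32, 3))
print(float(total_variation_loss(_img)))  # scalar smoothness penalty
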
def loss_function(y_true, y_pred):
    # Focal Tversky loss; `smooth`, `delta`, `gamma` and `identify_axis`
    # are expected to be defined in the enclosing scope.
    # Clip values to prevent division by zero error
    epsilon = K.epsilon()
    y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
    axis = identify_axis(y_true.get_shape())
    # Calculate true positives (tp), false negatives (fn) and false positives (fp)
    tp = K.sum(y_true * y_pred, axis=axis)
    fn = K.sum(y_true * (1 - y_pred), axis=axis)
    fp = K.sum((1 - y_true) * y_pred, axis=axis)
    tversky_class = (tp + smooth) / (tp + delta * fn + (1 - delta) * fp + smooth)
    # Average class scores
    focal_tversky_loss = K.mean(K.pow((1 - tversky_class), gamma))
    return focal_tversky_loss

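# Hedged stand-ins: `loss_function` above relies on `smooth`, `delta`,
# `gamma` and an `identify_axis` helper that this excerpt does not show.
# The definitions below are plausible minimal substitutes (the values are
# illustrative, not taken from the source).
smooth, delta, gamma = 1e-6, 0.7, 0.75

def identify_axis(shape):
    # Spatial axes to reduce over: (1, 2) for 4D inputs, (1, 2, 3) for 5D.
    return [1, 2] if len(shape) == 4 else [1, 2, 3]
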
def call(self, x):
    if (self.size is None) or (self.mode == 'sum'):
        self.size = int(x.shape[-1])
    position_j = 1. / K.pow(
        10000., 2 * K.arange(self.size / 2, dtype='float32') / self.size
    )
    position_j = K.expand_dims(position_j, 0)
    # Cumulative sum along axis 1 generates a position sequence, like
    # arange, but it follows the actual length of x.
    position_i = tf.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
    position_i = K.expand_dims(position_i, 2)
    position_ij = K.dot(position_i, position_j)
    position_ij = K.concatenate([K.cos(position_ij), K.sin(position_ij)], 2)
    if self.mode == 'sum':
        return position_ij + x
    elif self.mode == 'concat':
        return K.concatenate([position_ij, x], 2)

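# Standalone NumPy sketch (illustrative; `sinusoidal_encoding` is a
# hypothetical name) of the encoding computed above. The classic formula is
# PE[p, 2i] = sin(p / 10000^(2i/d)), PE[p, 2i+1] = cos(p / 10000^(2i/d))
# (Vaswani et al., 2017); the layer above concatenates all cosines followed
# by all sines instead of interleaving, and this sketch mirrors that.
def sinusoidal_encoding(seq_len, d_model):
    pos = np.arange(seq_len)[:, None]                                 # (seq_len, 1)
    inv_freq = 1. / np.power(10000., 2 * np.arange(d_model // 2) / d_model)
    angles = pos * inv_freq[None, :]                                  # (seq_len, d/2)
    return np.concatenate([np.cos(angles), np.sin(angles)], axis=-1)  # (seq_len, d)
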
def scaleInvariantError(y_true, y_pred):
    # Scale-invariant log error (Eigen et al., https://arxiv.org/abs/1406.2283):
    #   D(y, y*) = (1/n) * sum_i d_i^2 - (1/n^2) * (sum_i d_i)^2,
    #   with d_i = log(y*_i) - log(y_i),
    # equivalent to the pairwise form
    #   1/(2n^2) * sum_{i,j} ((log y_i - log y_j) - (log y*_i - log y*_j))^2.
    # `l`, `h` and `batch_size` are globals giving the image dimensions.
    n = l * h * batch_size
    img = K.log(y_true[:, :, :, 0] + 1e-4) - K.log(
        y_pred[:, :, :, 0] + 1e-4)  # (?, 120, 160)
    d = K.expand_dims(img, 3)  # (?, 120, 160, 1)
    sumVal = K.sum(K.pow(d, 2)) / n  # K.sum with no axis reduces all dims
    sndTer = (K.sum(d) ** 2) / (n ** 2)
    result = sumVal - sndTer
    return result

def focal_crossentropy(y_true, y_pred):
    # `alpha` and `gamma` are expected to be defined in the enclosing scope.
    bce = K.binary_crossentropy(y_true, y_pred)
    y_pred = K.clip(y_pred, K.epsilon(), 1. - K.epsilon())
    p_t = (y_true * y_pred) + ((1 - y_true) * (1 - y_pred))
    alpha_factor = y_true * alpha + ((1 - alpha) * (1 - y_true))
    modulating_factor = K.pow((1 - p_t), gamma)
    # compute the final loss and return
    return K.mean(alpha_factor * modulating_factor * bce, axis=-1)

def gelu_tanh(x):
    """Gaussian Error Linear Unit.

    This is a smoother version of the ReLU.
    Original paper: https://arxiv.org/abs/1606.08415

    Args:
        x: float Tensor to perform activation.

    Returns:
        `x` with the GELU activation applied.
    """
    cdf = 0.5 * (1.0 + K.tanh(
        (np.sqrt(2 / np.pi) * (x + 0.044715 * K.pow(x, 3)))))
    return x * cdf

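# Usage sketch (an assumption, not from the original source): any Keras
# layer that accepts a callable activation can use gelu_tanh directly.
_dense = tf.keras.layers.Dense(8, activation=gelu_tanh)
print(_dense(tf.random.normal((1, 4))).shape)  # (1, 8)
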
def my_loss_l2(y_true, y_pred):
    """
    First takes the element-wise squared difference of true and predicted
    values, then multiplies it with the coverage matrix in order to weight
    covered pixels with 1 and non-covered ones with 0.
    :return: the loss of covered pixels
    """
    covered_area = y_true[:, :, :, -3:]
    y_true = y_true[:, :, :, :-3]
    l2 = K.sum(K.pow(y_true - y_pred, 2) * covered_area)
    nonzero = K.cast(tf.math.count_nonzero(covered_area, keepdims=False),
                     'float32')
    return l2 / nonzero

def _loss(y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
    """Focal loss calculating function.

    Args:
        y_true (tf.Tensor): A tensor of the same shape as `y_pred`,
            holding ground truth values
        y_pred (tf.Tensor): predicted probabilities (values are clipped
            into (0, 1) below, so apply a sigmoid/softmax to logits first)

    Returns:
        tf.Tensor: mean focal loss over the mini-batch
    """
    # `alpha` and `gamma` are expected to be bound in the enclosing scope.
    # Clip the prediction value to prevent NaN's and Inf's
    epsilon = K.epsilon()
    y_pred = K.clip(y_pred, epsilon, 1.0 - epsilon)
    # Calculate Focal Loss
    loss = -alpha * y_true * K.pow(1 - y_pred, gamma) * K.log(y_pred) \
        - alpha * (1 - y_true) * K.pow(y_pred, gamma) * K.log(1 - y_pred)
    # Compute mean loss in mini_batch
    return K.mean(K.sum(loss, axis=-1))

def call(self, x):
    x = tf.transpose(x, [0, 2, 1])
    output = self._spectrogram_mono(x[:, 0:1, :])
    if self.is_mono is False:
        for ch_idx in range(1, self.n_ch):
            output = K.concatenate(
                (output, self._spectrogram_mono(x[:, ch_idx:ch_idx + 1, :])),
                axis=self.ch_axis_idx)
    if self.power_spectrogram != 2.0:
        output = K.pow(K.sqrt(output), self.power_spectrogram)
    if self.return_decibel_spectrogram:
        # `backend_keras` is an external helper module (presumably kapre's)
        # providing amplitude_to_decibel; its import is not shown here.
        output = backend_keras.amplitude_to_decibel(output)
    return output

def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]
    wd = self.wd  # decoupled weight decay (3/4)

    learning_rate = self.learning_rate
    if self.initial_decay > 0:
        learning_rate *= (1. / (1. + self.decay * K.cast(self.iterations,
                                                         K.dtype(self.decay))))

    t = K.cast(self.iterations, K.floatx()) + 1
    learning_rate_t = learning_rate * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                                       (1. - K.pow(self.beta_1, t)))

    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    self.weights = [self.iterations] + ms + vs

    for p, g, m, v in zip(params, grads, ms, vs):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
        p_t = p - learning_rate_t * m_t / (K.sqrt(v_t) + self.epsilon) \
            - learning_rate * wd * p  # decoupled weight decay (4/4)

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        new_p = p_t

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates

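# Sanity check (a sketch) of the Adam bias-correction factor used above,
# lr_t = lr * sqrt(1 - beta_2^t) / (1 - beta_1^t), with the common defaults
# beta_1 = 0.9, beta_2 = 0.999 (assumed; the class's values are not shown).
# At t = 1 the factor is ~0.316, compensating for zero-initialized moments.
_t, _beta_1, _beta_2 = 1.0, 0.9, 0.999
print(np.sqrt(1 - _beta_2 ** _t) / (1 - _beta_1 ** _t))  # ~0.3162
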
def call(self, inputs, mask=None):
    inputs, pos_input = inputs
    batch_size, seq_len, output_dim = self._get_shape(inputs)
    if self.mode == self.MODE_EXPAND:
        pos_input = inputs
    if K.dtype(pos_input) != K.floatx():
        pos_input = K.cast(pos_input, K.floatx())
    evens = K.arange(0, output_dim // 2) * 2
    odds = K.arange(0, output_dim // 2) * 2 + 1
    even_embd = K.sin(
        K.dot(
            K.expand_dims(pos_input, -1),
            K.expand_dims(1.0 / K.pow(
                10000.0,
                K.cast(evens, K.floatx()) / K.cast(output_dim, K.floatx())
            ), 0)
        )
    )
    odd_embd = K.cos(
        K.dot(
            K.expand_dims(pos_input, -1),
            K.expand_dims(1.0 / K.pow(
                10000.0,
                K.cast((odds - 1), K.floatx()) / K.cast(output_dim, K.floatx())
            ), 0)
        )
    )
    embd = K.stack([even_embd, odd_embd], axis=-1)
    output = K.reshape(embd, [-1, seq_len, output_dim])
    if self.mode == self.MODE_CONCAT:
        output = K.concatenate([inputs, output], axis=-1)
    if self.mode == self.MODE_ADD:
        output += inputs
    return output

def call(self, x):
    # Position indices 0..seq_len; index 0 is dropped after the sin/cos.
    mask = K.expand_dims(
        K.cast(K.arange(start=0, stop=K.shape(x)[1] + 1), 'float32'), axis=-1)
    bins = K.expand_dims(
        K.cast(K.arange(self.embedding_size // 2) * 2, 'float32'), axis=0)
    evens = K.dot(mask, 1.0 / K.pow(10000.0, bins / self.embedding_size))
    odds = tf.identity(evens)
    evens = K.sin(evens)[1:, :]
    odds = K.cos(odds)[1:, :]
    pos = K.reshape(K.stack([evens, odds], axis=2),
                    (-1, K.shape(x)[1], self.embedding_size))
    y = K.expand_dims(x, axis=-1)
    return pos * y

def _compute_sensitivities(y_true, y_pred):
    """
    Compute the weighted sensitivities.
    :param y_true: true class.
    :param y_pred: predicted class.
    :return: weighted sensitivities value.
    """
    diff = (1.0 - K.pow(y_true - y_pred, 2)) / 2.0  # in [0, 1]
    diff_class = K.sum(diff, axis=1)  # vector of size N
    total = K.sum(diff_class)  # total sum of that vector
    sensitivities = diff_class / total
    return sensitivities

def full_affinity(input_x, scale):
    """Calculates the symmetrized full Gaussian affinity matrix, scaled
    by a provided scale.

    Args:
        input_x: input dataset of size n x d
        scale: provided scale

    Returns:
        n x n affinity matrix
    """
    sigma = K.variable(scale)
    dist_x = squared_distance(input_x)
    sigma_squared = K.expand_dims(K.pow(sigma, 2), -1)
    weight_mat = K.exp(-dist_x / (2 * sigma_squared))
    return weight_mat

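# Hedged stand-in: `squared_distance` is not shown in this excerpt. A
# standard pairwise squared-Euclidean-distance implementation for an
# n x d matrix would look like this (an assumption, not the source's code):
def squared_distance(input_x):
    sq_norms = K.sum(K.square(input_x), axis=1, keepdims=True)  # n x 1
    # ||x_i - x_j||^2 = ||x_i||^2 - 2 x_i . x_j + ||x_j||^2
    return sq_norms - 2 * K.dot(input_x, K.transpose(input_x)) + K.transpose(sq_norms)
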
def call(self, x):
    x = self.conv0(x)
    x = K.pow(x, 2)
    x = K.relu(self.bn1(self.conv1(x)))
    x = K.relu(self.bn2(self.conv2(x)))
    x = K.relu(self.bn3(self.conv3(x)))
    x = K.relu(self.bn4(self.conv4(x)))
    x = K.relu(self.bn5(self.conv5(x)))
    x = K.relu(self.bn6(self.conv6(x)))
    x = K.relu(self.bn7(self.conv7(x)))
    x = self.bn8(self.conv8(x))
    x = self.pool(x)
    x = K.reshape(x, (-1, 2))
    x = self.fc(x)
    return x

def focal_loss_binary(y_true, y_pred):
    """Binary cross-entropy focal loss
    """
    gamma = 2.0
    alpha = 0.25

    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))

    epsilon = K.epsilon()
    # clip to prevent NaN and Inf
    pt_1 = K.clip(pt_1, epsilon, 1. - epsilon)
    pt_0 = K.clip(pt_0, epsilon, 1. - epsilon)

    weight = alpha * K.pow(1. - pt_1, gamma)
    fl1 = -K.sum(weight * K.log(pt_1))
    weight = (1 - alpha) * K.pow(pt_0, gamma)
    fl0 = -K.sum(weight * K.log(1. - pt_0))
    return fl1 + fl0

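# Toy evaluation (a sketch, not from the original source): confident
# correct predictions are down-weighted by (1 - p_t)^gamma, so the loss for
# mostly-correct predictions should be far smaller than for wrong ones.
_y_true = tf.constant([1., 0., 1., 0.])
print(float(focal_loss_binary(_y_true, tf.constant([0.9, 0.1, 0.8, 0.2]))))  # small
print(float(focal_loss_binary(_y_true, tf.constant([0.1, 0.9, 0.2, 0.8]))))  # much larger
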
def generalized_dice_loss(y_true, y_pred):
    # Per-class weights: inverse squared class volume (the epsilon guards
    # against division by zero when a class is absent from the batch).
    wc = 1 / (K.pow(K.sum(y_true, axis=(0, 1, 2)), 2) + K.epsilon())
    y_true = K.clip(y_true, K.epsilon(), 1 - K.epsilon())
    y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
    product = y_true * y_pred
    addition = y_true + y_pred
    num = wc[0] * product[:, :, :, 0] + wc[1] * product[:, :, :, 1]
    dem = wc[0] * addition[:, :, :, 0] + wc[1] * addition[:, :, :, 1]
    gdl = 1 - 2 * K.sum(num) / K.sum(dem)
    return gdl

def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                              K.dtype(self.decay))))

    t = K.cast(self.iterations, K.floatx()) + 1
    lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                 (1. - K.pow(self.beta_1, t)))

    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    self.weights = [self.iterations] + ms + vs + vhats

    for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
        # Running maximum of the second moment: the AMSGrad variant of Adam.
        vhat_t = K.maximum(vhat, v_t)
        p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        self.updates.append(K.update(vhat, vhat_t))
        new_p = p_t

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates

def loss_uncertainty_gaussian_likelihood_dir(y_true, y_pred):
    """
    Loss function that calculates something similar to a Gaussian
    likelihood for predicted directions.

    Requires that y_pred contains three predicted values (labels):
    dir_x, dir_y, dir_z.
    y_true & y_pred are expected to contain the predicted/true label
    and the predicted std for the label.

    L = ln(std ** 2) + (y_label_pred - y_label_true) ** 2 / (std ** 2)

    Returns
    -------
    loss : Gaussian likelihood loss for the directional error
    """
    # order in y_pred: 1) pred label 2) pred label error
    # prevent the gradient from flowing back through the label network
    y_pred_dir_x, y_pred_dir_y, y_pred_dir_z = K.stop_gradient(
        y_pred[:, 0]), K.stop_gradient(y_pred[:, 1]), K.stop_gradient(y_pred[:, 2])
    y_pred_std_dir_x, y_pred_std_dir_y, y_pred_std_dir_z = \
        y_pred[:, 3], y_pred[:, 4], y_pred[:, 5]
    y_true_dir_x, y_true_dir_y, y_true_dir_z = \
        y_true[:, 0], y_true[:, 1], y_true[:, 2]

    # equal to a lower std limit of 1e-3
    eps = tf.constant(1e-6, dtype="float32")

    loss_dir_x = K.log(K.pow(y_pred_std_dir_x, 2) + eps) + K.pow(
        y_pred_dir_x - y_true_dir_x, 2) / (K.pow(y_pred_std_dir_x, 2) + eps)
    loss_dir_y = K.log(K.pow(y_pred_std_dir_y, 2) + eps) + K.pow(
        y_pred_dir_y - y_true_dir_y, 2) / (K.pow(y_pred_std_dir_y, 2) + eps)
    loss_dir_z = K.log(K.pow(y_pred_std_dir_z, 2) + eps) + K.pow(
        y_pred_dir_z - y_true_dir_z, 2) / (K.pow(y_pred_std_dir_z, 2) + eps)

    loss = loss_dir_x + loss_dir_y + loss_dir_z
    return loss

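# Toy check (a sketch, values illustrative): three true direction
# components, with predictions and per-component stds packed as the
# docstring describes.
_loss_val = loss_uncertainty_gaussian_likelihood_dir(
    tf.constant([[0.1, 0.2, 0.3]]),
    tf.constant([[0.1, 0.2, 0.3, 0.05, 0.05, 0.05]]))
print(float(_loss_val[0]))  # negative here: small stds with zero error
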
def sigmoid_focal_crossentropy(y_true, y_pred, alpha=0.25, gamma=2.0,
                               from_logits=False):
    """
    Args:
        y_true: true targets tensor.
        y_pred: predictions tensor.
        alpha: balancing factor.
        gamma: modulating factor.

    Returns:
        Weighted loss float `Tensor`. If `reduction` is `NONE`, this has
        the same shape as `y_true`; otherwise, it is scalar.
    """
    if gamma and gamma < 0:
        raise ValueError(
            "Value of gamma should be greater than or equal to zero")

    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    # Get the binary cross_entropy
    bce = K.binary_crossentropy(y_true, y_pred, from_logits=from_logits)

    # If logits are provided then convert the predictions into probabilities
    if from_logits:
        y_pred = K.sigmoid(y_pred)
    else:
        y_pred = K.clip(y_pred, K.epsilon(), 1. - K.epsilon())

    p_t = (y_true * y_pred) + ((1 - y_true) * (1 - y_pred))
    alpha_factor = 1
    modulating_factor = 1

    if alpha:
        alpha = tf.convert_to_tensor(alpha, dtype=K.floatx())
        alpha_factor = y_true * alpha + ((1 - alpha) * (1 - y_true))

    if gamma:
        gamma = tf.convert_to_tensor(gamma, dtype=K.floatx())
        modulating_factor = K.pow((1 - p_t), gamma)

    # compute the final loss and return
    return K.mean(alpha_factor * modulating_factor * bce, axis=-1,
                  keepdims=True)

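# Usage sketch (an assumption, not from the original source): the function
# has the standard Keras (y_true, y_pred) loss signature, so it can be
# passed to compile() directly; wrap it in functools.partial to change
# alpha or gamma.
_model = tf.keras.Sequential(
    [tf.keras.layers.Dense(1, activation='sigmoid', input_shape=(10,))])
_model.compile(optimizer='adam', loss=sigmoid_focal_crossentropy)
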
def call(self, y_true, y_pred):
    # Scale predictions so that the class probas of each sample sum to 1
    y_pred /= K.sum(y_pred, axis=-1, keepdims=True)

    # Clip the prediction value to prevent NaN's and Inf's
    epsilon = K.epsilon()
    y_pred = K.clip(y_pred, epsilon, 1. - epsilon)

    # Calculate Cross Entropy
    cross_entropy = -y_true * K.log(y_pred)

    # Calculate Focal Loss
    loss = self.alpha * K.pow(1. - y_pred, self.gamma) * cross_entropy

    # Sum the losses in mini_batch
    return K.sum(loss, axis=1)

def call(self, x):
    if (self.size is None) or (self.mode == 'sum'):
        self.size = int(x.shape[-1])
    batch_size, seq_len = K.shape(x)[0], K.shape(x)[1]
    position_j = 1. / K.pow(
        10000., 2 * K.arange(self.size / 2, dtype='float32') / self.size)
    position_j = K.expand_dims(position_j, 0)
    # K.arange does not support variable lengths, so the position sequence
    # is generated with a cumulative sum instead.
    position_i = K.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
    position_i = K.expand_dims(position_i, 2)
    position_ij = K.dot(position_i, position_j)
    position_ij = K.concatenate([K.cos(position_ij), K.sin(position_ij)], 2)
    if self.mode == 'sum':
        return position_ij + x
    elif self.mode == 'concat':
        return K.concatenate([position_ij, x], 2)

def euclidean_dist_mts(x, y):
    # x: n * L * d
    # y: m * L * d
    n = x.shape[0]
    l = x.shape[1]
    d = x.shape[2]
    m = y.shape[0]
    assert d == y.shape[2]

    x = K.reshape(x, shape=(n, l * d))
    y = K.reshape(y, shape=(m, l * d))
    x = K.repeat(x, m)            # n * m * d'
    y = K.expand_dims(y, axis=0)  # 1 * m * d'

    return K.sum(K.pow(x - y, 2), axis=2)  # n * m

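# Shape check (a sketch): pairwise squared distances between two sets of
# multivariate time series with equal length L and dimensionality d.
_x = tf.random.normal((3, 5, 2))
_y = tf.random.normal((4, 5, 2))
print(euclidean_dist_mts(_x, _y).shape)  # (3, 4)
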
def focal_loss(y_true, y_pred):
    # `alpha` and `gamma` are expected to be defined in the enclosing scope.
    # Define epsilon so that backpropagation will not result in NaN
    # for the 0-divisor case
    epsilon = backend.epsilon()
    # Clip the prediction value
    y_pred = backend.clip(y_pred, epsilon, 1.0 - epsilon)
    # Calculate cross entropy
    cross_entropy = -y_true * backend.log(y_pred)
    # Calculate weight that consists of modulating factor and weighting factor
    weight = alpha * y_true * backend.pow((1 - y_pred), gamma)
    # Calculate focal loss
    loss = weight * cross_entropy
    # Sum the losses in mini_batch
    loss = backend.sum(loss, axis=1)
    return loss

def focal_loss(y_true, y_pred, gamma=2.0):
    '''
    Args:
        y_true: label map of size B x H x W x 1
        y_pred: feature map of size B x H x W x C, 'softmax' activated
    '''
    y_true_onehot = tf.cast(tf.squeeze(y_true, axis=-1), tf.int32)
    y_true_onehot = K.cast_to_floatx(K.one_hot(y_true_onehot,
                                               y_pred.shape[-1]))
    y_pred = K.cast_to_floatx(K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon()))
    # cross entropy
    ce = -1 * y_true_onehot * K.log(y_pred)
    # weight
    weight = K.pow((1 - y_pred), gamma) * y_true_onehot
    # compute the focal loss
    ce = tf.reduce_sum(weight * ce, axis=-1)
    return tf.reduce_mean(ce)

def call(self, x):
    s, s_hat = x

    # Compute the variables defined in the class comment
    S2 = K.sum(s)
    S1 = s_hat[0, 1]
    N = s_hat[0, 0]

    # Compute the unbiased weights
    a2 = (S1 + S2) / N / s

    # Compute the biased weights and the scaling factor t
    a1 = K.pow(a2, self.k)
    sT = K.transpose(s)
    t = K.dot(sT, a2) / K.dot(sT, a1)

    return K.stop_gradient([a1 * t])[0]

def generalized_dice(y_true, y_pred):
    """
    Generalized Dice Score
    https://arxiv.org/pdf/1707.03237
    """
    y_true = K.reshape(y_true, shape=(-1, 4))
    y_pred = K.reshape(y_pred, shape=(-1, 4))
    sum_p = K.sum(y_pred, -2)
    sum_r = K.sum(y_true, -2)
    sum_pr = K.sum(y_true * y_pred, -2)
    weights = K.pow(K.square(sum_r) + K.epsilon(), -1)
    generalized_dice = (2 * K.sum(weights * sum_pr)) / \
        (K.sum(weights * (sum_r + sum_p)))
    return generalized_dice

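# Note (a sketch; `generalized_dice_loss_fn` is a hypothetical name):
# generalized_dice returns a score in [0, 1], so a training loss is
# conventionally its complement.
def generalized_dice_loss_fn(y_true, y_pred):
    return 1.0 - generalized_dice(y_true, y_pred)
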