def visualize_cam_with_losses(input_tensor, losses, seed_input, penultimate_layer, grad_modifier=None):
    """Generates a gradient based class activation map (CAM) by using positive gradients of `input_tensor`
    with respect to weighted `losses`.

    For details on grad-CAM, see the paper:
    [Grad-CAM: Why did you say that? Visual Explanations from Deep Networks via Gradient-based Localization]
    (https://arxiv.org/pdf/1610.02391v1.pdf).

    Unlike [class activation mapping](https://arxiv.org/pdf/1512.04150v1.pdf), which requires minor changes to
    network architecture in some instances, grad-CAM has a more general applicability.

    Compared to saliency maps, grad-CAM is class discriminative; i.e., the 'cat' explanation exclusively
    highlights cat regions and not the 'dog' region and vice-versa.

    Args:
        input_tensor: An input tensor of shape: `(samples, channels, image_dims...)` if
            `image_data_format=channels_first` or `(samples, image_dims..., channels)` if
            `image_data_format=channels_last`.
        losses: List of ([Loss](vis.losses.md#Loss), weight) tuples.
        seed_input: The model input for which activation map needs to be visualized.
        penultimate_layer: The pre-layer to `layer_idx` whose feature maps should be used to compute gradients
            with respect to filter output.
        grad_modifier: gradient modifier to use. See [grad_modifiers](vis.grad_modifiers.md). If you don't
            specify anything, gradients are unchanged. (Default value = None)

    Returns:
        The normalized gradients of `seed_input` with respect to weighted `losses`.
    """
    penultimate_output = penultimate_layer.output
    opt = Optimizer(input_tensor, losses, wrt_tensor=penultimate_output, norm_grads=False)
    _, grads, penultimate_output_value = opt.minimize(
        seed_input, max_iter=1, grad_modifier=grad_modifier, verbose=False)

    # For numerical stability. Very small grad values along with small penultimate_output_value can cause
    # w * penultimate_output_value to zero out, even for reasonable fp precision of float32.
    grads = grads / (np.max(grads) + K.epsilon())

    # Average pooling across all feature maps.
    # This captures the importance of feature map (channel) idx to the output.
    channel_idx = 1 if K.image_data_format() == 'channels_first' else -1
    other_axis = np.delete(np.arange(len(grads.shape)), channel_idx)
    weights = np.mean(grads, axis=tuple(other_axis))

    # Generate heatmap by computing weight * output over feature maps.
    output_dims = utils.get_img_shape(penultimate_output_value)[2:]
    heatmap = np.zeros(shape=output_dims, dtype=K.floatx())
    for i, w in enumerate(weights):
        if channel_idx == -1:
            heatmap += w * penultimate_output_value[0, ..., i]
        else:
            heatmap += w * penultimate_output_value[0, i, ...]

    # ReLU thresholding to exclude pattern mismatch information (negative gradients).
    heatmap = np.maximum(heatmap, 0)

    # The penultimate feature map size is definitely smaller than input image.
    input_dims = utils.get_img_shape(input_tensor)[2:]

    # Figure out the zoom factor.
    zoom_factor = [i / (j * 1.0) for i, j in zip(input_dims, output_dims)]
    heatmap = zoom(heatmap, zoom_factor)
    return utils.normalize(heatmap)
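# A minimal usage sketch for `visualize_cam_with_losses`, assuming the
# keras-vis style `ActivationMaximization` loss; `model`, `seed_img`, and the
# layer name 'block5_conv3' are hypothetical placeholders.
from vis.losses import ActivationMaximization

losses = [(ActivationMaximization(model.layers[-1], filter_indices=20), 1.0)]
penultimate_layer = model.get_layer('block5_conv3')  # last conv layer (assumed name)
heatmap = visualize_cam_with_losses(model.input, losses, seed_img, penultimate_layer)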
def cosine_distance(vects):
    # Note: despite the name, this returns the cosine *similarity* of the two
    # l2-normalized vectors, clamped below by epsilon.
    x, y = vects
    x = K.l2_normalize(x, axis=-1)
    y = K.l2_normalize(y, axis=-1)
    sum_square = K.sum(x * y, axis=-1, keepdims=True)
    return K.maximum(sum_square, K.epsilon())
def logsum(prob_ll, atl):
    # Safe computation using the log-sum-exp trick (NOTE: this does not normalize p).
    # https://www.xarg.org/2016/06/the-log-sum-exp-trick-in-machine-learning
    logpdf = prob_ll + K.log(atl + K.epsilon())
    alpha = tf.reduce_max(logpdf, -1, keepdims=True)
    return alpha + tf.math.log(tf.reduce_sum(K.exp(logpdf - alpha), -1, keepdims=True) + K.epsilon())
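# Hypothetical sanity check for `logsum`: on moderately scaled inputs the
# log-sum-exp trick should agree with the naive computation up to epsilon.
import numpy as np
import tensorflow as tf

prob_ll = tf.constant(np.random.randn(2, 5).astype('float32'))
atl = tf.constant(np.random.rand(2, 5).astype('float32'))
naive = tf.math.log(tf.reduce_sum(tf.exp(prob_ll) * atl, -1, keepdims=True))
stable = logsum(prob_ll, atl)  # matches `naive` for inputs that do not overflow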
def recall5(y_true, y_pred):
    tp = tf.reduce_sum(
        tf.cast(tf.math.logical_and(y_true > 5, y_pred > 5), tf.float32))
    total_true = tf.reduce_sum(tf.cast(y_true > 5, tf.float32))
    return tp / (total_true + K.epsilon())
def __init__(self, eps=K.epsilon(), **kwargs):
    self.eps = eps
    # placeholders for layer parameters
    self.gain = None
    self.bias = None
    super().__init__(**kwargs)
def recall_negatives(y_true, y_pred):
    """Also called specificity or true negative rate."""
    true_negatives_computed = true_negatives(y_true, y_pred)
    possible_negatives = negatives(y_true, y_pred)
    return true_negatives_computed / (possible_negatives + K.epsilon())
def loss_fn(y_true, y_pred):
    # `advantage` is captured from the enclosing scope.
    y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
    crossentropy = y_true * K.log(y_pred) + (1 - y_true) * K.log(1 - y_pred)
    return -K.mean(crossentropy * advantage)
def euclid_dis(vects):
    x, y = vects
    sum_square = K.sum(K.square(x - y), axis=1, keepdims=True)
    return K.sqrt(K.maximum(sum_square, K.epsilon()))
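# Sketch of wiring `euclid_dis` into a siamese model through a Lambda layer;
# `base_network` (a shared embedding model) and the input shape are assumptions.
from tensorflow.keras.layers import Input, Lambda
from tensorflow.keras.models import Model

input_a = Input(shape=(784,))
input_b = Input(shape=(784,))
distance = Lambda(euclid_dis)([base_network(input_a), base_network(input_b)])
siamese = Model([input_a, input_b], distance)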
def custom_recall(y_true, y_pred):
    """Recall for class 1."""
    true_positives = K.sum(y_true * y_pred)
    total_positives = K.sum(y_true)
    return true_positives / (total_positives + K.epsilon())
def n_c_angular_distance(vects):
    x_a, x_p, x_n = vects
    return K.sqrt(K.maximum(K.sum(K.square(x_n - ((x_a + x_p) / K.constant(2))),
                                  axis=1, keepdims=True),
                            K.epsilon()))
def a_p_angular_distance(vects):
    x_a, x_p, x_n = vects
    return K.sqrt(K.maximum(K.sum(K.square(x_a - x_p), axis=1, keepdims=True),
                            K.epsilon()))
def r2(y_true, y_pred):
    SS_res = K.sum(K.square(y_true - y_pred))
    SS_tot = K.sum(K.square(y_true - K.mean(y_true)))
    return 1 - SS_res / (SS_tot + K.epsilon())
from tensorflow.keras.layers import (
    Input,
    Flatten,
    Dense,
    Reshape,
    Dropout,
    LeakyReLU,
    Conv2DTranspose,
    Conv2D,
    BatchNormalization,
    Activation,
)
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K

_EPSILON = K.epsilon()


def make_trainable(net, val):
    net.trainable = val
    for l in net.layers:
        l.trainable = val


def _loss_generator(y_true, y_pred):
    y_pred = K.clip(y_pred, _EPSILON, 1.0 - _EPSILON)
    out = -(K.log(y_pred))
    return K.mean(out, axis=-1)


GAN_noise_size = 128
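# Sketch of how these helpers are typically combined: freeze the discriminator
# inside the stacked model and train the generator against `_loss_generator`.
# `generator` and `discriminator` are assumed, pre-built models.
from tensorflow.keras.optimizers import Adam

make_trainable(discriminator, False)
gan_input = Input(shape=(GAN_noise_size,))
gan = Model(gan_input, discriminator(generator(gan_input)))
gan.compile(loss=_loss_generator, optimizer=Adam(1e-4))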
def f1(y_true, y_pred):
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    return 2 * ((p * r) / (p + r + K.epsilon()))
def precision_negatives(y_true, y_pred):
    """Number of correct negatives out of all predicted negatives."""
    true_negatives_computed = true_negatives(y_true, y_pred)
    predicted_negatives = K.sum(K.round(K.clip(1 - y_pred, 0, 1)))
    precision = true_negatives_computed / (predicted_negatives + K.epsilon())
    return precision
def custom_precision(y_true, y_pred):
    """Precision for class 1."""
    total_1_predictions = K.sum(y_pred)
    total_true_predictions = K.sum(y_true * y_pred)
    return total_true_predictions / (total_1_predictions + K.epsilon())
def recall_positives(y_true, y_pred):
    """Also called sensitivity or true positive rate."""
    true_positives_computed = true_positives(y_true, y_pred)
    possible_positives = positives(y_true, y_pred)
    return true_positives_computed / (possible_positives + K.epsilon())
def keras_distance(p, q):
    """Distance used in loss function: the KL divergence KL(p || q)."""
    p = K.clip(p, K.epsilon(), 1)
    q = K.clip(q, K.epsilon(), 1)
    return K.sum(p * K.log(p / q), axis=-1)
def loss_fn(y_true, y_pred):
    # `advantage` is captured from the enclosing scope (see the factory sketch below).
    y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
    crossentropy = K.sum(y_true * K.log(y_pred), axis=1, keepdims=True)
    return -K.mean(crossentropy * advantage)
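# `advantage` in the loss above must come from an enclosing scope; a common
# pattern (sketched here, factory name assumed) is to close over it:
def make_policy_loss(advantage):
    def loss_fn(y_true, y_pred):
        y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
        crossentropy = K.sum(y_true * K.log(y_pred), axis=1, keepdims=True)
        return -K.mean(crossentropy * advantage)
    return loss_fn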
def sensitivity(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    return true_positives / (possible_positives + K.epsilon())
def precision1(y_true, y_pred):
    tp = tf.reduce_sum(
        tf.cast(tf.math.logical_and(y_true > 1, y_pred > 1), tf.float32))
    total_pred = tf.reduce_sum(tf.cast(y_pred > 1, tf.float32))
    return tp / (total_pred + K.epsilon())
def specificity(y_true, y_pred):
    true_negatives = K.sum(K.round(K.clip((1 - y_true) * (1 - y_pred), 0, 1)))
    possible_negatives = K.sum(K.round(K.clip(1 - y_true, 0, 1)))
    return true_negatives / (possible_negatives + K.epsilon())
def relu(x, alpha=0.0, max_value=None, threshold=0.0, mode="diag"):
    """Rectified Linear Unit.

    Assumed Density Filtering (ADF) version of the Keras `relu` activation.

    Parameters
    ----------
    x : list or tuple
        Input tensors (means and covariances).
    alpha: float, optional
        Slope of negative section. Default is ``0.0``.
        Currently no value other than the default is supported for ADF.
    max_value: float, optional
        Saturation threshold. Default is ``None``.
        Currently no value other than the default is supported for ADF.
    threshold: float, optional
        Threshold value for thresholded activation. Default is ``0.0``.
        Currently no value other than the default is supported for ADF.
    mode: {"diag", "diagonal", "lowrank", "half", "full"}
        Covariance computation mode. Default is "diag".

    Returns
    -------
    list
        List of transformed means and covariances, according to the
        ReLU activation: ``max(x, 0)``.
    """
    if not alpha == 0.0:
        raise NotImplementedError(
            "The relu activation function with alpha other than 0.0 has "
            "not been implemented for ADF layers yet."
        )
    if max_value is not None:
        raise NotImplementedError(
            "The relu activation function with max_value other than `None` "
            "has not been implemented for ADF layers yet."
        )
    if not threshold == 0.0:
        raise NotImplementedError(
            "The relu activation function with threshold other than 0.0 has "
            "not been implemented for ADF layers yet."
        )
    if not (isinstance(x, list) and len(x) == 2):
        raise ValueError(
            "The relu activation function expects a list of "
            "exactly two input tensors, but got: %s" % x
        )
    means, covariances = x
    means_shape = means.get_shape().as_list()
    means_rank = len(means_shape)
    cov_shape = covariances.get_shape().as_list()
    cov_rank = len(cov_shape)
    EPS = K.cast(K.epsilon(), covariances.dtype)
    # treat inputs according to rank and mode
    if means_rank == 1:
        # if rank(mean)=1, treat as single vector, no reshapes necessary
        pass
    elif means_rank == 2:
        # if rank(mean)=2, treat as batch of vectors, no reshapes necessary
        pass
    else:
        # if rank(mean)=2+n, treat as batch of rank=n tensors + channels
        means = K.reshape(means, [-1] + [K.prod(means_shape[1:])])
        if mode == "diag":
            covariances = K.reshape(
                covariances, [-1] + [K.prod(cov_shape[1:])],
            )
        elif mode == "half":
            covariances = K.reshape(
                covariances, [-1] + [cov_shape[1]] + [K.prod(cov_shape[2:])],
            )
        elif mode == "full":
            covariances = K.reshape(
                covariances,
                [-1]
                + [K.prod(cov_shape[1 : (cov_rank - 1) // 2 + 1])]
                + [K.prod(cov_shape[(cov_rank - 1) // 2 + 1 :])],
            )
    if mode == "diag":
        covariances = covariances + EPS
        std = K.sqrt(covariances)
        div = means / std
        gd_div = _gauss_density(div)
        gc_div = _gauss_cumulative(div)
        new_means = K.maximum(
            means,
            K.maximum(K.zeros_like(means), means * gc_div + std * gd_div),
        )
        new_covariances = (
            K.square(means) * gc_div
            + covariances * gc_div
            + means * std * gd_div
            - K.square(new_means)
        )
        new_covariances = K.maximum(
            K.zeros_like(new_covariances), new_covariances
        )
    elif mode == "half":
        variances = K.sum(K.square(covariances), axis=1) + EPS
        std = K.sqrt(variances)
        div = means / std
        gd_div = _gauss_density(div)
        gc_div = _gauss_cumulative(div)
        new_means = K.maximum(
            means,
            K.maximum(K.zeros_like(means), means * gc_div + std * gd_div),
        )
        gc_div = K.expand_dims(gc_div, 1)
        new_covariances = covariances * gc_div
    elif mode == "full":
        variances = array_ops.matrix_diag_part(covariances) + EPS
        std = K.sqrt(variances)
        div = means / std
        gd_div = _gauss_density(div)
        gc_div = _gauss_cumulative(div)
        new_means = K.maximum(
            means,
            K.maximum(K.zeros_like(means), means * gc_div + std * gd_div),
        )
        gc_div = K.expand_dims(gc_div, 1)
        new_covariances = covariances * gc_div
        new_covariances = K.permute_dimensions(new_covariances, [0, 2, 1])
        new_covariances = new_covariances * gc_div
        new_covariances = K.permute_dimensions(new_covariances, [0, 2, 1])
    # undo reshapes if necessary
    new_means = K.reshape(new_means, [-1] + means_shape[1:])
    new_covariances = K.reshape(new_covariances, [-1] + cov_shape[1:])
    return [new_means, new_covariances]
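# Minimal sketch of calling the ADF `relu` on a (means, covariances) pair in
# "diag" mode; shapes and values are illustrative only, and `_gauss_density` /
# `_gauss_cumulative` are assumed to be defined in this module.
import numpy as np

means = K.constant(np.random.randn(4, 10))
covariances = K.constant(np.random.rand(4, 10))  # diagonal covariances
new_means, new_covariances = relu([means, covariances], mode="diag")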
def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall
def manhattan_distance(vects):
    # Returns exp(-L1 distance), i.e. a similarity in (0, 1], clamped below by epsilon.
    x, y = vects
    similarity = K.exp(-K.sum(K.abs(x - y), axis=1, keepdims=True))
    return K.maximum(similarity, K.epsilon())
def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision
def euclidean_distance(vects):
    x, y = vects
    sum_square = K.sum(K.square(x - y), axis=1, keepdims=True)
    return K.maximum(K.sqrt(sum_square), K.epsilon())
def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + K.epsilon()))
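# Sketch: pass the custom metrics at compile time so Keras reports them each
# epoch; `model` is an assumed binary classifier with a sigmoid output.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=[precision_m, recall_m, f1_m])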
def dice(self, y_true, y_pred):
    """ compute dice for given Tensors """
    if self.crop_indices is not None:
        y_true = utils.batch_gather(y_true, self.crop_indices)
        y_pred = utils.batch_gather(y_pred, self.crop_indices)

    if self.input_type == 'prob':
        # We assume that y_true is probabilistic, but just in case:
        if self.re_norm:
            y_true = tf.math.divide_no_nan(y_true, K.sum(y_true, axis=-1, keepdims=True))
        y_true = K.clip(y_true, K.epsilon(), 1)

        # make sure pred is a probability
        if self.re_norm:
            y_pred = tf.math.divide_no_nan(y_pred, K.sum(y_pred, axis=-1, keepdims=True))
        y_pred = K.clip(y_pred, K.epsilon(), 1)

    # Prepare the volumes to operate on.
    # If we're doing 'hard' Dice, then we will prepare one-hot-based matrices of size
    # [batch_size, nb_voxels, nb_labels], where for each voxel in each batch entry,
    # the entries are either 0 or 1.
    if self.dice_type == 'hard':
        # if given predicted probabilities, transform to a "hard max"
        if self.input_type == 'prob':
            if self.approx_hard_max:
                y_pred_op = _hard_max(y_pred, axis=-1)
                y_true_op = _hard_max(y_true, axis=-1)
            else:
                y_pred_op = _label_to_one_hot(K.argmax(y_pred, axis=-1), self.nb_labels)
                y_true_op = _label_to_one_hot(K.argmax(y_true, axis=-1), self.nb_labels)

        # if given predicted labels, transform to one-hot notation
        else:
            assert self.input_type == 'max_label'
            y_pred_op = _label_to_one_hot(y_pred, self.nb_labels)
            y_true_op = _label_to_one_hot(y_true, self.nb_labels)

    # If we're doing soft Dice, require prob output, and the data already is as we need it:
    # [batch_size, nb_voxels, nb_labels]
    else:
        assert self.input_type == 'prob', "cannot do soft dice with max_label input"
        y_pred_op = y_pred
        y_true_op = y_true

    # reshape to [batch_size, nb_voxels, nb_labels]
    batch_size = K.shape(y_true)[0]
    y_pred_op = K.reshape(y_pred_op, [batch_size, -1, K.shape(y_true)[-1]])
    y_true_op = K.reshape(y_true_op, [batch_size, -1, K.shape(y_true)[-1]])

    # compute dice for each entry in batch.
    # dice will now be [batch_size, nb_labels]
    top = 2 * K.sum(y_true_op * y_pred_op, 1)
    bottom = K.sum(K.square(y_true_op), 1) + K.sum(K.square(y_pred_op), 1)
    # guard against zeros in the denominator (cf. K.epsilon())
    bottom = K.maximum(bottom, self.area_reg)
    return top / bottom
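# Sketch of turning the Dice score into a Keras loss for segmentation; the
# constructor arguments are assumptions inferred from the attributes used above.
dice_obj = Dice(nb_labels=4, dice_type='soft', input_type='prob')

def dice_loss(y_true, y_pred):
    # negate so that maximizing Dice minimizes the loss
    return -K.mean(dice_obj.dice(y_true, y_pred))

model.compile(optimizer='adam', loss=dice_loss)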
def norm(t):
    return K.sqrt(K.maximum(K.sum(K.square(t), axis=1, keepdims=True), K.epsilon()))