def test_get_img_shape_on_2d_image():
    n = 5
    channels = 4
    dim1 = 1
    dim2 = 2

    K.set_image_data_format('channels_first')
    assert (n, channels, dim1, dim2) == utils.get_img_shape(
        K.ones(shape=(n, channels, dim1, dim2)))

    K.set_image_data_format('channels_last')
    assert (n, channels, dim1, dim2) == utils.get_img_shape(
        K.ones(shape=(n, dim1, dim2, channels)))
def visualize_cam(model, layer_idx, filter_indices, seed_img, penultimate_layer_idx=None, alpha=0.5):
    """Generates a gradient based class activation map (CAM) as described in the paper
    [Grad-CAM: Why did you say that? Visual Explanations from Deep Networks via Gradient-based Localization](https://arxiv.org/pdf/1610.02391v1.pdf).
    Unlike [class activation mapping](https://arxiv.org/pdf/1512.04150v1.pdf), which requires minor changes to
    network architecture in some instances, grad-CAM has a more general applicability.

    Compared to saliency maps, grad-CAM is class discriminative; i.e., the 'cat' explanation exclusively highlights
    cat regions and not the 'dog' region and vice-versa.

    Args:
        model: The `keras.models.Model` instance. Model input is expected to be a 4D image input of shape:
            `(samples, channels, rows, cols)` if data_format='channels_first' or
            `(samples, rows, cols, channels)` if data_format='channels_last'.
        layer_idx: The layer index within `model.layers` whose filters need to be visualized.
        filter_indices: filter indices within the layer to be maximized.
            For a `keras.layers.Dense` layer, `filter_idx` is interpreted as the output index.
            If you are visualizing the final `keras.layers.Dense` layer, you tend to get better results with
            'linear' activation as opposed to 'softmax'. This is because 'softmax' output can be maximized
            by minimizing scores for other classes.
        seed_img: The input image for which the activation map needs to be visualized.
        penultimate_layer_idx: The pre-layer to `layer_idx` whose feature maps should be used to compute gradients
            wrt filter output. If not provided, it is set to the nearest penultimate `Convolutional` or `Pooling` layer.
        alpha: The alpha value of the image as overlayed onto the heatmap.
            This value needs to be within [0, 1], with 0 being heatmap only and 1 being image only
            (Default value = 0.5)

    Example:
        If you wanted to visualize attention over the 'bird' category, say output index 22 on the final
        `keras.layers.Dense` layer, then `filter_indices = [22]`, `layer = dense_layer`.

        One could also set filter indices to more than one value. For example, `filter_indices = [22, 23]` should
        (hopefully) show an attention map that corresponds to both the 22 and 23 output categories.

    Notes:
        This technique deprecates occlusion maps as it gives similar results, but with one-pass gradient computation
        as opposed to the inefficient sliding window approach.

    Returns:
        The heatmap image, overlayed with `seed_img` using `alpha`, indicating image regions that, when changed,
        would contribute the most towards maximizing the output of `filter_indices`.
    """
    if alpha < 0. or alpha > 1.:
        raise ValueError("`alpha` needs to be within [0, 1]")

    filter_indices = utils.listify(filter_indices)
    print("Working on filters: {}".format(pprint.pformat(filter_indices)))

    # Search for the nearest penultimate `Convolutional` or `Pooling` layer.
    if penultimate_layer_idx is None:
        for idx, layer in utils.reverse_enumerate(model.layers[:layer_idx - 1]):
            if isinstance(layer, (Convolution2D, _Pooling2D)):
                penultimate_layer_idx = idx
                break

    if penultimate_layer_idx is None:
        raise ValueError('Unable to determine penultimate `Convolution2D` or `Pooling2D` '
                         'layer for layer_idx: {}'.format(layer_idx))
    assert penultimate_layer_idx < layer_idx

    losses = [
        (ActivationMaximization(model.layers[layer_idx], filter_indices), 1)
    ]

    penultimate_output = model.layers[penultimate_layer_idx].output
    opt = Optimizer(model.input, losses, wrt=penultimate_output)
    _, grads, penultimate_output_value = opt.minimize(seed_img, max_iter=1, verbose=False)

    # We are minimizing loss as opposed to maximizing output as in the paper.
    # So, negative gradients here mean that they reduce loss, maximizing class probability.
    grads *= -1

    # Average pooling across all feature maps.
    # This captures the importance of feature map (channel) idx to the output.
    s_idx, c_idx, row_idx, col_idx = utils.get_img_indices()
    weights = np.mean(grads, axis=(s_idx, row_idx, col_idx))

    # Generate heatmap by computing weight * output over feature maps.
    s, ch, rows, cols = utils.get_img_shape(penultimate_output)
    heatmap = np.ones(shape=(rows, cols), dtype=np.float32)
    for i, w in enumerate(weights):
        heatmap += w * penultimate_output_value[utils.slicer[0, i, :, :]]

    # The penultimate feature map size is definitely smaller than the input image.
    s, ch, rows, cols = utils.get_img_shape(model.input)

    # TODO: Figure out a way to get rid of the OpenCV dependency.
    # skimage doesn't deal with arbitrary floating point ranges.
    heatmap = cv2.resize(heatmap, (cols, rows), interpolation=cv2.INTER_CUBIC)

    # ReLU thresholding, normalize between (0, 1).
    heatmap = np.maximum(heatmap, 0)
    heatmap /= np.max(heatmap)

    # Convert to a jet heatmap and zero out low probabilities for a cleaner output.
    heatmap_colored = np.uint8(cm.jet(heatmap)[..., :3] * 255)
    heatmap_colored[np.where(heatmap < 0.2)] = 0

    # Overlay the original image onto the heatmap using `alpha`, as documented above.
    heatmap_colored = np.uint8(seed_img * alpha + heatmap_colored * (1. - alpha))
    return heatmap_colored
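
# Illustrative usage sketch (not part of the library): overlays a grad-CAM heatmap for one
# output category onto an input image. `model`, `seed_img`, and the category index 22 are
# placeholders supplied by the caller; the model's last layer is assumed to be its final
# `Dense` output layer.
def _example_visualize_cam_usage(model, seed_img):
    # Visualize attention for output index 22 of the final Dense layer, blending the
    # heatmap and the original image equally.
    return visualize_cam(model,
                         layer_idx=len(model.layers) - 1,
                         filter_indices=[22],
                         seed_img=seed_img,
                         alpha=0.5)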
def visualize_cam_with_losses(input_tensor, losses, seed_input, penultimate_layer, grad_modifier=None):
    """Generates a gradient based class activation map (CAM) by using positive gradients of `input_tensor`
    with respect to weighted `losses`.

    For details on grad-CAM, see the paper:
    [Grad-CAM: Why did you say that? Visual Explanations from Deep Networks via Gradient-based Localization]
    (https://arxiv.org/pdf/1610.02391v1.pdf).

    Unlike [class activation mapping](https://arxiv.org/pdf/1512.04150v1.pdf), which requires minor changes to
    network architecture in some instances, grad-CAM has a more general applicability.

    Compared to saliency maps, grad-CAM is class discriminative; i.e., the 'cat' explanation exclusively highlights
    cat regions and not the 'dog' region and vice-versa.

    Args:
        input_tensor: An input tensor of shape: `(samples, channels, image_dims...)` if
            `image_data_format=channels_first` or `(samples, image_dims..., channels)` if
            `image_data_format=channels_last`.
        losses: List of ([Loss](vis.losses#Loss), weight) tuples.
        seed_input: The model input for which the activation map needs to be visualized.
        penultimate_layer: The pre-output layer whose feature maps should be used to compute gradients
            with respect to the filter output.
        grad_modifier: gradient modifier to use. See [grad_modifiers](vis.grad_modifiers.md). If you don't
            specify anything, gradients are unchanged (Default value = None)

    Returns:
        The grad-CAM heatmap, upsampled to the spatial dimensions of `seed_input` and normalized.
    """
    penultimate_output = penultimate_layer.output
    opt = Optimizer(input_tensor, losses, wrt_tensor=penultimate_output, norm_grads=False)
    _, grads, penultimate_output_value = opt.minimize(
        seed_input, max_iter=1, grad_modifier=grad_modifier, verbose=False)

    # For numerical stability. Very small grad values along with small penultimate_output_value can cause
    # w * penultimate_output_value to zero out, even for reasonable fp precision of float32.
    grads = grads / (np.max(grads) + K.epsilon())

    # Average pooling across all feature maps.
    # This captures the importance of feature map (channel) idx to the output.
    channel_idx = 1 if K.image_data_format() == 'channels_first' else -1
    other_axis = np.delete(np.arange(len(grads.shape)), channel_idx)
    weights = np.mean(grads, axis=tuple(other_axis))

    # Generate heatmap by computing weight * output over feature maps.
    output_dims = utils.get_img_shape(penultimate_output)[2:]
    heatmap = np.zeros(shape=output_dims, dtype=K.floatx())
    for i, w in enumerate(weights):
        if channel_idx == -1:
            heatmap += w * penultimate_output_value[0, ..., i]
        else:
            heatmap += w * penultimate_output_value[0, i, ...]

    # ReLU thresholding to exclude pattern mismatch information (negative gradients).
    heatmap = np.maximum(heatmap, 0)

    # The penultimate feature map size is definitely smaller than the input image.
    input_dims = utils.get_img_shape(input_tensor)[2:]

    # Figure out the zoom factor and scale the heatmap up to the input spatial dimensions.
    zoom_factor = [i / (j * 1.0) for i, j in zip(input_dims, output_dims)]
    heatmap = zoom(heatmap, zoom_factor)
    return utils.normalize(heatmap)
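
# Illustrative wiring sketch (an assumption about typical usage, not library code): drives
# `visualize_cam_with_losses` with an `ActivationMaximization` loss on the output layer and a
# late convolutional layer as `penultimate_layer`. The layer name 'block5_conv3' assumes a
# VGG16-style model; `class_idx` is the output category index supplied by the caller.
def _example_visualize_cam_with_losses_usage(model, seed_input, class_idx):
    # The -1 weight flips the `ActivationMaximization` loss so that positive gradients
    # correspond to increasing the class score, consistent with the docstring above.
    losses = [
        (ActivationMaximization(model.layers[-1], class_idx), -1)
    ]
    penultimate_layer = model.get_layer('block5_conv3')
    return visualize_cam_with_losses(model.input, losses, seed_input, penultimate_layer)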