def test_wrt_tensor_isnt_input_tensor(model_and_losses):
    model, losses = model_and_losses
    opt = Optimizer(model.input, losses, wrt_tensor=model.output)
    opt.minimize()

    assert not opt.wrt_tensor_is_input_tensor
    assert opt.wrt_tensor is not None
    assert opt.wrt_tensor != opt.input_tensor
def generate_heatmap(model, layer_idx, filter_indices, seed_img):
    losses = [(ActivationMaximization(model.layers[layer_idx], filter_indices), 1)]
    opt = Optimizer(model.input, losses)
    grads = opt.minimize(max_iter=1, verbose=False, seed_img=seed_img)[1]

    # We are minimizing loss as opposed to maximizing output as in the paper.
    # Negative gradients here therefore reduce loss, i.e., maximize class probability.
    grads *= -1

    grads = np.max(np.abs(grads), axis=3, keepdims=True)
    grads = deprocess_image(grads[0]).astype('float32')

    # Smoothen activation map.
    grads = grads / np.max(grads) * 255

    # Convert to heatmap and zero out low probabilities for a cleaner output.
    # cv2.applyColorMap expects a uint8 input, so cast after blurring.
    heatmap = cv2.applyColorMap(cv2.GaussianBlur(grads, (3, 3), 0).astype('uint8'),
                                cv2.COLORMAP_JET)
    heatmap = heatmap.reshape(list(heatmap.shape) + [1])
    heatmap[heatmap <= np.mean(heatmap)] = 0
    heatmap = np.maximum(heatmap, 0)
    heatmap = heatmap / np.max(heatmap) * 255
    return heatmap
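# --- Usage sketch (not from the original source) -----------------------------
# A minimal, hypothetical way to call generate_heatmap() above and blend the
# result with the input frame. `model` (a trained Keras classifier) and
# `seed_img` (a preprocessed (1, rows, cols, 3) batch) are assumed names.
import cv2

heatmap = generate_heatmap(model, layer_idx=-1, filter_indices=[20], seed_img=seed_img)
# Given the reshape above, heatmap typically comes back as (rows, cols, 3, 1);
# drop the trailing axis before blending.
overlay = cv2.addWeighted(seed_img[0].astype('float32'), 0.5,
                          heatmap[..., 0].astype('float32'), 0.5, 0.0)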
def compute_tcav_with_losses(input_tensor, losses, seed_input, wrt_tensor=None, grad_modifier='absolute'):
    """
    Args:
        input_tensor: An input tensor of shape: `(samples, channels, image_dims...)` if
            `image_data_format=channels_first` or `(samples, image_dims..., channels)` if
            `image_data_format=channels_last`.
        losses: List of ([Loss](vis.losses#Loss), weight) tuples.
        seed_input: The model input for which the activation map needs to be visualized.
        wrt_tensor: Short for "with respect to". The gradients of `losses` are computed with
            respect to this tensor. When None, this is assumed to be the same as `input_tensor`.
            (Default value: None)
            NB: This is where we can introduce our fl(x); the gradients will then be computed
            with respect to that tensor.
        grad_modifier: The gradient modifier to use. See [grad_modifiers](vis.grad_modifiers.md).
            By default the `absolute` value of gradients is used. To visualize positive or negative
            gradients, use `relu` and `negate` respectively. (Default value = 'absolute')

    Returns:
        The normalized gradients of `seed_input` with respect to the weighted `losses`.
        NB: This is where the dot product with the normalized direction of the concept
        vector has to be added.
    """
    opt = Optimizer(input_tensor, losses, wrt_tensor=wrt_tensor, norm_grads=False)
    grads = opt.minimize(seed_input=seed_input, max_iter=1,
                         grad_modifier=grad_modifier, verbose=False)[1]

    channel_idx = 1 if K.image_data_format() == 'channels_first' else -1
    #grads = np.max(grads, axis=channel_idx)
    return utils.normalize(grads)[0]
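# --- Sketch of the TCAV step the NB notes point at (not from the original source) ---
# The TCAV conceptual sensitivity is the dot product of the layer gradients with the
# unit-norm concept activation vector (CAV). `grads` is the array returned by
# compute_tcav_with_losses() and `cav` is a hypothetical 1-D vector with as many
# entries as the flattened activations of `wrt_tensor`; both are assumptions here.
import numpy as np

def tcav_score(grads, cav):
    cav = cav / (np.linalg.norm(cav) + 1e-8)  # normalized concept direction
    return float(np.dot(grads.ravel(), cav))  # signed directional derivative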
def visualize_saliency(model, layer_idx, filter_indices, seed_img, alpha=0.5):
    """Generates an attention heatmap over the `seed_img` for maximizing `filter_indices`
    output in the given `layer`.

    For a full description of saliency, see the paper:
    [Deep Inside Convolutional Networks: Visualising Image Classification Models and Saliency Maps]
    (https://arxiv.org/pdf/1312.6034v2.pdf)

    Args:
        model: The `keras.models.Model` instance. Model input is expected to be a 4D image input of shape:
            `(samples, channels, rows, cols)` if data_format='channels_first' or
            `(samples, rows, cols, channels)` if data_format='channels_last'.
        layer_idx: The layer index within `model.layers` whose filters need to be visualized.
        filter_indices: filter indices within the layer to be maximized.
            For `keras.layers.Dense` layers, `filter_idx` is interpreted as the output index.

            If you are visualizing the final `keras.layers.Dense` layer, you tend to get better results
            with a 'linear' activation as opposed to 'softmax'. This is because 'softmax' output can be
            maximized by minimizing scores for other classes.
        seed_img: The input image for which the activation map needs to be visualized.
        alpha: The alpha value of the image as overlaid onto the heatmap. This value needs to be
            within [0, 1], with 0 being heatmap only and 1 being image only. (Default value = 0.5)

    Example:
        If you wanted to visualize attention over the 'bird' category, say output index 22 on the
        final `keras.layers.Dense` layer, then `filter_indices = [22]`, `layer = dense_layer`.

        One could also set filter indices to more than one value. For example,
        `filter_indices = [22, 23]` should (hopefully) show an attention map that corresponds
        to both the 22 and 23 output categories.

    Returns:
        The heatmap image, overlaid with `seed_img` using `alpha`, indicating image regions that,
        when changed, would contribute the most towards maximizing the output of `filter_indices`.
    """
    if alpha < 0. or alpha > 1.:
        raise ValueError("`alpha` needs to be within [0, 1]")

    filter_indices = utils.listify(filter_indices)
    print("Working on filters: {}".format(pprint.pformat(filter_indices)))

    losses = [(ActivationMaximization(model.layers[layer_idx], filter_indices), 1)]
    opt = Optimizer(model.input, losses)
    grads = opt.minimize(max_iter=1, verbose=False, seed_img=seed_img)[1]

    # We are minimizing loss as opposed to maximizing output as in the paper.
    # Negative gradients here therefore reduce loss, i.e., maximize class probability.
    grads *= -1

    s, c, row, col = utils.get_img_indices()
    grads = np.max(np.abs(grads), axis=c)

    # Normalize and zero out low probabilities for a cleaner output.
    grads /= np.max(grads)
    heatmap = np.uint8(cm.jet(grads)[..., :3] * 255)
    heatmap[np.where(grads < 0.2)] = 0

    # The docstring promises the overlaid heatmap, so return it rather than the raw gradients.
    heatmap = np.uint8(seed_img * alpha + heatmap * (1. - alpha))
    return heatmap
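# --- Usage sketch (not from the original source) -----------------------------
# Illustrative call to visualize_saliency() above, using its old seed_img-style
# API. VGG16, the file name 'ouzel.jpg', and ImageNet index 20 ('water ouzel')
# are assumptions made for the example.
from vis.utils.vggnet import VGG16
from vis.utils import utils

model = VGG16(weights='imagenet', include_top=True)
seed_img = utils.load_img('ouzel.jpg', target_size=(224, 224))
heatmap = visualize_saliency(model, layer_idx=-1, filter_indices=[20],
                             seed_img=seed_img[None, ...])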
def get_saliency_grads(model, layer_idx, filter_indices, seed_input):
    # Define the loss to maximize: pixels that need to be changed the least
    # to affect activations the most.
    losses = [
        (ActivationMaximization(model.layers[layer_idx], filter_indices), -1)
    ]
    input_tensor = model.input[0]

    # Run optimization for the input image.
    opt = Optimizer(input_tensor, losses, wrt_tensor=None, norm_grads=False)
    grads = opt.minimize(seed_input=seed_input, max_iter=1,
                         grad_modifier='absolute', verbose=False)[1]
    return grads
def generate_opt_gif():
    """Example to show how to generate a gif of optimization progress.
    This example also shows how to use the optimizer directly with losses.
    """
    # Build the VGG16 network with ImageNet weights
    model = VGG16(weights='imagenet', include_top=True)
    print('Model loaded.')

    # The name of the layer we want to visualize
    # (see model definition in vggnet.py)
    layer_name = 'predictions'
    layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]])
    output_class = [20]

    losses = [
        (ActivationMaximization(layer_dict[layer_name], output_class), 2),
        (LPNorm(model.input), 10),
        (TotalVariation(model.input), 10)
    ]
    opt = Optimizer(model.input, losses)
    opt.minimize(max_iter=500, verbose=True, callbacks=[GifGenerator('opt_progress')])
def generate_opt_gif():
    """Example to show how to generate a gif of optimization progress.
    """
    # Build the VGG16 network with ImageNet weights
    model = VGG16(weights='imagenet', include_top=True)
    print('Model loaded.')

    # The name of the layer we want to visualize
    # (see model definition in vggnet.py)
    layer_name = 'predictions'
    layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]])
    output_class = [20]

    losses = [
        (ActivationMaximization(layer_dict[layer_name], output_class), 1),
        (LPNorm(), 10),
        (TotalVariation(), 1)
    ]
    opt = Optimizer(model.input, losses)

    # Jitter is used as a regularizer to create crisper images, but it makes the gif animation ugly.
    opt.minimize(max_iter=500, verbose=True, jitter=0, progress_gif_path='opt_progress')
# Pre-trained model
#model = ResNet50(weights='imagenet')
###---------------------------------------------------------------------------------------------------
layer_names = ["activation_10", "activation_22", "activation_34", "activation_46"]

count = 1
for layer_name in layer_names:
    layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]])
    layer_idx = utils.find_layer_idx(model, layer_name)

    # Select 50 filters for each layer; either the first 50 or randomly selected.
    filters = np.random.permutation(get_num_filters(model.layers[layer_idx]))[:50]

    for i in filters:
        losses = [
            (ActivationMaximization(layer_dict[layer_name], i), 2),
            (LPNorm(model.input), 6),
            (TotalVariation(model.input), 1)
        ]
        opt = Optimizer(model.input, losses)
        a, b, c = opt.minimize(max_iter=200, verbose=False, input_modifiers=[Jitter(0.05)])
        print(str(count) + '/200 DONE')
        count += 1

        a = Image.fromarray(a.astype("uint8"))
        # change this when a pre-trained model is used
        a.save('act_max_output/' + layer_name + '_finetuned_' + str(i) + '.png')
import os
import sys

import numpy as np
from keras.models import load_model
from tensorflow import set_random_seed  # TF 1.x API

from vis.losses import ActivationMaximization
from vis.regularizers import TotalVariation, LPNorm
from vis.input_modifiers import Jitter
from vis.optimizer import Optimizer
from vis.callbacks import GifGenerator

if __name__ == "__main__":
    lucky_num = 50756711264384381850616619995309447969109689825336919605444730053665222018857 % (2 ** 32)
    np.random.seed(lucky_num)
    set_random_seed(lucky_num)
    os.environ["CUDA_VISIBLE_DEVICES"] = "2"

    modelH5 = sys.argv[1]
    # outputDir = sys.argv[2]

    model = load_model(modelH5)
    layerDict = dict([(layer.name, layer) for layer in model.layers[1:]])
    layerName = "dense_5"

    for idx in range(7):
        outputClass = [0]
        losses = [
            (ActivationMaximization(layerDict[layerName], outputClass), 2),
            (LPNorm(model.input), 10),
            (TotalVariation(model.input), 10)
        ]
        opt = Optimizer(model.input, losses)
        opt.minimize(max_iter=500, verbose=True, input_modifiers=[Jitter()],
                     callbacks=[GifGenerator('OptProgress_%d' % (idx))])
class MakeMovie(object):

    def __init__(self):
        self.deg_to_rad = math.pi / 180.0

    def run(self, args, parser):
        '''
        Load the images from a tub and create a movie from them.
        '''
        if args.tub is None:
            print("ERR>> --tub argument missing.")
            parser.print_help()
            return

        if args.type is None and args.model is not None:
            print("ERR>> --type argument missing. Required when providing a model.")
            parser.print_help()
            return

        if args.salient:
            if args.model is None:
                print("ERR>> salient visualization requires a model. Pass with the --model arg.")
                parser.print_help()

        conf = os.path.expanduser(args.config)
        if not os.path.exists(conf):
            print("No config file at location: %s. Add --config to specify "
                  "location or run from dir containing config.py." % conf)
            return

        self.cfg = dk.load_config(conf)
        self.tub = Tub(args.tub)
        self.index = self.tub.get_index(shuffled=False)
        start = args.start
        self.end = args.end if args.end != -1 else len(self.index)
        if self.end >= len(self.index):
            self.end = len(self.index) - 1
        num_frames = self.end - start
        self.iRec = start
        self.scale = args.scale
        self.keras_part = None
        self.do_salient = False
        if args.model is not None:
            self.keras_part = get_model_by_type(args.type, cfg=self.cfg)
            self.keras_part.load(args.model)
            self.keras_part.compile()
            if args.salient:
                self.do_salient = self.init_salient(self.keras_part.model)

        print('making movie', args.out, 'from', num_frames, 'images')
        clip = mpy.VideoClip(self.make_frame, duration=((num_frames - 1) / self.cfg.DRIVE_LOOP_HZ))
        clip.write_videofile(args.out, fps=self.cfg.DRIVE_LOOP_HZ)

    def draw_user_input(self, record, img):
        '''
        Draw the user input as a green line on the image.
        '''
        import cv2

        user_angle = float(record["user/angle"])
        user_throttle = float(record["user/throttle"])

        height, width, _ = img.shape

        length = height
        a1 = user_angle * 45.0
        l1 = user_throttle * length

        mid = width // 2 - 1

        p1 = tuple((mid - 2, height - 1))
        p11 = tuple((int(p1[0] + l1 * math.cos((a1 + 270.0) * self.deg_to_rad)),
                     int(p1[1] + l1 * math.sin((a1 + 270.0) * self.deg_to_rad))))

        # user is green, pilot is blue
        cv2.line(img, p1, p11, (0, 255, 0), 2)

    def draw_model_prediction(self, record, img):
        '''
        Query the model for its prediction and draw it as a blue line on the image.
        '''
        if self.keras_part is None:
            return

        import cv2

        expected = self.keras_part.model.inputs[0].shape[1:]
        actual = img.shape

        # normalize image before prediction
        pred_img = img.astype(np.float32) / 255.0

        # check input depth
        if expected[2] == 1 and actual[2] == 3:
            pred_img = rgb2gray(pred_img)
            pred_img = pred_img.reshape(pred_img.shape + (1,))
            actual = pred_img.shape

        if expected != actual:
            print("expected input dim", expected, "didn't match actual dim", actual)
            return

        pilot_angle, pilot_throttle = self.keras_part.run(pred_img)

        height, width, _ = pred_img.shape

        length = height
        a2 = pilot_angle * 45.0
        l2 = pilot_throttle * length

        mid = width // 2 - 1

        p2 = tuple((mid + 2, height - 1))
        p22 = tuple((int(p2[0] + l2 * math.cos((a2 + 270.0) * self.deg_to_rad)),
                     int(p2[1] + l2 * math.sin((a2 + 270.0) * self.deg_to_rad))))

        # user is green, pilot is blue
        cv2.line(img, p2, p22, (0, 0, 255), 2)

    def draw_steering_distribution(self, record, img):
        '''
        Query the model for its prediction and draw the distribution of steering choices.
        '''
        from donkeycar.parts.keras import KerasCategorical

        if self.keras_part is None or type(self.keras_part) is not KerasCategorical:
            return

        import cv2

        pred_img = img.reshape((1,) + img.shape)
        angle_binned, _ = self.keras_part.model.predict(pred_img)

        x = 4
        dx = 4
        y = 120 - 4
        iArgMax = np.argmax(angle_binned)
        for i in range(15):
            p1 = (x, y)
            p2 = (x, y - int(angle_binned[0][i] * 100.0))
            if i == iArgMax:
                cv2.line(img, p1, p2, (255, 0, 0), 2)
            else:
                cv2.line(img, p1, p2, (200, 200, 200), 2)
            x += dx

    def init_salient(self, model):
        # Utility to search for layer index by name.
        # Alternatively we can specify this as -1 since it corresponds to the last layer.
        first_output_name = None
        for i, layer in enumerate(model.layers):
            if first_output_name is None and "dropout" not in layer.name.lower() and "out" in layer.name.lower():
                first_output_name = layer.name
                layer_idx = i

        if first_output_name is None:
            print("Failed to find the model layer named with 'out'. Skipping salient.")
            return False

        print("####################")
        print("Visualizing activations on layer:", first_output_name)
        print("####################")

        # ensure we have linear activation
        model.layers[layer_idx].activation = activations.linear

        # build salient model and optimizer
        sal_model = utils.apply_modifications(model)
        modifier_fn = get('guided')
        sal_model_mod = modifier_fn(sal_model)
        losses = [
            (ActivationMaximization(sal_model_mod.layers[layer_idx], None), -1)
        ]
        self.opt = Optimizer(sal_model_mod.input, losses, norm_grads=False)
        return True

    def compute_visualisation_mask(self, img):
        grad_modifier = 'absolute'
        grads = self.opt.minimize(seed_input=img, max_iter=1,
                                  grad_modifier=grad_modifier, verbose=False)[1]
        channel_idx = 1 if K.image_data_format() == 'channels_first' else -1
        grads = np.max(grads, axis=channel_idx)
        res = utils.normalize(grads)[0]
        return res

    def draw_salient(self, img):
        import cv2
        alpha = 0.004
        beta = 1.0 - alpha

        expected = self.keras_part.model.inputs[0].shape[1:]
        actual = img.shape
        pred_img = img.astype(np.float32) / 255.0

        # check input depth
        if expected[2] == 1 and actual[2] == 3:
            pred_img = rgb2gray(pred_img)
            pred_img = pred_img.reshape(pred_img.shape + (1,))

        salient_mask = self.compute_visualisation_mask(pred_img)
        z = np.zeros_like(salient_mask)
        salient_mask_stacked = np.dstack((z, z))
        salient_mask_stacked = np.dstack((salient_mask_stacked, salient_mask))
        blend = cv2.addWeighted(img.astype('float32'), alpha, salient_mask_stacked, beta, 0.0)
        return blend

    def make_frame(self, t):
        '''
        Callback to return an image from our tub records.
        This is called from the VideoClip as it references a time.
        We don't use t to reference the frame, but instead increment
        a frame counter. This assumes sequential access.
        '''
        if self.iRec >= self.end or self.iRec >= len(self.index):
            return None

        rec_ix = self.index[self.iRec]
        rec = self.tub.get_record(rec_ix)
        image = rec['cam/image_array']

        if self.cfg.ROI_CROP_TOP != 0 or self.cfg.ROI_CROP_BOTTOM != 0:
            image = img_crop(image, self.cfg.ROI_CROP_TOP, self.cfg.ROI_CROP_BOTTOM)

        if self.do_salient:
            image = self.draw_salient(image)
            image = image * 255
            image = image.astype('uint8')

        self.draw_user_input(rec, image)
        if self.keras_part is not None:
            self.draw_model_prediction(rec, image)
            self.draw_steering_distribution(rec, image)

        if self.scale != 1:
            import cv2
            h, w, d = image.shape
            dsize = (w * self.scale, h * self.scale)
            image = cv2.resize(image, dsize=dsize, interpolation=cv2.INTER_CUBIC)

        self.iRec += 1
        # returns an 8-bit RGB array
        return image
from vis.losses import ActivationMaximization
from vis.regularizers import TotalVariation, LPNorm
from vis.input_modifiers import Jitter
from vis.optimizer import Optimizer
from vis.callbacks import GifGenerator
from vis.utils.vggnet import VGG16

# Build the VGG16 network with ImageNet weights
model = VGG16(weights='imagenet', include_top=True)
print('Model loaded.')

# The name of the layer we want to visualize
# (see model definition in vggnet.py)
layer_name = 'predictions'
layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]])
output_class = [20]

losses = [
    (ActivationMaximization(layer_dict[layer_name], output_class), 2),
    (LPNorm(model.input), 10),
    (TotalVariation(model.input), 10)
]
opt = Optimizer(model.input, losses)
opt.minimize(max_iter=500, verbose=True, image_modifiers=[Jitter()],
             callbacks=[GifGenerator('opt_progress')])
def visualize_activation(model, layer_idx, filter_indices=None, seed_img=None, text=None,
                         act_max_weight=1, lp_norm_weight=10, tv_weight=10,
                         **optimizer_params):
    """Generates stitched input image(s) over all `filter_indices` in the given `layer`
    that maximize the filter output activation.

    Args:
        model: The `keras.models.Model` instance. Model input is expected to be a 4D image input of shape:
            `(samples, channels, rows, cols)` if data_format='channels_first' or
            `(samples, rows, cols, channels)` if data_format='channels_last'.
        layer_idx: The layer index within `model.layers` whose filters need to be visualized.
        filter_indices: filter indices within the layer to be maximized.
            If None, all filters are visualized. (Default value = None)

            An input image is generated for each entry in `filter_indices`. The entry can also be an
            array. For example, `filter_indices = [[1, 2], 3, [4, 5, 6]]` would generate three input
            images; the first one would maximize the output of filters 1 and 2 jointly. A fun use of
            this might be to generate a dog-fish image by maximizing the 'dog' and 'fish' outputs in
            the final `Dense` layer.

            For `keras.layers.Dense` layers, `filter_idx` is interpreted as the output index.

            If you are visualizing the final `keras.layers.Dense` layer, you tend to get better results
            with a 'linear' activation as opposed to 'softmax'. This is because 'softmax' output can be
            maximized by minimizing scores for other classes.
        seed_img: Seeds the optimization with a starting image. Initialized with a random value
            when set to None. (Default value = None)
        text: The text to overlay on top of the generated image. (Default value = None)
        act_max_weight: The weight param for `ActivationMaximization` loss. Not used if 0 or None.
            (Default value = 1)
        lp_norm_weight: The weight param for `LPNorm` regularization loss. Not used if 0 or None.
            (Default value = 10)
        tv_weight: The weight param for `TotalVariation` regularization loss. Not used if 0 or None.
            (Default value = 10)
        optimizer_params: The **kwargs for optimizer [params](vis.optimizer.md##optimizerminimize).
            Will default to reasonable values when required keys are not found.

    Example:
        If you wanted to visualize the input image that would maximize the output index 22, say on the
        final `keras.layers.Dense` layer, then `filter_indices = [22]`, `layer = dense_layer`.

        If `filter_indices = [22, 23]`, then it should generate an input image that shows features
        of both classes.

    Returns:
        Stitched image output visualizing input images that maximize the filter output(s).
    """
    filter_indices = utils.listify(filter_indices)
    print("Working on filters: {}".format(pprint.pformat(filter_indices)))

    # Default optimizer kwargs.
    optimizer_params_default = {
        'seed_img': seed_img,
        'max_iter': 200,
        'verbose': False,
        'image_modifiers': _DEFAULT_IMG_MODIFIERS
    }
    optimizer_params_default.update(optimizer_params)
    optimizer_params = optimizer_params_default

    losses = [
        (ActivationMaximization(model.layers[layer_idx], filter_indices), act_max_weight),
        (LPNorm(model.input), lp_norm_weight),
        (TotalVariation(model.input), tv_weight)
    ]

    opt = Optimizer(model.input, losses, norm_grads=False)
    img = opt.minimize(**optimizer_params)[0]
    if text:
        img = utils.draw_text(img, text)
    return img
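# --- Usage sketch (not from the original source) -----------------------------
# Hypothetical call to visualize_activation() above, mirroring the docstring's
# `filter_indices = [22, 23]` example. VGG16 and the class indices are assumptions;
# `max_iter` is forwarded to the optimizer through **optimizer_params.
from vis.utils.vggnet import VGG16

model = VGG16(weights='imagenet', include_top=True)
stitched = visualize_activation(model, layer_idx=-1, filter_indices=[22, 23],
                                max_iter=300)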
def visualize_cam(model, layer_idx, filter_indices, seed_img, penultimate_layer_idx=None, alpha=0):
    """Generates a gradient based class activation map (CAM) as described in the paper
    [Grad-CAM: Why did you say that? Visual Explanations from Deep Networks via Gradient-based Localization]
    (https://arxiv.org/pdf/1610.02391v1.pdf).

    Unlike [class activation mapping](https://arxiv.org/pdf/1512.04150v1.pdf), which requires minor changes
    to network architecture in some instances, grad-CAM has a more general applicability.

    Compared to saliency maps, grad-CAM is class discriminative; i.e., the 'cat' explanation exclusively
    highlights cat regions and not the 'dog' region and vice-versa.

    Args:
        model: The `keras.models.Model` instance. Model input is expected to be a 4D image input of shape:
            `(samples, channels, rows, cols)` if data_format='channels_first' or
            `(samples, rows, cols, channels)` if data_format='channels_last'.
        layer_idx: The layer index within `model.layers` whose filters need to be visualized.
        filter_indices: filter indices within the layer to be maximized.
            For `keras.layers.Dense` layers, `filter_idx` is interpreted as the output index.

            If you are visualizing the final `keras.layers.Dense` layer, you tend to get better results
            with a 'linear' activation as opposed to 'softmax'. This is because 'softmax' output can be
            maximized by minimizing scores for other classes.
        seed_img: The input image for which the activation map needs to be visualized.
        penultimate_layer_idx: The pre-layer to `layer_idx` whose feature maps should be used to compute
            gradients wrt the filter output. If not provided, it is set to the nearest penultimate
            `Convolutional` or `Pooling` layer.
        alpha: The alpha value of the image as overlaid onto the heatmap. This value needs to be within
            [0, 1], with 0 being heatmap only and 1 being image only. (Default value = 0)

    Example:
        If you wanted to visualize attention over the 'bird' category, say output index 22 on the final
        `keras.layers.Dense` layer, then `filter_indices = [22]`, `layer = dense_layer`.

        One could also set filter indices to more than one value. For example, `filter_indices = [22, 23]`
        should (hopefully) show an attention map that corresponds to both the 22 and 23 output categories.

    Notes:
        This technique deprecates occlusion maps as it gives similar results, but with a one-pass gradient
        computation as opposed to the inefficient sliding window approach.

    Returns:
        The class activation heatmap, resized to the input image size. In this variant the colored
        overlay with `seed_img` is disabled; see the commented-out lines at the end.
    """
    if alpha < 0. or alpha > 1.:
        raise ValueError("`alpha` needs to be within [0, 1]")

    filter_indices = utils.listify(filter_indices)
    print("Working on filters: {}".format(pprint.pformat(filter_indices)))

    # Search for the nearest penultimate `Convolutional` or `Pooling` layer.
    if penultimate_layer_idx is None:
        for idx, layer in utils.reverse_enumerate(model.layers[:layer_idx - 1]):
            if isinstance(layer, (Convolution2D, _Pooling2D)):
                penultimate_layer_idx = idx
                break

    if penultimate_layer_idx is None:
        raise ValueError('Unable to determine penultimate `Convolution2D` or `Pooling2D` '
                         'layer for layer_idx: {}'.format(layer_idx))
    assert penultimate_layer_idx < layer_idx

    losses = [(ActivationMaximization(model.layers[layer_idx], filter_indices), 1)]

    penultimate_output = model.layers[penultimate_layer_idx].output
    opt = Optimizer(model.input, losses, wrt=penultimate_output)
    _, grads, penultimate_output_value = opt.minimize(seed_img, max_iter=1, verbose=False)

    # We are minimizing loss as opposed to maximizing output as in the paper.
    # Negative gradients here therefore reduce loss, i.e., maximize class probability.
    grads *= -1

    # Average pooling across all feature maps.
    # This captures the importance of feature map (channel) idx to the output.
    s_idx, c_idx, row_idx, col_idx = utils.get_img_indices()
    weights = np.mean(grads, axis=(s_idx, row_idx, col_idx))

    # Generate heatmap by computing weight * output over feature maps.
    s, ch, rows, cols = utils.get_img_shape(penultimate_output)
    heatmap = np.ones(shape=(rows, cols), dtype=np.float32)
    for i, w in enumerate(weights):
        heatmap += w * penultimate_output_value[utils.slicer[0, i, :, :]]

    # The penultimate feature map size is definitely smaller than the input image.
    s, ch, rows, cols = utils.get_img_shape(model.input)

    # TODO: Figure out a way to get rid of the OpenCV dependency.
    # skimage doesn't deal with arbitrary floating point ranges.
    heatmap = cv2.resize(heatmap, (cols, rows), interpolation=cv2.INTER_CUBIC)

    # ReLU thresholding, normalize between (0, 1).
    heatmap = np.maximum(heatmap, 0)
    heatmap /= np.max(heatmap)
    heatmap *= 10
    heatmap_colored = heatmap

    # Convert to heatmap and zero out low probabilities for a cleaner output.
    #heatmap_colored = np.uint8(cm.jet(heatmap)[..., :3] * 10)
    #heatmap_colored[np.where(heatmap < 0.2)] = 0
    #heatmap_colored = np.uint8(seed_img * alpha + heatmap_colored * (1. - alpha))
    return heatmap_colored
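# --- Usage sketch (not from the original source) -----------------------------
# Hypothetical grad-CAM call against visualize_cam() above. Note that the
# `assert penultimate_layer_idx < layer_idx` requires a positive layer index,
# so we use len(model.layers) - 1 rather than -1. All names are assumptions.
from vis.utils.vggnet import VGG16
from vis.utils import utils

model = VGG16(weights='imagenet', include_top=True)
seed_img = utils.load_img('ouzel.jpg', target_size=(224, 224))
heatmap = visualize_cam(model, layer_idx=len(model.layers) - 1, filter_indices=[20],
                        seed_img=seed_img[None, ...])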
# print img.shape
# # 20 is the imagenet index corresponding to `ouzel`
# grads = visualize_saliency(model, layer_idx, filter_indices=0,
#                            seed_input=img[None, :, :, :].transpose(0, 1, 2, 3),
#                            grad_modifier='negate')
# # visualize grads as heatmap
# plt.imshow(grads, cmap='jet')
# print grads.shape
# plt.show()
exit()

filter_indices = [1, 2, 3]
losses = [
    (ActivationMaximization(keras_layer, filter_indices), 1),
    (LPNorm(model.input), 10),
    (TotalVariation(model.input), 10)
]
optimizer = Optimizer(model.input, losses)
opt_img, grads, _ = optimizer.minimize()
print("Finished")
exit()

layers = []  # collect layer names to iterate over
for layer in model.layers:
    layers.append(layer.name)

for layer_name in layers:
    layer_idx = utils.find_layer_idx(model, layer_name)
    model.layers[layer_idx].activation = activations.linear
    model = utils.apply_modifications(model)
    img = visualize_activation(model, layer_idx, filter_indices=0)
    imagem = img.swapaxes(0, 2).swapaxes(0, 1)
]

# The following code was written to use another library, using a function instead of a class:
#totalloss = K.variable(0.)
#totalloss += contentLossF(contentBase, layer_dict["block4_conv2"][0, :, :, :])
#print("made content loss")
#totalloss += 250 * styleLossF(styleBase1, layer_dict["block1_conv1"][0, :, :, :])
#print("made style loss 1")
#totalloss += 250 * styleLossF(styleBase2, layer_dict["block2_conv1"][0, :, :, :])
#print("made style loss 2")
#totalloss += 250 * styleLossF(styleBase3, layer_dict["block3_conv1"][0, :, :, :])
#print("made style loss 3")
#totalloss += 250 * styleLossF(styleBase4, layer_dict["block4_conv1"][0, :, :, :])
#print("loaded losses. Calculating gradients...")
#grads = K.gradients(totalloss, trainedModel.input)
#print("Calculated gradients...")

print("Losses initialized. Initializing optimizer")
# The first argument holds the variables the optimizer will change; partial
# derivatives of the output are taken with respect to them.
opt = Optimizer(trainedModel.input, losses)
print("Optimizer initialized. Starting optimization")
finaloutput = opt.minimize(seed_img=noise, max_iter=iterations, verbose=True,
                           progress_gif_path='proggif.gif')[0]
#finaloutput = deprocess(finaloutput)
print("finished")

cv2.imshow('window', finaloutput[:, :, ::-1])
cv2.waitKey(0)
cv2.destroyAllWindows()
#pyplot.imshow(finaloutput)
#pyplot.show()
print("done with showing")
from vis.losses import ActivationMaximization
from vis.regularizers import TotalVariation, LPNorm
from vis.optimizer import Optimizer
from vis.callbacks import GifGenerator  # needed for the callback below
from vis.utils import utils             # needed for apply_modifications below

from model import model
from keras import activations

layer_idx = -1
model.layers[layer_idx].activation = activations.linear
model = utils.apply_modifications(model)

losses = [
    (ActivationMaximization(model.layers[-2], 3), 2),
    (LPNorm(model.input), 10),
    (TotalVariation(model.input), 10)
]
opt = Optimizer(model.input, losses)
opt.minimize(max_iter=500, verbose=True, callbacks=[GifGenerator('opt_progress')])
def visualize_cam_with_losses(input_tensor, losses, seed_input, penultimate_layer, grad_modifier=None):
    """Generates a gradient based class activation map (CAM) by using positive gradients of `input_tensor`
    with respect to weighted `losses`.

    For details on grad-CAM, see the paper:
    [Grad-CAM: Why did you say that? Visual Explanations from Deep Networks via Gradient-based Localization]
    (https://arxiv.org/pdf/1610.02391v1.pdf).

    Unlike [class activation mapping](https://arxiv.org/pdf/1512.04150v1.pdf), which requires minor changes
    to network architecture in some instances, grad-CAM has a more general applicability.

    Compared to saliency maps, grad-CAM is class discriminative; i.e., the 'cat' explanation exclusively
    highlights cat regions and not the 'dog' region and vice-versa.

    Args:
        input_tensor: An input tensor of shape: `(samples, channels, image_dims...)` if
            `image_data_format=channels_first` or `(samples, image_dims..., channels)` if
            `image_data_format=channels_last`.
        losses: List of ([Loss](vis.losses#Loss), weight) tuples.
        seed_input: The model input for which the activation map needs to be visualized.
        penultimate_layer: The pre-layer to `layer_idx` whose feature maps should be used to compute
            gradients with respect to filter output.
        grad_modifier: gradient modifier to use. See [grad_modifiers](vis.grad_modifiers.md).
            If you don't specify anything, gradients are unchanged. (Default value = None)

    Returns:
        The normalized grad-CAM heatmap for `seed_input` with respect to weighted `losses`.
    """
    penultimate_output = penultimate_layer.output
    opt = Optimizer(input_tensor, losses, wrt_tensor=penultimate_output, norm_grads=False)
    _, grads, penultimate_output_value = opt.minimize(seed_input, max_iter=1,
                                                      grad_modifier=grad_modifier, verbose=False)

    # For numerical stability. Very small grad values along with small penultimate_output_value can cause
    # w * penultimate_output_value to zero out, even for reasonable fp precision of float32.
    grads = grads / (np.max(grads) + K.epsilon())

    # Average pooling across all feature maps.
    # This captures the importance of feature map (channel) idx to the output.
    channel_idx = 1 if K.image_data_format() == 'channels_first' else -1
    other_axis = np.delete(np.arange(len(grads.shape)), channel_idx)
    weights = np.mean(grads, axis=tuple(other_axis))

    # Generate heatmap by computing weight * output over feature maps.
    output_dims = utils.get_img_shape(penultimate_output)[2:]
    heatmap = np.zeros(shape=output_dims, dtype=K.floatx())
    for i, w in enumerate(weights):
        if channel_idx == -1:
            heatmap += w * penultimate_output_value[0, ..., i]
        else:
            heatmap += w * penultimate_output_value[0, i, ...]

    # ReLU thresholding to exclude pattern mismatch information (negative gradients).
    heatmap = np.maximum(heatmap, 0)

    # The penultimate feature map size is definitely smaller than the input image.
    input_dims = utils.get_img_shape(input_tensor)[2:]

    # Figure out the zoom factor.
    zoom_factor = [i / (j * 1.0) for i, j in iter(zip(input_dims, output_dims))]
    heatmap = zoom(heatmap, zoom_factor)
    return utils.normalize(heatmap)
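# --- Usage sketch (not from the original source) -----------------------------
# Hypothetical call to visualize_cam_with_losses() above. As in keras-vis, the
# ActivationMaximization loss gets weight -1 so that minimizing the loss maximizes
# the class output. `model`, `seed_img`, class index 20, and the layer name
# 'block5_pool' are assumptions for a VGG16-style network.
losses = [(ActivationMaximization(model.layers[-1], [20]), -1)]
penultimate_layer = model.get_layer('block5_pool')
heatmap = visualize_cam_with_losses(model.input, losses, seed_input=seed_img,
                                    penultimate_layer=penultimate_layer)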
def salience_visualization(model, save_directory, conn_name, output_x, output_y, output_f,
                           verbose=True, clas=None, perc_output=1):
    from vis.visualization import visualize_saliency
    from vis.losses import ActivationMaximization
    from vis.optimizer import Optimizer
    from vis.utils import utils
    from vis.backprop_modifiers import get
    from keras import activations

    if clas is not None:
        output_y = np.zeros(output_y.shape) + clas

    # Utility to search for layer index by name.
    # Alternatively we can specify this as -1 since it corresponds to the last layer.
    #layer_idx = utils.find_layer_idx(model, 'preds')
    layer_idx = -1

    # Swap softmax with linear
    model.layers[layer_idx].activation = activations.linear
    model = utils.apply_modifications(model)

    modifier = 'guided'  # can be None (AKA vanilla) or 'relu'

    save_grads_path = os.path.join(save_directory, 'salience', conn_name)
    if not os.path.isdir(os.path.join(save_directory, 'salience')):
        os.mkdir(os.path.join(save_directory, 'salience'))
    if not os.path.isdir(save_grads_path):
        os.mkdir(save_grads_path)

    print("Outputting saliency maps")

    if False:  # toggle to clear previously saved maps
        for the_file in os.listdir(save_grads_path):
            file_path = os.path.join(save_grads_path, the_file)
            try:
                if os.path.isfile(file_path):
                    os.unlink(file_path)
            except Exception as e:
                print(e)

    modifier_fn = get(modifier)
    model = modifier_fn(model)

    for idx in range(output_x.shape[0]):
        if verbose:
            update_progress(float(idx) / output_x.shape[0])
        if float(idx) / output_x.shape[0] > perc_output:
            break

        #savename = os.path.join(save_grads_path, output_f[idx])
        #if os.path.isfile(savename):
        #    continue

        losses = [(ActivationMaximization(model.layers[layer_idx], int(output_y[idx][0])), -1)]
        opt = Optimizer(model.input, losses, wrt_tensor=None, norm_grads=False)
        grads = opt.minimize(seed_input=output_x[idx], max_iter=1,
                             grad_modifier='absolute', verbose=False)[1]

        for i in range(grads.shape[3]):
            wc_subfolder = os.path.join(save_grads_path, "wc%d" % i)
            if not os.path.isdir(wc_subfolder):
                os.mkdir(wc_subfolder)
            np.savetxt(os.path.join(wc_subfolder, output_f[idx]),
                       np.squeeze(grads[:, :, :, i]))
class MakeMovie(object):

    def run(self, args, parser):
        '''
        Load the images from a tub and create a movie from them.
        '''
        if args.tub is None:
            print("ERR>> --tub argument missing.")
            parser.print_help()
            return

        conf = os.path.expanduser(args.config)
        if not os.path.exists(conf):
            print("No config file at location: %s. Add --config to specify "
                  "location or run from dir containing config.py." % conf)
            return

        self.cfg = dk.load_config(conf)

        if args.type is None and args.model is not None:
            args.type = self.cfg.DEFAULT_MODEL_TYPE
            print("Model type not provided. Using default model type from config file")

        if args.salient:
            if args.model is None:
                print("ERR>> salient visualization requires a model. Pass with the --model arg.")
                parser.print_help()

            if args.type not in ['linear', 'categorical']:
                print("Model type {} is not supported. Only linear or categorical is "
                      "supported for salient visualization".format(args.type))
                parser.print_help()
                return

        self.tub = Tub(args.tub)
        start = args.start
        self.end_index = args.end if args.end != -1 else len(self.tub)
        num_frames = self.end_index - start

        # Move to the correct offset
        self.current = 0
        self.iterator = self.tub.__iter__()
        while self.current < start:
            self.iterator.next()
            self.current += 1

        self.scale = args.scale
        self.keras_part = None
        self.do_salient = False
        self.user = args.draw_user_input
        if args.model is not None:
            self.keras_part = get_model_by_type(args.type, cfg=self.cfg)
            self.keras_part.load(args.model)
            if args.salient:
                self.do_salient = self.init_salient(self.keras_part.model)

        print('making movie', args.out, 'from', num_frames, 'images')
        clip = mpy.VideoClip(self.make_frame, duration=((num_frames - 1) / self.cfg.DRIVE_LOOP_HZ))
        clip.write_videofile(args.out, fps=self.cfg.DRIVE_LOOP_HZ)

    @staticmethod
    def draw_line_into_image(angle, throttle, is_left, img, color):
        import cv2

        height, width, _ = img.shape
        length = height
        a1 = angle * 45.0
        l1 = throttle * length
        mid = width // 2 + (- 1 if is_left else +1)

        p1 = tuple((mid - 2, height - 1))
        p11 = tuple((int(p1[0] + l1 * math.cos((a1 + 270.0) * DEG_TO_RAD)),
                     int(p1[1] + l1 * math.sin((a1 + 270.0) * DEG_TO_RAD))))

        cv2.line(img, p1, p11, color, 2)

    def draw_user_input(self, record, img):
        """
        Draw the user input as a green line on the image.
        """
        user_angle = float(record["user/angle"])
        user_throttle = float(record["user/throttle"])
        green = (0, 255, 0)
        self.draw_line_into_image(user_angle, user_throttle, False, img, green)

    def draw_model_prediction(self, img):
        """
        Query the model for its prediction and draw it as a blue line on the image.
        """
        if self.keras_part is None:
            return

        expected = tuple(self.keras_part.get_input_shape()[1:])
        actual = img.shape

        # if model expects grey-scale but got rgb, convert
        if expected[2] == 1 and actual[2] == 3:
            # normalize image before grey conversion
            grey_img = rgb2gray(img)
            actual = grey_img.shape
            img = grey_img.reshape(grey_img.shape + (1,))

        if expected != actual:
            print(f"expected input dim {expected} didn't match actual dim "
                  f"{actual}")
            return

        blue = (0, 0, 255)
        pilot_angle, pilot_throttle = self.keras_part.run(img)
        self.draw_line_into_image(pilot_angle, pilot_throttle, True, img, blue)

    def draw_steering_distribution(self, img):
        """
        Query the model for its prediction and draw the distribution of steering choices.
        """
        from donkeycar.parts.keras import KerasCategorical

        if self.keras_part is None or type(self.keras_part) is not KerasCategorical:
            return

        import cv2

        pred_img = normalize_image(img)
        pred_img = pred_img.reshape((1,) + pred_img.shape)
        angle_binned, _ = self.keras_part.model.predict(pred_img)

        x = 4
        dx = 4
        y = 120 - 4
        iArgMax = np.argmax(angle_binned)
        for i in range(15):
            p1 = (x, y)
            p2 = (x, y - int(angle_binned[0][i] * 100.0))
            if i == iArgMax:
                cv2.line(img, p1, p2, (255, 0, 0), 2)
            else:
                cv2.line(img, p1, p2, (200, 200, 200), 2)
            x += dx

    def init_salient(self, model):
        # Utility to search for layer index by name.
        # Alternatively we can specify this as -1 since it corresponds to the last layer.
        first_output_name = None
        for i, layer in enumerate(model.layers):
            if first_output_name is None and "dropout" not in layer.name.lower() and "out" in layer.name.lower():
                first_output_name = layer.name
                layer_idx = i

        if first_output_name is None:
            print("Failed to find the model layer named with 'out'. Skipping salient.")
            return False

        print("####################")
        print("Visualizing activations on layer:", first_output_name)
        print("####################")

        # ensure we have linear activation
        model.layers[layer_idx].activation = activations.linear

        # build salient model and optimizer
        sal_model = utils.apply_modifications(model)
        modifier_fn = get('guided')
        sal_model_mod = modifier_fn(sal_model)
        losses = [
            (ActivationMaximization(sal_model_mod.layers[layer_idx], None), -1)
        ]
        self.opt = Optimizer(sal_model_mod.input, losses, norm_grads=False)
        return True

    def compute_visualisation_mask(self, img):
        grad_modifier = 'absolute'
        grads = self.opt.minimize(seed_input=img, max_iter=1,
                                  grad_modifier=grad_modifier, verbose=False)[1]
        channel_idx = 1 if K.image_data_format() == 'channels_first' else -1
        grads = np.max(grads, axis=channel_idx)
        res = utils.normalize(grads)[0]
        return res

    def draw_salient(self, img):
        import cv2
        alpha = 0.004
        beta = 1.0 - alpha

        expected = self.keras_part.model.inputs[0].shape[1:]
        actual = img.shape

        # check input depth and convert to grey to match expected model input
        if expected[2] == 1 and actual[2] == 3:
            grey_img = rgb2gray(img)
            img = grey_img.reshape(grey_img.shape + (1,))

        norm_img = normalize_image(img)
        salient_mask = self.compute_visualisation_mask(norm_img)
        z = np.zeros_like(salient_mask)
        salient_mask_stacked = np.dstack((z, z))
        salient_mask_stacked = np.dstack((salient_mask_stacked, salient_mask))
        blend = cv2.addWeighted(img.astype('float32'), alpha, salient_mask_stacked, beta, 0.0)
        return blend

    def make_frame(self, t):
        '''
        Callback to return an image from our tub records.
        This is called from the VideoClip as it references a time.
        We don't use t to reference the frame, but instead increment
        a frame counter. This assumes sequential access.
        '''
        if self.current >= self.end_index:
            return None

        rec = self.iterator.next()
        img_path = os.path.join(self.tub.images_base_path, rec['cam/image_array'])
        image = img_to_arr(Image.open(img_path))

        if self.do_salient:
            image = self.draw_salient(image)
            image = image * 255
            image = image.astype('uint8')

        if self.user:
            self.draw_user_input(rec, image)
        if self.keras_part is not None:
            self.draw_model_prediction(image)
            self.draw_steering_distribution(image)

        if self.scale != 1:
            import cv2
            h, w, d = image.shape
            dsize = (w * self.scale, h * self.scale)
            image = cv2.resize(image, dsize=dsize, interpolation=cv2.INTER_CUBIC)

        self.current += 1
        # returns an 8-bit RGB array
        return image