def main():
    """Evaluate Group-CAM saliency maps on ImageNet val images with
    insertion/deletion causal metrics, printing the mean AUC of each.

    Expects `models`, `GroupCAM`, `RangeSampler`, `explain_all`,
    `CausalMetric`, and `auc` to be provided by the surrounding module,
    and a CUDA device to be available.
    """
    # hyper-parameters
    val_dir = '/path/to/imagenet/val/'
    batch_size = 1
    num_workers = 4
    batch = 0
    # One sample per batch index; widen the range to evaluate more images.
    sample_range = range(batch, batch + 1)

    vgg = models.vgg19(pretrained=True).eval()
    vgg = vgg.cuda()
    cam = GroupCAM(vgg, target_layer='features.35', groups=32)

    # Standard ImageNet preprocessing (resize/center-crop/normalize).
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    val_loader = DataLoader(
        datasets.ImageFolder(val_dir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])),
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True,
        sampler=RangeSampler(sample_range)
    )

    images, exp = explain_all(val_loader, explainer=cam)

    # Substrate functions: heavy Gaussian blur for insertion, black image
    # (zeros) for deletion.
    def blur(x):
        return gaussian_blur2d(x, kernel_size=(51, 51), sigma=(50., 50.))

    insertion = CausalMetric(vgg, 'ins', 224 * 2, substrate_fn=blur)
    deletion = CausalMetric(vgg, 'del', 224 * 2, substrate_fn=torch.zeros_like)

    scores = {'del': [], 'ins': []}
    # Evaluate each (image, explanation) pair; the per-step score curves are
    # reduced to a single AUC value per image.
    for img, mask in tqdm(zip(images, exp), total=len(images),
                          desc='Evaluating Saliency'):
        del_score = deletion.evaluate(img=img, mask=mask, cls_idx=None, verbose=0)
        ins_score = insertion.evaluate(img=img, mask=mask, cls_idx=None, verbose=0)
        scores['del'].append(auc(del_score))
        scores['ins'].append(auc(ins_score))

    print('----------------------------------------------------------------')
    print('Final:\nDeletion - {:.5f}\nInsertion - {:.5f}'.format(np.mean(scores['del']),
                                                                 np.mean(scores['ins'])))
def forward(self, x: torch.Tensor) -> Tuple[List, List, List]:  # type: ignore
    """Build a Gaussian scale pyramid of `x`.

    Returns three parallel lists, one entry per octave:
    - pyr: tensors of blurred levels stacked along dim 2,
    - sigmas: per-level effective blur sigma, shape (bs, n_levels + extra_levels),
    - pixel_dists: per-level pixel distance relative to the input resolution.
    """
    bs, ch, h, w = x.size()
    # First octave's base level plus its effective sigma and pixel spacing.
    cur_level, cur_sigma, pixel_distance = self.get_first_level(x)
    sigmas = [
        cur_sigma * torch.ones(bs, self.n_levels + self.extra_levels).to(
            x.device).to(x.dtype)
    ]
    pixel_dists = [
        pixel_distance * torch.ones(
            bs, self.n_levels + self.extra_levels).to(x.device).to(x.dtype)
    ]
    pyr = [[cur_level]]
    oct_idx = 0
    while True:
        # Start each octave from its first (least blurred) level.
        cur_level = pyr[-1][0]
        for level_idx in range(1, self.n_levels + self.extra_levels):
            # Incremental sigma so that total blur advances by sigma_step.
            sigma = cur_sigma * math.sqrt(self.sigma_step**2 - 1.0)
            ksize = self.get_kernel_size(sigma)

            # Hack, because PyTorch does not allow to pad more than original size.
            # But for the huge sigmas, one needs huge kernel and padding...
            ksize = min(ksize, min(cur_level.size(2), cur_level.size(3)))
            if ksize % 2 == 0:
                ksize += 1

            cur_level = gaussian_blur2d(cur_level, (ksize, ksize), (sigma, sigma))
            cur_sigma *= self.sigma_step
            pyr[-1].append(cur_level)
            sigmas[-1][:, level_idx] = cur_sigma
            pixel_dists[-1][:, level_idx] = pixel_distance
        # The next octave starts from the level `extra_levels` from the end,
        # downsampled by 2 (nearest matches OpenCV SIFT).
        _pyr = pyr[-1][-self.extra_levels]
        nextOctaveFirstLevel = F.interpolate(
            _pyr,
            size=(_pyr.size(-2) // 2, _pyr.size(-1) // 2),
            mode='nearest')  # Nearest matches OpenCV SIFT
        pixel_distance *= 2.0
        cur_sigma = self.init_sigma
        # Stop once the next octave would be smaller than min_size.
        if min(nextOctaveFirstLevel.size(2),
               nextOctaveFirstLevel.size(3)) <= self.min_size:
            break
        pyr.append([nextOctaveFirstLevel])
        sigmas.append(
            cur_sigma *
            torch.ones(bs, self.n_levels + self.extra_levels).to(x.device))
        pixel_dists.append(
            pixel_distance *
            torch.ones(bs, self.n_levels + self.extra_levels).to(x.device))
        oct_idx += 1
    # Stack each octave's levels into a single tensor along a new dim 2.
    for i in range(len(pyr)):
        pyr[i] = torch.stack(pyr[i], dim=2)  # type: ignore
    return pyr, sigmas, pixel_dists
def main():
    """Run Grad-CAM on a single input image, save the CAM overlay, and
    optionally render insertion/deletion evaluation videos via ffmpeg.

    Relies on `parse_args`, `preprocess_img`, `GradCAM`, `show_cam`,
    `CausalMetric`, `check_path_exist`, and `auc` from the surrounding module,
    and on a CUDA device.
    """
    args = parse_args()

    # Load and resize the image; values normalised to [0, 1].
    raw_img = cv2.imread(args.input, 1)
    raw_img = cv2.resize(raw_img, (224, 224), interpolation=cv2.INTER_LINEAR)
    raw_img = np.float32(raw_img) / 255
    image, norm_image = preprocess_img(raw_img)

    model = models.__dict__[args.arch](pretrained=True).eval()
    model = model.cuda()

    gc = GradCAM(model, target_layer=args.target_layer)
    heatmap = gc(norm_image.cuda(), class_idx=args.cls_idx).cpu().data
    show_cam(image, heatmap, args.output)  # dropped unused `cam =` binding

    if args.ins_del:
        # Substrates: heavy Gaussian blur for insertion, zeros for deletion.
        blur = lambda x: gaussian_blur2d(x, kernel_size=(51, 51), sigma=(50., 50.))
        insertion = CausalMetric(model, 'ins', 224 * 2, substrate_fn=blur)
        deletion = CausalMetric(model, 'del', 224 * 2, substrate_fn=torch.zeros_like)

        out_video_path = './VIDEO'
        check_path_exist(out_video_path)
        # Fixed: was os.path.join(os.path.join(dir, name)) — the outer
        # single-argument join was a redundant no-op.
        ins_path = os.path.join(out_video_path, "ins")
        del_path = os.path.join(out_video_path, "del")
        check_path_exist(ins_path)
        check_path_exist(del_path)

        norm_image = norm_image.cpu()
        heatmap = heatmap.cpu().numpy()
        ins_score = insertion.evaluate(norm_image, mask=heatmap, cls_idx=None, save_to=ins_path)
        del_score = deletion.evaluate(norm_image, mask=heatmap, cls_idx=None, save_to=del_path)
        print("\nDeletion - {:.5f}\nInsertion - {:.5f}".format(auc(del_score), auc(ins_score)))

        # generate video — hoist the filename stem shared by both outputs
        stem = args.input.split('/')[-1].split('.')[0]
        video_ins = os.path.join(ins_path, stem + '.avi')
        video_del = os.path.join(del_path, stem + '.avi')
        # NOTE(review): args.input flows into a shell command string; a crafted
        # filename could inject shell syntax. Prefer
        # subprocess.run([...], shell=False) with an argument list.
        cmd_str_ins = 'ffmpeg -f image2 -i {}/%06d.jpg -b 5000k -r 30 -c:v mpeg4 {} -y'.format(ins_path, video_ins)
        cmd_str_del = 'ffmpeg -f image2 -i {}/%06d.jpg -b 5000k -r 30 -c:v mpeg4 {} -y'.format(del_path, video_del)
        os.system(cmd_str_ins)
        os.system(cmd_str_del)
def get_first_level(self, input):
    """Prepare the base level of the pyramid from the raw input.

    Returns (first_level, effective_sigma, pixel_distance). Matches OpenCV's
    SIFT base-image construction up to interpolation differences.
    """
    # Raw input: unit pixel spacing, assumed intrinsic blur of 0.5.
    base = input
    pixel_distance = 1.0
    cur_sigma = 0.5
    if self.double_image:
        # Upsample 2x first: pixel spacing halves, effective blur doubles.
        base = F.interpolate(input, scale_factor=2.0, mode='bilinear',
                             align_corners=False)
        pixel_distance = 0.5
        cur_sigma = 1.0
    if self.init_sigma <= cur_sigma:
        # Already at (or beyond) the target blur — use the image as-is.
        return base, cur_sigma, pixel_distance
    # Add just enough extra blur to bring the total up to init_sigma.
    delta = max(math.sqrt(self.init_sigma**2 - cur_sigma**2), 0.01)
    k = self.get_kernel_size(delta)
    first_level = gaussian_blur2d(base, (k, k), (delta, delta))
    return first_level, self.init_sigma, pixel_distance
import torch.nn as nn import torch.nn.functional as F import torch.nn.parallel import torch.optim import torch.utils.data import torch.utils.data.distributed import torchvision.datasets as datasets import torchvision.transforms as transforms from kornia.filters.gaussian import gaussian_blur2d from cam import GroupCAM # import torchvision.models as models import backbones as models # Function that blurs input image blur = lambda x: gaussian_blur2d(x, kernel_size=(51, 51), sigma=(50., 50.)) model_names = sorted(name for name in models.__dict__ if name.islower() and not name.startswith("__") and callable(models.__dict__[name])) parser = argparse.ArgumentParser( description= 'An example of adopting group-cam to fine-tune classification models') parser.add_argument('data', metavar='DIR', help='path to dataset') parser.add_argument('-a', '--arch', metavar='ARCH', default='resnet18', choices=model_names, help='model architecture: ' + ' | '.join(model_names) +
def forward(self, x, class_idx=None, retain_graph=False):
    """Compute a Group-CAM saliency map for input `x`.

    Splits gradient-weighted activations into `self.groups` group masks,
    re-scores the model on blur-blended inputs masked by each group, and
    returns the score-weighted sum of the group maps normalised to [0, 1]
    (or None if the map is constant).

    NOTE(review): the `class_idx is None` path indexes
    `logit[:, logit.max(1)[-1]]`, which is only well-formed for batch
    size 1 — confirm callers never pass batched input.
    """
    input = x.clone()
    input = input.cuda()
    b, c, h, w = input.size()
    logit = self.model(input)
    if class_idx is None:
        # Explain the model's own top-1 prediction.
        predicted_class = logit.max(1)[-1]
        score = logit[:, logit.max(1)[-1]].squeeze()
    else:
        predicted_class = torch.LongTensor([class_idx])
        score = logit[:, class_idx].squeeze()
    predicted_class = predicted_class.cuda()
    # Backprop the class score to populate the hook-captured tensors.
    self.model.zero_grad()
    score.backward(retain_graph=retain_graph)
    gradients = self.gradients['value'].data
    activations = self.activations['value'].data
    b, k, u, v = activations.size()
    # Grad-CAM-style channel weights: spatially averaged gradients.
    alpha = gradients.view(b, k, -1).mean(2)
    weights = alpha.view(b, k, 1, 1)
    activations = weights * activations
    # Split weighted activations into channel groups and stack them on the
    # batch dim so all groups are processed in one forward pass.
    masks = activations.chunk(self.groups, 1)
    # parallel implement
    masks = torch.cat(masks, dim=0)
    saliency_map = masks.sum(1, keepdim=True)
    saliency_map = F.relu(saliency_map)
    # Keep only the top 30% of saliency values; zero the rest.
    threshold = np.percentile(saliency_map.cpu().numpy(), 70)
    saliency_map = torch.where(saliency_map > threshold, saliency_map,
                               torch.full_like(saliency_map, 0))
    saliency_map = F.interpolate(saliency_map,
                                 size=(h, w),
                                 mode='bilinear',
                                 align_corners=False)
    # Min-max normalise each group map independently.
    # NOTE(review): if a group is all-zero after thresholding,
    # inter_max == inter_min and this divides by zero (NaNs) — verify
    # upstream guarantees or guard with an epsilon.
    saliency_map = saliency_map.reshape(self.groups, -1)
    inter_min, inter_max = saliency_map.min(
        dim=-1, keepdim=True)[0], saliency_map.max(dim=-1, keepdim=True)[0]
    saliency_map = (saliency_map - inter_min) / (inter_max - inter_min)
    saliency_map = saliency_map.reshape(self.groups, 1, h, w)
    with torch.no_grad():
        # Blend: keep salient pixels, replace the rest with a blurred input.
        # NOTE(review): gaussian_blur2d is called with a single argument here;
        # kornia's gaussian_blur2d requires kernel_size and sigma — this
        # presumably resolves to a local wrapper with defaults. TODO confirm.
        blur_input = input * saliency_map + gaussian_blur2d(input) * (
            1 - saliency_map)
        output = self.model(blur_input)
    output = F.softmax(output, dim=-1)
    # Weight each group's map by the class probability it preserves.
    score = output[:, predicted_class].unsqueeze(-1).unsqueeze(-1)
    score_saliency_map = torch.sum(saliency_map * score, dim=0, keepdim=True)
    score_saliency_map = F.relu(score_saliency_map)
    score_saliency_map_min, score_saliency_map_max = score_saliency_map.min(
    ), score_saliency_map.max()
    if score_saliency_map_min == score_saliency_map_max:
        # Constant map carries no information.
        return None
    score_saliency_map = (score_saliency_map - score_saliency_map_min) / (
        score_saliency_map_max - score_saliency_map_min).data
    return score_saliency_map
def canny(
    input: torch.Tensor,
    low_threshold: float = 0.1,
    high_threshold: float = 0.2,
    kernel_size: Tuple[int, int] = (5, 5),
    sigma: Tuple[float, float] = (1, 1),
    hysteresis: bool = True,
    eps: float = 1e-6,
) -> Tuple[torch.Tensor, torch.Tensor]:
    r"""Finds edges of the input image and filters them using the Canny algorithm.

    .. image:: _static/img/canny.png

    Args:
        input: input image tensor with shape :math:`(B,C,H,W)`.
        low_threshold: lower threshold for the hysteresis procedure.
        high_threshold: upper threshold for the hysteresis procedure.
        kernel_size: the size of the kernel for the gaussian blur.
        sigma: the standard deviation of the kernel for the gaussian blur.
        hysteresis: if True, applies the hysteresis edge tracking. Otherwise, the edges are divided
            between weak (0.5) and strong (1) edges.
        eps: regularization number to avoid NaN during backprop.

    Returns:
        - the canny edge magnitudes map, shape of :math:`(B,1,H,W)`.
        - the canny edge detection filtered by thresholds and hysteresis, shape of :math:`(B,1,H,W)`.

    Example:
        >>> input = torch.rand(5, 3, 4, 4)
        >>> magnitude, edges = canny(input)  # 5x3x4x4
        >>> magnitude.shape
        torch.Size([5, 1, 4, 4])
        >>> edges.shape
        torch.Size([5, 1, 4, 4])
    """
    if not isinstance(input, torch.Tensor):
        raise TypeError("Input type is not a torch.Tensor. Got {}".format(type(input)))

    if not len(input.shape) == 4:
        raise ValueError("Invalid input shape, we expect BxCxHxW. Got: {}".format(input.shape))

    if low_threshold > high_threshold:
        raise ValueError(
            "Invalid input thresholds. low_threshold should be smaller than the high_threshold. Got: {}>{}".format(
                low_threshold, high_threshold
            )
        )

    # Fixed: these range checks used `and` (`< 0 and > 1`), which can never be
    # true, so out-of-range thresholds were silently accepted.
    if low_threshold < 0 or low_threshold > 1:
        raise ValueError(
            "Invalid input threshold. low_threshold should be in range (0,1). Got: {}".format(low_threshold)
        )

    if high_threshold < 0 or high_threshold > 1:
        raise ValueError(
            "Invalid input threshold. high_threshold should be in range (0,1). Got: {}".format(high_threshold)
        )

    device: torch.device = input.device
    dtype: torch.dtype = input.dtype

    # To Grayscale
    if input.shape[1] == 3:
        input = rgb_to_grayscale(input)

    # Gaussian filter
    blurred: torch.Tensor = gaussian_blur2d(input, kernel_size, sigma)

    # Compute the gradients
    gradients: torch.Tensor = spatial_gradient(blurred, normalized=False)

    # Unpack the edges
    gx: torch.Tensor = gradients[:, :, 0]
    gy: torch.Tensor = gradients[:, :, 1]

    # Compute gradient magnitude and angle
    magnitude: torch.Tensor = torch.sqrt(gx * gx + gy * gy + eps)
    angle: torch.Tensor = torch.atan2(gy, gx)

    # Radians to Degrees
    angle = rad2deg(angle)

    # Round angle to the nearest 45 degree
    angle = torch.round(angle / 45) * 45

    # Non-maximal suppression: compare each pixel against its two neighbours
    # along the (quantised) gradient direction.
    nms_kernels: torch.Tensor = get_canny_nms_kernel(device, dtype)
    nms_magnitude: torch.Tensor = F.conv2d(magnitude, nms_kernels, padding=nms_kernels.shape[-1] // 2)

    # Get the indices for both directions
    positive_idx: torch.Tensor = (angle / 45) % 8
    positive_idx = positive_idx.long()

    negative_idx: torch.Tensor = ((angle / 45) + 4) % 8
    negative_idx = negative_idx.long()

    # Apply the non-maximum suppresion to the different directions
    channel_select_filtered_positive: torch.Tensor = torch.gather(nms_magnitude, 1, positive_idx)
    channel_select_filtered_negative: torch.Tensor = torch.gather(nms_magnitude, 1, negative_idx)

    channel_select_filtered: torch.Tensor = torch.stack(
        [channel_select_filtered_positive, channel_select_filtered_negative], 1
    )

    is_max: torch.Tensor = channel_select_filtered.min(dim=1)[0] > 0.0
    magnitude = magnitude * is_max

    # Threshold: weak edges become 0.5, strong edges 0.5 + 0.5 = 1.0.
    # (Removed a dead `edges = F.threshold(...)` assignment that was
    # immediately overwritten by the line below.)
    low: torch.Tensor = magnitude > low_threshold
    high: torch.Tensor = magnitude > high_threshold

    edges = low * 0.5 + high * 0.5
    edges = edges.to(dtype)

    # Hysteresis: iteratively promote weak edges (0.5) that touch a strong
    # edge (1.0) until the edge map stops changing.
    if hysteresis:
        # edges_old starts at -1 so the loop body runs at least once.
        edges_old: torch.Tensor = -torch.ones(edges.shape, device=edges.device, dtype=dtype)
        hysteresis_kernels: torch.Tensor = get_hysteresis_kernel(device, dtype)

        while ((edges_old - edges).abs() != 0).any():
            weak: torch.Tensor = (edges == 0.5).float()
            strong: torch.Tensor = (edges == 1).float()

            hysteresis_magnitude: torch.Tensor = F.conv2d(
                edges, hysteresis_kernels, padding=hysteresis_kernels.shape[-1] // 2
            )
            hysteresis_magnitude = (hysteresis_magnitude == 1).any(1, keepdim=True).to(dtype)
            hysteresis_magnitude = hysteresis_magnitude * weak + strong

            edges_old = edges.clone()
            edges = hysteresis_magnitude + (hysteresis_magnitude == 0) * weak * 0.5

        edges = hysteresis_magnitude

    return magnitude, edges
def main():
    """Compare RISE, Grad-CAM, and Group-CAM heatmaps on one image and,
    optionally, plot their insertion/deletion score curves with AUCs.

    Relies on `parse_args`, `preprocess_img`, `RISE`, `GradCAM`, `GroupCAM`,
    `show_cam`, `CausalMetric`, and `auc` from the surrounding module, and on
    a CUDA device.
    """
    args = parse_args()
    # Load and resize the image; values normalised to [0, 1].
    raw_img = cv2.imread(args.input, 1)
    raw_img = cv2.resize(raw_img, (224, 224), interpolation=cv2.INTER_LINEAR)
    raw_img = np.float32(raw_img) / 255
    image, norm_image = preprocess_img(raw_img)
    model = models.__dict__[args.arch](pretrained=True).eval()
    model = model.cuda()
    # Three explainers over the same model/layer.
    rise = RISE(model, input_size=(224, 224), batch_size=40)
    rise.generate_masks()
    gd = GradCAM(model, target_layer=args.target_layer)
    gc = GroupCAM(model, target_layer=args.target_layer)
    rise_heatmap = rise(norm_image.cuda(), class_idx=args.cls_idx).cpu().data
    gd_heatmap = gd(norm_image.cuda(), class_idx=args.cls_idx).cpu().data
    gc_heatmap = gc(norm_image.cuda(), class_idx=args.cls_idx).cpu().data
    if args.output is not None:
        # Save overlay visualisations for each method.
        rise_cam = show_cam(image, rise_heatmap, "rise_base.png")
        gd_cam = show_cam(image, gd_heatmap, "gd_base.png")
        gc_cam = show_cam(image, gc_heatmap, "gc_base.png")
    if args.ins_del:
        # Substrates: heavy Gaussian blur for insertion, zeros for deletion.
        blur = lambda x: gaussian_blur2d(
            x, kernel_size=(51, 51), sigma=(50., 50.))
        insertion = CausalMetric(model, 'ins', 224 * 2, substrate_fn=blur)
        deletion = CausalMetric(model, 'del', 224 * 2,
                                substrate_fn=torch.zeros_like)
        norm_image = norm_image.cpu()
        gd_heatmap = gd_heatmap.cpu().numpy()
        gc_heatmap = gc_heatmap.cpu().numpy()
        rise_heatmap = rise_heatmap.cpu().numpy()
        # Per-step score curves for each method and metric.
        gc_ins_score = insertion.evaluate(norm_image,
                                          mask=gc_heatmap,
                                          cls_idx=None)
        gd_ins_score = insertion.evaluate(norm_image,
                                          mask=gd_heatmap,
                                          cls_idx=None)
        rise_ins_score = insertion.evaluate(norm_image,
                                            mask=rise_heatmap,
                                            cls_idx=None)
        gc_del_score = deletion.evaluate(norm_image,
                                         mask=gc_heatmap,
                                         cls_idx=None)
        gd_del_score = deletion.evaluate(norm_image,
                                         mask=gd_heatmap,
                                         cls_idx=None)
        rise_del_score = deletion.evaluate(norm_image,
                                           mask=rise_heatmap,
                                           cls_idx=None)
        legend = ["RISE", "Grad-CAM", "Group-CAM"]
        # Collapse curves to AUCs, scale to percentages for the legend.
        ins_scores = [
            auc(rise_ins_score),
            auc(gd_ins_score),
            auc(gc_ins_score)
        ]
        del_scores = [
            auc(rise_del_score),
            auc(gd_del_score),
            auc(gc_del_score)
        ]
        ins_scores = [round(i * 100, 2) for i in ins_scores]
        del_scores = [round(i * 100, 2) for i in del_scores]
        ins_legend = [i + ": " + str(j) for i, j in zip(legend, ins_scores)]
        del_legend = [i + ": " + str(j) for i, j in zip(legend, del_scores)]
        # x axis: fraction of pixels inserted/deleted at each step.
        n_steps = len(gd_ins_score)
        x = np.arange(n_steps) / n_steps
        plt.figure(figsize=(12, 5))
        # NOTE: xlim/ylim are set before subplot(121) is created, so they act
        # on matplotlib's implicit current axes — kept as-is to preserve the
        # exact original figure behavior.
        plt.xlim(-0.1, 1.1)
        plt.ylim(0, 1.05)
        plt.subplot(121)
        plt.plot(x, rise_ins_score)
        plt.plot(x, gd_ins_score)
        plt.plot(x, gc_ins_score)
        plt.xticks(fontsize=15)
        plt.yticks(fontsize=15)
        plt.legend(ins_legend, loc='best', fontsize=15)
        plt.title("Insertion Curve", fontsize=15)
        plt.subplot(122)
        plt.plot(x, rise_del_score)
        plt.plot(x, gd_del_score)
        plt.plot(x, gc_del_score)
        plt.xticks(fontsize=15)
        plt.yticks(fontsize=15)
        plt.legend(del_legend, loc='best', fontsize=15)
        plt.title("Deletion Curve", fontsize=15)
        plt.show()