def show_gradcam(tensor, model):
    # Feed the image forward, compute Grad-CAM, and gather the heatmap
    if model.__class__.__name__ == 'ResNet':
        target_layer = model.layer4[-1].conv2
        gradcam = GradCAM(model, target_layer)
    elif model.__class__.__name__ == 'DenseNet':
        target_layer = model.features.norm5
        gradcam = GradCAM(model, target_layer)
    elif model.__class__.__name__ == 'DataParallel':
        target_layer = model.module.densenet121.features.norm5
        gradcam = GradCAM(model.module.densenet121, target_layer)
    else:
        raise ValueError('improper model')

    mask, _ = gradcam(tensor)
    heatmap, _ = visualize_cam(mask, tensor)

    # heatmap from torch.Tensor to numpy.ndarray
    mask = mask[0].permute(1, 2, 0).detach().cpu().numpy()
    heatmap = heatmap.permute(1, 2, 0).numpy()
    return heatmap, mask
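# Hypothetical usage sketch for show_gradcam, assuming a torchvision ResNet-18
# and a (1, 3, 224, 224) normalized batch; GradCAM/visualize_cam come from the
# same gradcam package imported by this file.
import torch
from torchvision import models

resnet = models.resnet18(pretrained=True).eval()
batch = torch.randn(1, 3, 224, 224)  # stand-in for a preprocessed image
heatmap_np, mask_np = show_gradcam(batch, resnet)
print(heatmap_np.shape, mask_np.shape)  # (224, 224, 3) and (224, 224, 1)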
def main():
    args = get_args()
    root_dir = args.root_dir
    imgs = list(os.walk(root_dir))[0][2]
    save_dir = args.save_dir
    num_classes = 100  # CIFAR100

    model = ResNet.resnet(arch='resnet50', pretrained=False,
                          num_classes=num_classes,
                          use_att=args.use_att, att_mode=args.att_mode)
    #model = nn.DataParallel(model)
    #print(model)

    if args.resume:
        if os.path.isfile(args.resume):
            print(f'=> loading checkpoint {args.resume}')
            checkpoint = torch.load(args.resume)
            best_acc5 = checkpoint['best_acc5']
            model.load_state_dict(checkpoint['state_dict'], strict=False)
            print(f"=> loaded checkpoint {args.resume} (epoch {checkpoint['epoch']})")
            print(f'=> best accuracy {best_acc5}')
        else:
            print(f'=> no checkpoint found at {args.resume}')

    model_dict = get_model_dict(model, args.type)
    normalizer = Normalize(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225])

    for img_name in imgs:
        img_path = os.path.join(root_dir, img_name)
        pil_img = PIL.Image.open(img_path)
        torch_img = torch.from_numpy(np.asarray(pil_img))
        torch_img = torch_img.permute(2, 0, 1).unsqueeze(0)
        torch_img = torch_img.float().div(255)
        torch_img = F.interpolate(torch_img, size=(224, 224),
                                  mode='bilinear', align_corners=False)
        normalized_torch_img = normalizer(torch_img)

        gradcam = GradCAM(model_dict, True)
        gradcam_pp = GradCAMpp(model_dict, True)

        mask, _ = gradcam(normalized_torch_img)
        heatmap, result = visualize_cam(mask, torch_img)

        mask_pp, _ = gradcam_pp(normalized_torch_img)
        heatmap_pp, result_pp = visualize_cam(mask_pp, torch_img)

        images = torch.stack([torch_img.squeeze().cpu(), heatmap, heatmap_pp,
                              result, result_pp], 0)
        images = make_grid(images, nrow=1)

        if args.use_att:
            save_dir = os.path.join(args.save_dir, 'att')
        else:
            save_dir = os.path.join(args.save_dir, 'no_att')
        os.makedirs(save_dir, exist_ok=True)
        output_name = img_name
        output_path = os.path.join(save_dir, output_name)
        save_image(images, output_path)
def plot_gradcam_images(model, layers, image_list, classes, figsize=(23, 33),
                        sub_plot_rows=9, sub_plot_cols=3, image_count=25):
    fig = plt.figure(figsize=figsize)
    for i in range(image_count):
        heat_map_image = [image_list[i][0].cpu() / 2 + 0.5]
        result_image = [image_list[i][0].cpu() / 2 + 0.5]
        for model_layer in layers:
            grad_cam = GradCAM(model, model_layer)
            mask, _ = grad_cam(image_list[i][0].clone().unsqueeze_(0))
            heatmap, result = visualize_cam(
                mask, image_list[i][0].clone().unsqueeze_(0) / 2 + 0.5)
            heat_map_image.extend([heatmap])
            result_image.extend([result])
        grid_image = make_grid(heat_map_image + result_image,
                               nrow=len(layers) + 1, pad_value=1)
        npimg = grid_image.numpy()
        sub = fig.add_subplot(sub_plot_rows, sub_plot_cols, i + 1)
        plt.imshow(np.transpose(npimg, (1, 2, 0)))
        sub.set_title('P = ' + classes[int(image_list[i][1])] +
                      ' A = ' + classes[int(image_list[i][2])],
                      fontweight='bold', fontsize=18)
        sub.axis('off')
    plt.tight_layout()
    fig.subplots_adjust(wspace=0)
def DisplayMisclassifiedGradCamImages(model, model_type, layer,
                                      misclassified_indexes, device, classes):
    gradcam = GradCAM.from_config(
        **dict(model_type=model_type, arch=model, layer_name=layer))
    x, y = 0, 0
    fig, axs = plt.subplots(5, 5, figsize=(20, 20))
    plt.setp(axs, xticks=[], yticks=[])
    fig.subplots_adjust(wspace=0.7)
    images = list(misclassified_indexes.items())[:25]
    for index, results in images:
        img = results['data']
        img = torch.from_numpy(img)
        actual_class = classes[results['actual']]
        predicted_class = classes[results['predicted']]
        mask, _ = gradcam(img[np.newaxis, :].to(device))
        heatmap, result = visualize_cam(mask, img[np.newaxis, :])
        result = np.transpose(result.cpu().numpy(), (1, 2, 0))
        axs[x, y].imshow(result)
        axs[x, y].set_title('Actual Class: ' + str(actual_class) +
                            '\nPredicted class: ' + str(predicted_class))
        if y == 4:
            x += 1
            y = 0
        else:
            y += 1
def DisplayGradCamImages(model, model_type, layer, dataloader, classes, device,
                         count=10):
    gradcam = GradCAM.from_config(
        **dict(model_type=model_type, arch=model, layer_name=layer))
    dataiter = iter(dataloader)
    images, labels = next(dataiter)  # dataiter.next() was removed; use the builtin next()
    outputs = model(images.to(device))
    _, predicted = torch.max(outputs.data, 1)
    for i in range(count):
        imagestodisplay = []
        mask, _ = gradcam(images[i][np.newaxis, :].to(device))
        heatmap, result = visualize_cam(mask, images[i][np.newaxis, :])
        imagestodisplay.extend([images[i].cpu(), heatmap, result])
        grid_image = make_grid(imagestodisplay, nrow=3)
        plt.figure(figsize=(20, 20))
        plt.imshow(np.transpose(grid_image.cpu().numpy(), (1, 2, 0)))
        plt.show()
        print(
            f"Prediction : {classes[predicted[i]]}, Actual : {classes[labels[i]]}"
        )
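# Hypothetical invocation of DisplayGradCamImages; model, test_loader, classes,
# and device are assumed to exist in the surrounding script, and 'layer4' is
# the usual Grad-CAM target for torchvision ResNets.
DisplayGradCamImages(model, 'resnet', 'layer4', test_loader, classes, device, count=5)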
def grad_cam(model, x_batch):
    gradcam = GradCAM.from_config(arch=model._modules['resnet'],
                                  model_type='resnet',
                                  layer_name='7')
    mask, _ = gradcam(x_batch)
    heatmap, result = visualize_cam(mask, x_batch)
    result = result.numpy().transpose(1, 2, 0)
    return heatmap, result
def main(args):
    np.random.seed(args.seed)
    use_cuda = args.cuda and torch.cuda.is_available()
    device = 'cuda' if use_cuda else 'cpu'

    model = FactorVAE(args.z_dim).to(device)
    model_found = load_checkpoint(model, args.dir, args.name, device)
    if not model_found:
        return

    gcam = GradCAM(model.encode, args.target_layer, device, args.image_size)
    _, dataset = return_data(args)
    inputs = dataset[np.arange(0, args.sample_count)][0].to(device)
    recon, mu, logvar, z = model(inputs)
    inputs, recon = inputs.repeat(1, 3, 1, 1), recon.repeat(1, 3, 1, 1)

    maps = gcam.generate(z)
    maps = maps.transpose(0, 1)
    first_cam, second_cam = [], []
    for cam_map in maps:
        response = cam_map.flatten(1).sum(1)
        argmax = torch.argmax(response).item()
        first_cam.append(normalize_tensor(cam_map[argmax]))
        response = torch.cat((response[:argmax], response[argmax + 1:]))
        second_cam.append(normalize_tensor(cam_map[torch.argmax(response).item()]))
    first_cam = torch.stack(first_cam, dim=1).transpose(0, 1).unsqueeze(1)
    second_cam = torch.stack(second_cam, dim=1).transpose(0, 1).unsqueeze(1)

    inputs, recon, first_cam, second_cam = process_imgs(
        inputs.detach(), recon.detach(), first_cam.detach(),
        second_cam.detach(), args.sample_count)

    heatmap = add_heatmap(inputs, first_cam)
    heatmap2 = add_heatmap(inputs, second_cam)
    # np.float is deprecated; use the builtin float dtype instead
    inputs = np.uint8(np.asarray(inputs, dtype=float) * 255)
    recon = np.uint8(np.asarray(recon, dtype=float) * 255)

    grid = np.concatenate((inputs, heatmap, heatmap2))
    cv2.imshow('Attention Maps of ' + args.name, grid)
    cv2.waitKey(0)
def grad_cam(model, model_type, layer_name, normed_torch_img, torch_img):
    config = dict(model_type=model_type, arch=model, layer_name=layer_name)
    config['arch'].eval()  # .to(device)
    cam = GradCAM.from_config(**config)
    mask, _ = cam(normed_torch_img)
    heatmap, result = visualize_cam(mask, torch_img)
    return transforms.ToPILImage()(result)
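# A minimal usage sketch for the grad_cam helper above, assuming a torchvision
# ResNet-50 and the GradCAM.from_config convention used elsewhere in this
# file; 'sample.jpg' is a placeholder path and 'layer4' the usual ResNet target.
from PIL import Image
from torchvision import models, transforms

pil_img = Image.open('sample.jpg').convert('RGB')
to_tensor = transforms.Compose([transforms.Resize((224, 224)),
                                transforms.ToTensor()])
img_3d = to_tensor(pil_img)
normed_3d = transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])(img_3d)
cam_pil = grad_cam(models.resnet50(pretrained=True), 'resnet', 'layer4',
                   normed_3d.unsqueeze(0), img_3d.unsqueeze(0))
cam_pil.save('cam_overlay.png')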
def gradcam(model, img_orig, img_b, pred, conf, label_max):
    img_encs = []
    for lbl_index in range(label_max):
        if pred[lbl_index] > conf:
            cam = GradCAM(model, lbl_index)
            heatmap = cam.compute_heatmap(np.array([img_b]))
            heatmap = cv2.resize(heatmap, (img_orig.shape[1], img_orig.shape[0]))
            (heatmap, output) = cam.overlay_heatmap(heatmap, img_orig, alpha=0.5)
            _, out_enc = cv2.imencode(".jpg", output)
            out_enc = base64.b64encode(out_enc).decode('ascii')
            img_encs.append(out_enc)
        else:
            img_encs.append(None)
    return img_encs
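# Hypothetical round trip for the base64 overlays returned by gradcam above;
# model, img_orig, img_b, and pred are assumed to be prepared as in the
# surrounding script (Keras classifier, BGR image, preprocessed input, probs).
import base64
import cv2
import numpy as np

img_encs = gradcam(model, img_orig, img_b, pred, conf=0.5, label_max=len(pred))
for lbl, enc in enumerate(img_encs):
    if enc is None:
        continue  # class probability was below the confidence threshold
    buf = np.frombuffer(base64.b64decode(enc), dtype=np.uint8)
    cv2.imwrite(f'overlay_{lbl}.jpg', cv2.imdecode(buf, cv2.IMREAD_COLOR))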
def train(train_loader, model, criterion, sam_criterion, sam_criterion_outer,
          epoch, optimizer):
    global best_metrics_train
    metrics_holder = MetricsHolder(TRAIN_AMOUNT)

    # switch to train mode
    model.train()

    th = 0.5
    sigmoid = nn.Sigmoid()
    for i, dictionary in enumerate(train_loader):
        input_img = dictionary['image']
        target = dictionary['label']
        segm = dictionary['segm']

        if is_server:
            input_img = input_img.cuda(args.cuda_device)
            target = target.cuda(args.cuda_device)
            segm = segm.cuda(args.cuda_device)

        # get gradcam mask + compute output
        target_layer = model.layer4
        gradcam = GradCAM(model, target_layer=target_layer)
        gc_mask, no_norm_gc_mask, output, sam_output = gradcam(
            input_img, retain_graph=True)

        # calculate loss
        loss_main = criterion(output, target)
        loss_add = calculate_and_choose_additional_loss(
            segm, sam_output, sam_criterion, sam_criterion_outer)
        loss_comb = loss_main + loss_add
        metrics_holder.update_losses(loss_add=loss_add,
                                     loss_main=loss_main,
                                     loss_comb=loss_comb)

        # update classification metrics
        activated_output = (sigmoid(output.data) > th).float()
        metrics_holder.update_expected_predicted(target=target,
                                                 output=activated_output)

        # calculate and update SAM and gradcam metrics
        metrics_holder.update_gradcam_metrics(
            *calculate_gradcam_metrics(no_norm_gc_mask, segm))
        metrics_holder.update_sam_metrics(
            *calculate_sam_metrics(sam_output, segm))

        optimizer.zero_grad()
        loss_comb.backward()
        optimizer.step()

        if i % args.print_freq == 0:
            print(f'Train: [{epoch}][{i}/{len(train_loader)}]')

    metrics_holder.calculate_all_metrcis()
    best_metrics_train.update(metrics_holder)
    wandb_log("trn", epoch, metrics_holder)
def validate(val_loader, model, criterion, sam_criterion, sam_criterion_outer,
             epoch):
    global best_metrics_val
    metrics_holder = MetricsHolder(VAL_AMOUNT)

    th = 0.5
    sigmoid = nn.Sigmoid()

    # switch to evaluate mode
    model.eval()
    for i, dictionary in enumerate(val_loader):
        input_img = dictionary['image']
        target = dictionary['label']
        segm = dictionary['segm']

        if is_server:
            input_img = input_img.cuda(args.cuda_device)
            target = target.cuda(args.cuda_device)
            segm = segm.cuda(args.cuda_device)

        # get gradcam mask + compute output
        target_layer = model.layer4
        gradcam = GradCAM(model, target_layer=target_layer)
        gc_mask, no_norm_gc_mask, output, sam_output = gradcam(input_img)

        # calculate loss and update its metrics
        loss_main = criterion(output, target)
        loss_add = calculate_and_choose_additional_loss(
            segm, sam_output, sam_criterion, sam_criterion_outer)
        loss_comb = loss_main + loss_add
        metrics_holder.update_losses(loss_add=loss_add,
                                     loss_main=loss_main,
                                     loss_comb=loss_comb)

        # update classification metrics
        activated_output = (sigmoid(output.data) > th).float()
        metrics_holder.update_expected_predicted(target=target,
                                                 output=activated_output)

        # calculate and update SAM and gradcam metrics
        metrics_holder.update_gradcam_metrics(
            *calculate_gradcam_metrics(no_norm_gc_mask, segm))
        metrics_holder.update_sam_metrics(
            *calculate_sam_metrics(sam_output, segm))

        if i % args.print_freq == 0:
            print(f'Validate: [{epoch}][{i}/{len(val_loader)}]')

    metrics_holder.calculate_all_metrcis()
    best_metrics_val.update(metrics_holder)
    wandb_log("val", epoch, metrics_holder)
def __init__(self, weights=None, model_metadata=None):
    super().__init__(weights, model_metadata)
    # Load the DenseNet169 architecture from torchvision along with its
    # available pretrained weights, and keep the model on the CPU.
    # Note: this should be adjusted to place the model and its weights on
    # the GPU when one is available.
    self.model = models.densenet169(pretrained=True).cpu()
    # Load the pretrained weights
    pretrained_net = torch.load(weights, map_location='cpu')
    self.model.load_state_dict(pretrained_net)
    self.model.eval()
    self.gradcam = GradCAM.from_config(model_type='densenet',
                                       arch=self.model,
                                       layer_name='features_norm5')
def generate_saliency_map(img, img_name):
    start = time.time()

    normalizer = Normalize(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225])
    torch_img = torch.from_numpy(np.asarray(img)).permute(
        2, 0, 1).unsqueeze(0).float().div(255)
    # F.upsample is deprecated; F.interpolate is the current equivalent
    torch_img = F.interpolate(torch_img, size=(512, 512), mode='bilinear',
                              align_corners=False)
    normed_torch_img = normalizer(torch_img)

    resnet = models.resnet101(pretrained=True)
    resnet.eval()

    model_dict = dict(type='resnet', arch=resnet, layer_name='layer4',
                      input_size=(512, 512))
    gradcam = GradCAM(model_dict, True)
    gradcam_pp = GradCAMpp(model_dict, True)

    images = []
    mask, _ = gradcam(normed_torch_img)
    heatmap, result = visualize_cam(mask, torch_img)

    mask_pp, _ = gradcam_pp(normed_torch_img)
    heatmap_pp, result_pp = visualize_cam(mask_pp, torch_img)

    images.append(
        torch.stack([
            torch_img.squeeze().cpu(), heatmap, heatmap_pp, result, result_pp
        ], 0))
    images = make_grid(torch.cat(images, 0), nrow=1)

    # Only going to use result_pp
    output_dir = 'outputs'
    os.makedirs(output_dir, exist_ok=True)
    output_name = img_name
    output_path = os.path.join(output_dir, output_name)
    save_image(result_pp, output_path)

    end = time.time()
    duration = round(end - start, 2)
    return output_path
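# Minimal call sketch for generate_saliency_map; 'street.jpg' is a placeholder
# input, and the function itself handles resizing, normalization, and saving.
from PIL import Image

saved_path = generate_saliency_map(Image.open('street.jpg').convert('RGB'),
                                   'street_cam.png')
print('Grad-CAM++ overlay written to', saved_path)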
def Grad_Cam(model, train_datasets):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.eval()

    target_layer = model.features
    gradcam = GradCAM(model, target_layer)
    gradcam_pp = GradCAMpp(model, target_layer)

    images = []
    for i in range(10):
        index = random.randint(0, 212)
        first_inputs, _ = train_datasets[index]
        inputs = first_inputs.to(device).unsqueeze(0)

        mask, _ = gradcam(inputs)
        heatmap, result = visualize_cam(mask, first_inputs)

        mask_pp, _ = gradcam_pp(inputs)
        heatmap_pp, result_pp = visualize_cam(mask_pp, first_inputs)

        images.extend(
            [first_inputs.cpu(), heatmap, heatmap_pp, result, result_pp])

    grid_image = make_grid(images, nrow=5)
    return transforms.ToPILImage()(grid_image)
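# Usage sketch for Grad_Cam above: it expects a model with a .features block
# (e.g. a VGG) on the device it picks internally, and a dataset with at least
# 213 items, since it samples indices 0..212. train_datasets is assumed to
# come from the surrounding script.
import torch
from torchvision import models

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
vgg = models.vgg16(pretrained=True).to(device)
grid_pil = Grad_Cam(vgg, train_datasets)
grid_pil.save('gradcam_grid.jpg')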
print("Image {}".format(f)) input_image_name = f # Our input image to process save_prefix = os.path.split(os.path.splitext(input_image_name)[0])[-1] # Chop the file extension and path load_image_name = os.path.join(input_dir, input_image_name) os.makedirs(output_dir, exist_ok=True) # Lets load in our image. We will do a simple resize on it. in_tensor = misc.LoadImageToTensor(load_image_name, device) in_tensor = F.interpolate(in_tensor, size=(in_height, in_width), mode='bilinear', align_corners=False) # Now, lets get the Grad-CAM++ saliency map only. resnet_gradcam = GradCAM.from_config(model_type='resnet', arch=model, layer_name='layer4') cam_map, logit = resnet_gradcam(in_tensor) # Create our saliency map object. We hand it our Torch model and names for the layers we want to tap. get_salmap = maps.SaliencyModel(model, layers, output_size=[in_height,in_width], weights=weights, norm_method=norm_method) # Get Forward sal map csmap,smaps,_ = get_salmap(in_tensor) # Let's get our original input image back. We will just use this one for visualization. raw_tensor = misc.LoadImageToTensor(load_image_name, device, norm=False) raw_tensor = F.interpolate(raw_tensor, size=(in_height, in_width), mode='bilinear', align_corners=False)
def make_plot_and_save(input_img, img_name, no_norm_image, segm, model,
                       train_or_val, epoch=None, vis_prefix=None):
    global is_server

    # get Grad-CAM results and prepare them to show on the plot
    target_layer = model.layer4
    gradcam = GradCAM(model, target_layer=target_layer)
    gradcam_pp = GradCAMpp(model, target_layer=target_layer)

    # sam_output shapes:
    # [1, 1, 56, 56] x3, [1, 1, 28, 28] x4, [1, 1, 14, 14] x6, [1, 1, 7, 7] x3
    mask, no_norm_mask, logit, sam_output = gradcam(input_img)
    sam1_show = torch.squeeze(sam_output[0].cpu()).detach().numpy()
    sam4_show = torch.squeeze(sam_output[3].cpu()).detach().numpy()
    sam8_show = torch.squeeze(sam_output[7].cpu()).detach().numpy()
    sam14_show = torch.squeeze(sam_output[13].cpu()).detach().numpy()

    heatmap, result = visualize_cam(mask, no_norm_image)
    result_show = np.moveaxis(torch.squeeze(result).detach().numpy(), 0, -1)

    mask_pp, no_norm_mask_pp, logit_pp, sam_output_pp = gradcam_pp(input_img)
    heatmap_pp, result_pp = visualize_cam(mask_pp, no_norm_image)
    result_pp_show = np.moveaxis(
        torch.squeeze(result_pp).detach().numpy(), 0, -1)

    # prepare mask and original image to show on the plot
    segm_show = torch.squeeze(segm.cpu()).detach().numpy()
    segm_show = np.moveaxis(segm_show, 0, 2)
    input_show = np.moveaxis(
        torch.squeeze(no_norm_image).detach().numpy(), 0, -1)

    # draw and save the plot
    plt.close('all')
    fig, axs = plt.subplots(nrows=2, ncols=6, figsize=(24, 9))
    plt.suptitle(f'{train_or_val}-Image: {img_name}')
    axs[1][0].imshow(segm_show)
    axs[1][0].set_title('Mask')
    axs[0][0].imshow(input_show)
    axs[0][0].set_title('Original Image')
    axs[0][1].imshow(result_show)
    axs[0][1].set_title('Grad-CAM')
    axs[1][1].imshow(result_pp_show)
    axs[1][1].set_title('Grad-CAM++')
    axs[1][2].imshow(sam1_show, cmap='gray')
    axs[1][2].set_title('SAM-1 relative')
    axs[0][2].imshow(sam1_show, vmin=0., vmax=1., cmap='gray')
    axs[0][2].set_title('SAM-1 absolute')
    axs[1][3].imshow(sam4_show, cmap='gray')
    axs[1][3].set_title('SAM-4 relative')
    axs[0][3].imshow(sam4_show, vmin=0., vmax=1., cmap='gray')
    axs[0][3].set_title('SAM-4 absolute')
    axs[1][4].imshow(sam8_show, cmap='gray')
    axs[1][4].set_title('SAM-8 relative')
    axs[0][4].imshow(sam8_show, vmin=0., vmax=1., cmap='gray')
    axs[0][4].set_title('SAM-8 absolute')
    axs[1][5].imshow(sam14_show, cmap='gray')
    axs[1][5].set_title('SAM-14 relative')
    axs[0][5].imshow(sam14_show, vmin=0., vmax=1., cmap='gray')
    axs[0][5].set_title('SAM-14 absolute')
    plt.show()

    if vis_prefix is not None:
        plt.savefig(f'vis/{vis_prefix}/{train_or_val}/{img_name}.png',
                    bbox_inches='tight')
    if is_server:
        if epoch is not None:
            wandb.log({f'{train_or_val}/{img_name}': fig}, step=epoch)
        else:
            wandb.log({f'{train_or_val}/{img_name}': fig})
model = VGG16(weights='imagenet')
activation_layer = 'block5_conv3'

img_path = '../images/cat_dog.jpg'
img = load_image(path=img_path, target_size=(img_width, img_height))

preds = model.predict(img)
predicted_class = preds.argmax(axis=1)[0]
# decode the results into a list of tuples (class, description, probability)
# (one such list for each sample in the batch)
print("predicted top1 class:", predicted_class)
print('Predicted:', decode_predictions(preds, top=1)[0])
# Predicted: [(u'n02504013', u'Indian_elephant', 0.82658225), (u'n01871265', u'tusker', 0.1122357), (u'n02504458', u'African_elephant', 0.061040461)]

# create Grad-CAM generator
gradcam_generator = GradCAM(model, activation_layer, predicted_class)
grad_cam, grad_val = gradcam_generator.generate(img)

# create Convolution Visualizer
vis_conv = VisConvolution(model, VGG16, activation_layer)
gradient = vis_conv.generate(img)

img = cv2.imread(img_path)
img = cv2.resize(img, (img_width, img_height))

grad_cam = grad_cam / grad_cam.max()
grad_cam = grad_cam * 255
grad_cam = cv2.resize(grad_cam, (img_width, img_height))
grad_cam = np.uint8(grad_cam)
cv_cam = cv2.applyColorMap(grad_cam, cv2.COLORMAP_JET)
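# A common follow-up step (an assumption, not part of the original script):
# blend the JET-colored CAM with the resized input and write the overlay out.
overlay = cv2.addWeighted(img, 0.6, cv_cam, 0.4, 0)
cv2.imwrite('gradcam_vgg16_overlay.jpg', overlay)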
if FLAGS.show_model_summary:
    visual_model.summary()
else:
    visual_model = model_factory.get_model(FLAGS)

FLAGS.batch_size = 1
test_generator = get_generator(FLAGS.test_csv, FLAGS)
images_names = test_generator.get_images_names()

for batch_i in tqdm(range(test_generator.steps)):
    batch, _ = test_generator.__getitem__(batch_i)
    image_path = os.path.join(FLAGS.image_directory, images_names[batch_i])
    original = cv2.imread(image_path)

    preds = visual_model.predict(batch)
    predicted_class = np.argmax(preds[0])
    label = f"Birad-{predicted_class + 1}"

    cam = GradCAM(visual_model, predicted_class)
    heatmap = cam.compute_heatmap(batch)
    heatmap = cv2.resize(heatmap, (original.shape[1], original.shape[0]))
    (heatmap, output) = cam.overlay_heatmap(heatmap, original, alpha=0.5)

    cv2.rectangle(output, (0, 0), (340, 40), (0, 0, 0), -1)
    cv2.putText(output, label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                (255, 255, 255), 2)
    cv2.imwrite(os.path.join(write_path, images_names[batch_i]), output)
def main(args):
    # Load the synset words
    file_name = 'synset_words.txt'
    classes = list()
    with open(file_name) as class_file:
        for line in class_file:
            classes.append(
                line.strip().split(' ', 1)[1].split(', ', 1)[0].replace(' ', '_'))

    print('Loading a model...')
    model = torchvision.models.resnet152(pretrained=True)

    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])

    print('\nGrad-CAM')
    gcam = GradCAM(model=model, target_layer='layer4.2', n_class=1000,
                   cuda=args.cuda)
    gcam.load_image(args.image, transform)
    gcam.forward()

    for i in range(0, 5):
        gcam.backward(idx=gcam.idx[i])
        cls_name = classes[gcam.idx[i]]
        output = gcam.generate()
        print('\t{:.5f}\t{}'.format(gcam.prob[i], cls_name))
        gcam.save('results/{}_gcam.png'.format(cls_name), output)

    print('\nBackpropagation')
    bp = BackPropagation(model=model, target_layer='conv1', n_class=1000,
                         cuda=args.cuda)
    bp.load_image(args.image, transform)
    bp.forward()

    for i in range(0, 5):
        bp.backward(idx=bp.idx[i])
        cls_name = classes[bp.idx[i]]
        output = bp.generate()
        print('\t{:.5f}\t{}'.format(bp.prob[i], cls_name))
        bp.save('results/{}_bp.png'.format(cls_name), output)

    print('\nGuided Backpropagation')
    gbp = GuidedBackPropagation(model=model, target_layer='conv1',
                                n_class=1000, cuda=args.cuda)
    gbp.load_image(args.image, transform)
    gbp.forward()

    for i in range(0, 5):
        cls_idx = gcam.idx[i]
        cls_name = classes[cls_idx]

        gcam.backward(idx=cls_idx)
        output_gcam = gcam.generate()

        gbp.backward(idx=cls_idx)
        output_gbp = gbp.generate()

        output_gcam -= output_gcam.min()
        output_gcam /= output_gcam.max()
        output_gcam = cv2.resize(output_gcam, (224, 224))
        output_gcam = cv2.cvtColor(output_gcam, cv2.COLOR_GRAY2BGR)

        output = output_gbp * output_gcam

        print('\t{:.5f}\t{}'.format(gbp.prob[i], cls_name))
        gbp.save('results/{}_gbp.png'.format(cls_name), output_gbp)
        gbp.save('results/{}_ggcam.png'.format(cls_name), output)
def main(config):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_transform = transforms.Compose([
        transforms.Resize(256),  # transforms.Scale is deprecated in favor of Resize
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor()
    ])
    val_transform = transforms.Compose(
        [transforms.Resize((224, 224)),
         transforms.ToTensor()])
    test_transform = transforms.Compose([transforms.ToTensor()])

    trainset = AVADataset(csv_file=config.train_csv_file,
                          root_dir=config.train_img_path,
                          transform=train_transform)
    valset = AVADataset(csv_file=config.val_csv_file,
                        root_dir=config.val_img_path,
                        transform=val_transform)

    train_loader = torch.utils.data.DataLoader(
        trainset,
        batch_size=config.train_batch_size,
        shuffle=True,
        num_workers=config.num_workers)
    val_loader = torch.utils.data.DataLoader(valset,
                                             batch_size=config.val_batch_size,
                                             shuffle=False,
                                             num_workers=config.num_workers)

    # base_model = models.vgg16(pretrained=True)
    # base_model = models.resnet18(pretrained=True)
    base_model = models.resnet101(pretrained=True, progress=False)
    # base_model = models.inception_v3(pretrained=True)
    model = NIMA(base_model)
    # model = NIMA()

    if config.warm_start:
        model.load_state_dict(
            torch.load(
                os.path.join(config.ckpt_path,
                             'epoch-%d.pkl' % config.warm_start_epoch)))
        print('Successfully loaded model epoch-%d.pkl' % config.warm_start_epoch)

    if config.multi_gpu:
        model.features = torch.nn.DataParallel(model.features,
                                               device_ids=config.gpu_ids)
        model = model.to(device)
    else:
        model = model.to(device)

    conv_base_lr = config.conv_base_lr
    dense_lr = config.dense_lr
    optimizer = optim.SGD([{
        'params': model.features.parameters(),
        'lr': conv_base_lr
    }, {
        'params': model.classifier.parameters(),
        'lr': dense_lr
    }], momentum=0.9)
    # optimizer = optim.Adam(model.parameters(), lr=conv_base_lr, betas=(0.9, 0.999))

    # Loss functions
    # criterion = torch.nn.L1Loss()
    criterion = torch.nn.CrossEntropyLoss()

    # send hyperparams
    lrs.send({
        'title': 'EMD Loss',
        'train_batch_size': config.train_batch_size,
        'val_batch_size': config.val_batch_size,
        'optimizer': 'SGD',
        'conv_base_lr': config.conv_base_lr,
        'dense_lr': config.dense_lr,
        'momentum': 0.9
    })

    param_num = 0
    for param in model.parameters():
        param_num += int(np.prod(param.shape))
    print('Trainable params: %.2f million' % (param_num / 1e6))

    if config.test:
        # start.record()
        print('Testing')
        model.load_state_dict(
            torch.load(
                os.path.join(config.ckpt_path,
                             'epoch-%d.pkl' % config.warm_start_epoch)))
        target_layer = model.features

        # compute mean score
        test_transform = test_transform  # val_transform
        testset = AVADataset(csv_file=config.test_csv_file,
                             root_dir=config.test_img_path,
                             transform=val_transform)
        test_loader = torch.utils.data.DataLoader(
            testset,
            batch_size=config.test_batch_size,
            shuffle=False,
            num_workers=config.num_workers)

        ypreds = []
        ylabels = []
        im_ids = []
        # std_preds = []
        count = 0
        gradcam = GradCAM(model, target_layer)

        for data in test_loader:
            im_id = data['img_id']
            im_name = os.path.split(im_id[0])
            myname = os.path.splitext(im_name[1])
            image = data['image'].to(device)

            mask, _ = gradcam(image)
            heatmap, result = visualize_cam(mask, image)
            im = transforms.ToPILImage()(result)
            im.save(myname[0] + ".jpg")

            labels = data['annotations'].to(device).long()
            output = model(image)
            output = output.view(-1, 2)

            bpred = output.to(torch.device("cpu"))
            cpred = bpred.data.numpy()
            blabel = labels.to(torch.device("cpu"))
            clabel = blabel.data.numpy()

            # predicted_mean, predicted_std = 0.0, 0.0
            # for i, elem in enumerate(output, 1):
            #     predicted_mean += i * elem
            # for j, elem in enumerate(output, 1):
            #     predicted_std += elem * (j - predicted_mean) ** 2

            ypreds.append(cpred)
            ylabels.append(clabel)
            im_name = os.path.split(im_id[0])
            im_ids.append(im_name[1])
            count = count + 1

        np.savez('Test_results_16.npz', Label=ylabels, Predict=ypreds)
        df = pd.DataFrame(data={'Label': ylabels, 'Predict': ypreds})
        print(df.dtypes)
        df.to_pickle("./Test_results_19_resnet.pkl")
def main():
    # Initialize the model for this run
    model_ft, input_size = initialize_model(model_name, num_classes,
                                            feature_extract,
                                            use_pretrained=True)
    model_ft.to(device)

    # Temporary header
    # directory - normal, bacteria, TB, COVID-19, virus
    dir_test = '/home/ubuntu/segmentation/output/COVID-19/'
    label = 3  # set 3 for COVID-19 for virus class

    # Data loader
    test_masked_images = sorted(glob.glob(dir_test + '*.npz'))
    #test_masks = sorted(glob.glob(dir_test + '*.mask.npy'))

    for masked_img in test_masked_images:
        test_masked_img = np.load(masked_img)
        #test_mask = np.load(mask)
        # keep the resized image as a numpy array so .shape works below
        test_masked_img = np.asarray(
            Image.fromarray(test_masked_img).resize((1024, 1024)))
        #test_mask = Image.fromarray(test_mask).resize((1024,1024))
        #test_img = np.asarray(test_img)
        #test_mask = np.round(np.asarray(test_mask))
        #test_masked = np.multiply(test_img, test_mask)

        test_normalized = test_masked_img

        h_whole = test_normalized.shape[0]  # original w
        w_whole = test_normalized.shape[1]  # original h

        background = np.zeros((h_whole, w_whole))
        background_indicer = np.zeros((h_whole, w_whole))
        sum_prob_wt = 0.0

        for _ in range(header.repeat):
            non_zero_list = np.nonzero(test_normalized)
            random_index = random.randint(0, len(non_zero_list[0]) - 1)
            non_zero_row = non_zero_list[0][random_index]  # random non-zero row index
            non_zero_col = non_zero_list[1][random_index]  # random non-zero col index

            X_patch = test_normalized[
                int(max(0, non_zero_row - (header.img_size / 2))):
                int(min(h_whole, non_zero_row + (header.img_size / 2))),
                int(max(0, non_zero_col - (header.img_size / 2))):
                int(min(w_whole, non_zero_col + (header.img_size / 2)))]

            X_patch_img = data_transforms(
                augmentation(Image.fromarray(X_patch), rand_p=0.0, mode='test'))
            X_patch_img_ = np.squeeze(np.asarray(X_patch_img))

            X_patch_1 = np.expand_dims(X_patch_img_, axis=0)
            X_patch_2 = np.expand_dims(X_patch_img_, axis=0)
            X_patch_3 = np.expand_dims(X_patch_img_, axis=0)
            X_ = np.concatenate((X_patch_1, X_patch_2, X_patch_3), axis=0)
            X_ = np.expand_dims(X_, axis=0)

            X = torch.from_numpy(X_)
            X = X.to(device)

            checkpoint = torch.load(
                os.path.join(header.save_dir,
                             str(header.inference_epoch) + '.pth'))
            model_ft.load_state_dict(checkpoint['model_state_dict'])
            model_ft.eval()

            outputs = model_ft(X)
            outputs_prob = F.softmax(outputs, dim=1)  # explicit dim avoids the deprecation warning
            prob = outputs_prob[0][label]
            prob_wt = prob.detach().cpu().numpy()

            gradcam = GradCAM.from_config(model_type='resnet', arch=model_ft,
                                          layer_name='layer4')
            mask, logit = gradcam(X, class_idx=label)
            mask_np = np.squeeze(mask.detach().cpu().numpy())

            indicer = np.ones((224, 224))
            mask_np = np.asarray(
                cv2.resize(
                    mask_np,
                    dsize=(
                        int(min(w_whole, non_zero_col + (header.img_size / 2))) -
                        int(max(0, non_zero_col - (header.img_size / 2))),
                        int(min(h_whole, non_zero_row + (header.img_size / 2))) -
                        int(max(0, non_zero_row - (header.img_size / 2))))))
            indicer = np.asarray(
                cv2.resize(
                    indicer,
                    dsize=(
                        int(min(w_whole, non_zero_col + (header.img_size / 2))) -
                        int(max(0, non_zero_col - (header.img_size / 2))),
                        int(min(h_whole, non_zero_row + (header.img_size / 2))) -
                        int(max(0, non_zero_row - (header.img_size / 2))))))

            mask_add = np.zeros((1024, 1024))
            mask_add[
                int(max(0, non_zero_row - (header.img_size / 2))):
                int(min(h_whole, non_zero_row + (header.img_size / 2))),
                int(max(0, non_zero_col - (header.img_size / 2))):
                int(min(w_whole, non_zero_col + (header.img_size / 2)))] = mask_np
            mask_add = mask_add * prob_wt

            indicer_add = np.zeros((1024, 1024))
            indicer_add[
                int(max(0, non_zero_row - (header.img_size / 2))):
                int(min(h_whole, non_zero_row + (header.img_size / 2))),
                int(max(0, non_zero_col - (header.img_size / 2))):
                int(min(w_whole, non_zero_col + (header.img_size / 2)))] = indicer

            background = background + mask_add
            # the number in this indicer means how many times the area was included
            background_indicer = background_indicer + indicer_add
            sum_prob_wt = sum_prob_wt + prob_wt

        final_mask = np.divide(background, background_indicer + 1e-7)
        final_mask = np.expand_dims(np.expand_dims(final_mask, axis=0), axis=0)
        torch_final_mask = torch.from_numpy(final_mask)

        # visualize on the (already resized) masked image
        test_img = np.asarray(Image.fromarray(test_normalized).resize((1024, 1024)))
        test_img = (test_img - test_img.min()) / test_img.max()
        test_img = np.expand_dims(test_img, axis=0)
        test_img = np.concatenate((test_img, test_img, test_img), axis=0)
        torch_final_img = torch.from_numpy(np.expand_dims(test_img, axis=0))

        final_cam, cam_result = visualize_cam(torch_final_mask, torch_final_img)
        final_cam = (final_cam - final_cam.min()) / final_cam.max()
        final_cam_np = np.swapaxes(np.swapaxes(np.asarray(final_cam), 0, 2), 0, 1)

        test_img_np = np.swapaxes(np.swapaxes(test_img, 0, 2), 0, 1)
        final_combined = test_img_np + final_cam_np
        final_combined = (final_combined - final_combined.min()) / final_combined.max()

        plt.imshow(final_combined)
        # masked_img is the source file path for this sample
        plt.savefig(
            masked_img.split('.image.npy')[0] + '.patch.heatmap_' + '.png')
    return vocab_scores_tensor

images = []
ct = 0
random.seed(0)
for batch in valid_data:  # or anything else you want to do
    if ct == 6:
        break
    target, distractors, idx = batch
    target = target[0].unsqueeze(0)
    distractors = [distractors[0][0].unsqueeze(0)]

    sm = simpleModel(model, distractors, word_counts, 's_t')
    sm.eval()
    gradcam = GradCAM(sm, sm.model.cnn.conv_net[8])
    mask, _ = gradcam(target)
    heatmap_s, result = visualize_cam(mask, target)
    model.zero_grad()

    if use_distractors_in_sender:
        sm = simpleModel(model, target, word_counts, 's_d')
        sm.eval()
        gradcam = GradCAM(sm, sm.model.cnn.conv_net[6])
        mask, _ = gradcam(distractors[0])
        heatmap_s_d, result = visualize_cam(mask, distractors[0])
        model.zero_grad()

    sm = simpleModel(model, distractors, word_counts, 'r_t')
    sm.train()
    gradcam = GradCAM(sm, sm.model.cnn.conv_net[6])
def dl_system():
    # page text
    st.title('AI for Detecting Lung Diseases')
    st.write("\n ")
    st.write(
        "This Artificial Intelligence system was built to identify lung diseases, "
        "supporting the physician's work with accurate results."
    )
    st.write(
        "Besides detecting whether an x-ray image shows signs of disease, the system "
        "shows where it looked when making its final decision about the x-ray. This "
        "gives the physician interpretability of where the AI is looking, and may "
        "help spot noise that the physician did not notice."
    )
    st.write('\n')
    st.write('\n')

    # image upload
    uploaded_file = st.file_uploader("Choose an image...", type="png")
    temp_file = NamedTemporaryFile(delete=False)
    st.write("\n")
    st.write("\n")
    st.write("\n")

    # image loading
    if uploaded_file is not None:
        temp_file.write(uploaded_file.getvalue())
        image = Image.open(temp_file)
        #os.mkdir(f"{image}")
        #image = np.asarray(Image.open(temp_file))
        st.image(image, caption='X-ray', width=300, height=250)

        # prediction button
        if st.button('Predict'):
            y_pred = prediction(image)
            if y_pred.any() == 0:
                st.success("Prediction: Covid")
            elif y_pred.any() == 1:
                st.success("Prediction: Normal")
            elif y_pred.any() == 2:
                st.success("Prediction: Pneumonia")
            else:
                pass

        # GradCAM parameters
        architecture = model
        last_conv = model.get_layer("conv5_block3_out")
        last_layers = ["avg_pool", "predictions"]
        image_size = (224, 224, 3)
        #img_path = image

        st.write("\n")
        st.write("\n")
        st.write("\n")
        st.title("AI Interpretability")
        st.write("\n")
        st.write("\n")
        st.write("\n")

        # GradCAM
        if st.button("GradCAM"):
            img_path = uploaded_file.name
            grad = GradCAM(architecture, last_conv, last_layers, img_path,
                           image_size)
            image_grad = grad.gradcam_generate()
            st.image(image_grad, caption='Diagnosis', width=500, height=450)
def process_image(ids, model, learn):
    for ind, ID in enumerate(tqdm(ids)):
        ids_processed = [x.split('.')[0] for x in os.listdir(ROOT + 'test_p2/')]
        if ID in ids_processed:
            continue
        print(f'Processing {ID}.')
        img, orig_shape = read_image(ID)
        #img_array = np.array(img)  #np.transpose(, (2,0,1))

        #Use cellpose for masks. masks (list of 2D arrays, or single 3D array
        #(if do_3D=True)) - labelled image, where 0=no masks; 1,2,...=mask labels.
        #mask, flows, styles, diams = model.eval(img, diameter=200, channels=channels, do_3D=False, progress=None)  #flow_threshold=None,
        #if mask.max() <= 4:
        #    mask, flows, styles, diams = model.eval(img, diameter=100, channels=channels, do_3D=False, progress=None)
        #io.save_masks(img, mask, flows, ROOT+f'test_p3/{ID}.png')
        mask = np.load(ROOT + 'test_masks/' + ID + '.npy')
        mask_bin = np.where(mask > 0, 1, 0)
        img = np.uint8(img * mask_bin[:, :, None])

        #Get bounding boxes.
        #bboxes = get_contour_bbox_from_raw(mask)
        #if (len(bboxes) == 0):
        #    return_dict[ID] = (img.shape, default_rle(img))
        #    continue

        #Cut out, pad to square, and resize. The first 'cell' in cell_tiles is
        #the whole image and should be ignored.
        #img = read_image(ID, greencolor='green')
        #cell_tiles = [
        #    #cv2.resize(
        #    pad_to_square(img[bbox[1]:bbox[3], bbox[0]:bbox[2], ...])
        #    #    ,TILE_SIZE, interpolation=cv2.INTER_CUBIC)
        #    for bbox in bboxes]

        #Calculate RLEs for all cells ordered by their ID in mask.
        orig_mask = scale(mask, orig_shape[0], orig_shape[1])
        rles = [encode_binary_mask(orig_mask, mask_id)
                for mask_id in range(mask.max() + 1)]

        #Get image predictions, e.g.
        #('nucleoplasm', tensor(16), tensor([2.0571e-02, 2.7850e-03, 3.8773e-02, 1.0485e-01, 2.2821e-02, 6.9570e-02,...]))
        #for i in range(3):
        #    img[i,:,:] -= imagenet_stats[0][i]
        #    img[i,:,:] /= imagenet_stats[1][
        _preds = learn.predict(img)  #[learn.predict(tile) for tile in cell_tiles]

        torch_img = transforms.Compose([transforms.ToTensor()])(img)[None]  # .cuda() transforms.Resize((460, 460)),
        normed_torch_img = transforms.Normalize(
            [0.485, 0.456, 0.406], [0.229, 0.224, 0.225])(torch_img)[None]  #, 0.456 , 0.224

        #Get explanation. For each class find its explainable cells.
        prediction_str = ""
        all_cells = {}

        #Gradcam.
        class_idxs = np.where(_preds[2] > CONF_THRESH)[0]
        #class_idxs = np.argpartition(_preds[2], -4)[-4:].numpy()
        target_layer = learn.model[0]
        gradcam = GradCAM(learn.model, target_layer)

        #Lime.
        #classifier_fn = lambda x: [learn.predict(i)[2].numpy() for i in x]
        #explainer = lime_image.LimeImageExplainer()
        #explanation = explainer.explain_instance(img, classifier_fn, top_labels=4, num_samples=190)  #hide_color=0,
        #class_idxs = explanation.top_labels

        for class_idx in class_idxs:
            #Gradcam mask for one predicted class.
            mask_cam = gradcam(normed_torch_img[0], class_idx=class_idx)
            visualize_cam1(mask_cam[0], torch_img, ID, class_idx)
            mask_cam = mask_cam[0].numpy()
            #img_cpy, mask_cam = explanation.get_image_and_mask(class_idx, positive_only=True)

            #Find cells with high explanation. Multiply by the Cellpose mask to
            #find relevant cells, then keep only large overlapping regions.
            mask_cam_bin = np.where(mask_cam > 0, 1, 0)
            explained_cells = np.histogram(mask_cam_bin * mask,
                                           bins=range(mask.max() + 2))
            #Approximated cell area in pixels.
            _thresh = 0.5 * mask_bin.sum() / mask.max()  #0.5 * img.shape[0]**2/mask.max()
            cell_ids = np.where(explained_cells[0] > _thresh)

            #For each explaining cell build its prediction string.
            for i in np.delete(cell_ids, 0):  #range(1, len(cell_tiles)):
                #classes = np.where(_preds[i][2]>CONF_THRESH)[0]
                #for j in classes:
                mask_bin = np.where(mask == i, 1, 0)
                iou = (mask_cam_bin * mask_bin).sum()
                #avg_cam = iou/mask_bin.sum()
                avg_cam = np.mean(mask_cam * mask_bin)
                cell_pred = avg_cam * _preds[2][class_idx].item()
                clsid = LEARN_LBL_NAMES[class_idx]
                if (i not in all_cells) or all_cells[i] * 10 < cell_pred:
                    all_cells[i] = cell_pred
                    prediction_str += f'{int(clsid)} {cell_pred} {rles[i]} '  #LEARN_INT_2_KAGGLE_INT[

        #For unexplained cells use a random class.
        for i in set(range(mask.max() + 1)) - set(all_cells.keys()) - {0}:
            class_idx = np.random.choice(class_idxs, 1)[0]
            clsid = LEARN_LBL_NAMES[class_idx]
            prediction_str += f'{int(clsid)} {avg_cam * _preds[2][class_idx].item()} {rles[i]} '

        #Save predictions to be added to the dataframe at the end, e.g.
        #ImageAID,ImageAWidth,ImageAHeight,class_0 1 rle_encoded_cell_1_mask class_14 1 rle_encoded_cell_1_mask 0 1 rle_encoded_cell_2_mask
        return_dict[ID] = (orig_shape, prediction_str)
        with open(ROOT + 'test_p2/' + ID + '.txt', 'w') as f:
            f.write(f'{ID},{orig_shape[0]},{orig_shape[1]},{prediction_str}')
            f.flush()
            os.fsync(f.fileno())
# Load the .pth checkpoint of the pretrained net-A extractor
state_dict = torch.load('./model/2real/extractor_8.pth')
new_state_dict = OrderedDict()  # not strictly necessary (from collections import OrderedDict)
# Drop the keys that net-B (resnet101) does not need
new_state_dict = {k: v for k, v in state_dict.items() if k in resnet101_dict}
resnet101_dict.update(new_state_dict)  # update the parameters
resnet.load_state_dict(resnet101_dict)  # load the parameters
resnet.eval()
resnet.cuda()

###
cam_dict = dict()
resnet_model_dict = dict(type='resnet', arch=resnet, layer_name='layer4',
                         input_size=(224, 224))
resnet_gradcam = GradCAM(resnet_model_dict, True)
resnet_gradcampp = GradCAMpp(resnet_model_dict, True)
cam_dict['resnet'] = [resnet_gradcam, resnet_gradcampp]

images = []
for gradcam, gradcam_pp in cam_dict.values():
    mask, _ = gradcam(normed_torch_img)
    heatmap, result = visualize_cam(mask.cpu(), torch_img.cpu())

    mask_pp, _ = gradcam_pp(normed_torch_img)
    heatmap_pp, result_pp = visualize_cam(mask_pp.cpu(), torch_img.cpu())

    images.append(torch.stack([torch_img.squeeze().cpu(), heatmap, heatmap_pp,
                               result, result_pp], 0))

# images = make_grid(torch.cat(images, 0), nrow=5)
def main(args): """ Main Function for testing and saving attention maps. Inputs: args - Namespace object from the argument parser """ torch.manual_seed(args.seed) # Load dataset if args.dataset == 'mnist': test_dataset = OneClassMnist.OneMNIST('./data', args.one_class, train=False, transform=transforms.ToTensor()) elif args.dataset == 'ucsd_ped1': test_dataset = Ped1_loader.UCSDAnomalyDataset('./data', train=False, resize=args.image_size) elif args.dataset == 'mvtec_ad': class_name = mvtec.CLASS_NAMES[args.one_class] test_dataset = mvtec.MVTecDataset(class_name=class_name, is_train=False, grayscale=False, root_path=args.data_path) test_steps = len(test_dataset) kwargs = { 'num_workers': args.num_workers, 'pin_memory': True } if device == "cuda" else {} test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size, shuffle=True, **kwargs) # Select a model architecture if args.model == 'vanilla_mnist': imshape = [1, 28, 28] model = ConvVAE_mnist(args.latent_size).to(device) elif args.model == 'vanilla_ped1': imshape = [1, args.image_size, args.image_size] model = ConvVAE_ped1(args.latent_size, args.image_size, args.batch_norm).to(device) elif args.model == 'resnet18_3': imshape = [3, 256, 256] model = ResNet18VAE_3(args.latent_size, x_dim=imshape[-1], nc=imshape[0]).to(device) print("Layer is:", args.target_layer) # Load model checkpoint = torch.load(args.model_path) model.load_state_dict(checkpoint['state_dict']) mu_avg, logvar_avg = (0, 1) gcam = GradCAM(model, target_layer=args.target_layer, device=device) prediction_stack = np.zeros((test_steps, imshape[-1], imshape[-1]), dtype=np.float32) gt_mask_stack = np.zeros((test_steps, imshape[-1], imshape[-1]), dtype=np.uint8) # Generate attention maps for batch_idx, (x, y) in enumerate(test_loader): # print("batch_idx", batch_idx) model.eval() x = x.to(device) x_rec, mu, logvar = gcam.forward(x) model.zero_grad() gcam.backward(mu, logvar, mu_avg, logvar_avg) gcam_map = gcam.generate() gcam_max = torch.max(gcam_map).item() # If image has one channel, make it three channel(need for heatmap) if x.size(1) == 1: x = x.repeat(1, 3, 1, 1) # Visualize and save attention maps for i in range(x.size(0)): x_arr = x[i].permute(1, 2, 0).cpu().numpy() * 255 x_im = Image.fromarray(x_arr.astype(np.uint8)) # Get the gradcam for this image prediction = gcam_map[i].squeeze().cpu().data.numpy() # Add prediction and mask to the stacks prediction_stack[batch_idx * args.batch_size + i] = prediction gt_mask_stack[batch_idx * args.batch_size + i] = y[i] if save_gcam_image: im_path = args.result_dir if not os.path.exists(im_path): os.mkdir(im_path) x_im.save( os.path.join(im_path, "{}-{}-origin.png".format(batch_idx, i))) file_path = os.path.join( im_path, "{}-{}-attmap.png".format(batch_idx, i)) save_gradcam(x_arr, file_path, prediction, gcam_max=gcam_max) # Stop of dataset is mnist because there aren't GTs available if args.dataset != 'mnist': # Compute area under the ROC score auc = roc_auc_score(gt_mask_stack.flatten(), prediction_stack.flatten()) print(f"AUROC score: {auc}") fpr, tpr, thresholds = roc_curve(gt_mask_stack.flatten(), prediction_stack.flatten()) if plot_ROC: plt.plot(tpr, fpr, label="ROC") plt.xlabel("FPR") plt.ylabel("TPR") plt.legend() plt.savefig( str(args.result_dir) + "auroc_" + str(args.model) + str(args.target_layer) + str(args.one_class) + ".png") # Compute IoU if args.iou == True: print(f"IoU score: {j_score}") max_val = np.max(prediction_stack) max_steps = 100 best_thres = 0 best_iou = 0 # Ge the IoU for 100 different 
thresholds for i in range(1, max_steps): thresh = i / max_steps * max_val prediction_bin_stack = prediction_stack > thresh iou = jaccard_score(gt_mask_stack.flatten(), prediction_bin_stack.flatten()) if iou > best_iou: best_iou = iou best_thres = thresh print("Best threshold;", best_thres) print("Best IoU score:", best_iou) return
image = preprocess_input(image)

# use the network to make predictions on the input image and find
# the class label index with the largest corresponding probability
preds = model.predict(image)
i = np.argmax(preds[0])
class_names = ['Opencountry', 'coast', 'forest', 'highway', 'inside_city',
               'mountain', 'street', 'tallbuilding']
label = class_names[i]
prob = np.max(preds[0])
label = "{}: {:.2f}%".format(label, prob * 100)
print("[INFO] {}".format(label))

# initialize our gradient class activation map and build the heatmap
cam = GradCAM(model, i)
heatmap = cam.compute_heatmap(image, normalize_grads=args["norm"])

# load the original image from disk (in OpenCV format)
orig = cv2.imread(args["image"])

# resize the resulting heatmap to the original input image dimensions
# and then overlay heatmap on top of the image
heatmap = cv2.resize(heatmap, (orig.shape[1], orig.shape[0]))
(heatmap, output) = cam.overlay_heatmap(heatmap, orig, alpha=0.5,
                                        colormap=color_maps[args["color"]])

# draw the predicted label on the output image
cv2.rectangle(output, (0, 0), (340, 40), (0, 0, 0), -1)
cv2.putText(output, label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.8,
            (255, 255, 255), 2)
def eval(self, gradcam=False, rise=False, test_on_val=False):
    """The function for the meta-eval phase."""
    # Load the logs
    if os.path.exists(osp.join(self.args.save_path, 'trlog')):
        trlog = torch.load(osp.join(self.args.save_path, 'trlog'))
    else:
        trlog = None

    torch.manual_seed(1)
    np.random.seed(1)
    # Load meta-test set
    test_set = Dataset('val' if test_on_val else 'test', self.args)
    sampler = CategoriesSampler(test_set.label, 600, self.args.way,
                                self.args.shot + self.args.val_query)
    loader = DataLoader(test_set, batch_sampler=sampler, num_workers=8,
                        pin_memory=True)

    # Set test accuracy recorder
    test_acc_record = np.zeros((600,))

    # Load model for meta-test phase
    if self.args.eval_weights is not None:
        weights = self.addOrRemoveModule(
            self.model,
            torch.load(self.args.eval_weights)['params'])
        self.model.load_state_dict(weights)
    else:
        self.model.load_state_dict(
            torch.load(osp.join(self.args.save_path,
                                'max_acc' + '.pth'))['params'])
    # Set model to eval mode
    self.model.eval()

    # Set accuracy averager
    ave_acc = Averager()

    # Generate labels
    label = torch.arange(self.args.way).repeat(self.args.val_query)
    if torch.cuda.is_available():
        label = label.type(torch.cuda.LongTensor)
    else:
        label = label.type(torch.LongTensor)
    label_shot = torch.arange(self.args.way).repeat(self.args.shot)
    if torch.cuda.is_available():
        label_shot = label_shot.type(torch.cuda.LongTensor)
    else:
        label_shot = label_shot.type(torch.LongTensor)

    if gradcam:
        self.model.layer3 = self.model.encoder.layer3
        model_dict = dict(type="resnet", arch=self.model,
                          layer_name='layer3')
        grad_cam = GradCAM(model_dict, True)
        grad_cam_pp = GradCAMpp(model_dict, True)
        self.model.features = self.model.encoder
        guided = GuidedBackprop(self.model)
    if rise:
        self.model.layer3 = self.model.encoder.layer3
        score_mod = ScoreCam(self.model)

    # Start meta-test
    for i, batch in enumerate(loader, 1):
        if torch.cuda.is_available():
            data, _ = [_.cuda() for _ in batch]
        else:
            data = batch[0]
        k = self.args.way * self.args.shot
        data_shot, data_query = data[:k], data[k:]

        if i % 5 == 0:
            suff = "_val" if test_on_val else ""
            if self.args.rep_vec or self.args.cross_att:
                print('batch {}: {:.2f}({:.2f})'.format(
                    i,
                    ave_acc.item() * 100, acc * 100))
                if self.args.cross_att:
                    label_one_hot = self.one_hot(label).to(label.device)
                    _, _, logits, simMapQuer, simMapShot, normQuer, normShot = self.model(
                        (data_shot, label_shot, data_query),
                        ytest=label_one_hot,
                        retSimMap=True)
                else:
                    logits, simMapQuer, simMapShot, normQuer, normShot, fast_weights = self.model(
                        (data_shot, label_shot, data_query), retSimMap=True)

                torch.save(
                    simMapQuer,
                    "../results/{}/{}_simMapQuer{}{}.th".format(
                        self.args.exp_id, self.args.model_id, i, suff))
                torch.save(
                    simMapShot,
                    "../results/{}/{}_simMapShot{}{}.th".format(
                        self.args.exp_id, self.args.model_id, i, suff))
                torch.save(
                    data_query,
                    "../results/{}/{}_dataQuer{}{}.th".format(
                        self.args.exp_id, self.args.model_id, i, suff))
                torch.save(
                    data_shot,
                    "../results/{}/{}_dataShot{}{}.th".format(
                        self.args.exp_id, self.args.model_id, i, suff))
                torch.save(
                    normQuer,
                    "../results/{}/{}_normQuer{}{}.th".format(
                        self.args.exp_id, self.args.model_id, i, suff))
                torch.save(
                    normShot,
                    "../results/{}/{}_normShot{}{}.th".format(
                        self.args.exp_id, self.args.model_id, i, suff))
            else:
                logits, normQuer, normShot, fast_weights = self.model(
                    (data_shot, label_shot, data_query), retFastW=True,
                    retNorm=True)
                torch.save(
                    normQuer,
                    "../results/{}/{}_normQuer{}{}.th".format(
                        self.args.exp_id, self.args.model_id, i, suff))
                torch.save(
                    normShot,
                    "../results/{}/{}_normShot{}{}.th".format(
                        self.args.exp_id, self.args.model_id, i, suff))

            if gradcam:
                print("Saving gradmaps", i)
                allMasks, allMasks_pp, allMaps = [], [], []
                for l in range(len(data_query)):
                    allMasks.append(
                        grad_cam(data_query[l:l + 1], fast_weights, None))
                    allMasks_pp.append(
                        grad_cam_pp(data_query[l:l + 1], fast_weights, None))
                    allMaps.append(
                        guided.generate_gradients(data_query[l:l + 1],
                                                  fast_weights))
                allMasks = torch.cat(allMasks, dim=0)
                allMasks_pp = torch.cat(allMasks_pp, dim=0)
                allMaps = torch.cat(allMaps, dim=0)
                torch.save(
                    allMasks,
                    "../results/{}/{}_gradcamQuer{}{}.th".format(
                        self.args.exp_id, self.args.model_id, i, suff))
                torch.save(
                    allMasks_pp,
                    "../results/{}/{}_gradcamppQuer{}{}.th".format(
                        self.args.exp_id, self.args.model_id, i, suff))
                torch.save(
                    allMaps,
                    "../results/{}/{}_guidedQuer{}{}.th".format(
                        self.args.exp_id, self.args.model_id, i, suff))
            if rise:
                print("Saving risemaps", i)
                allScore = []
                for l in range(len(data_query)):
                    allScore.append(
                        score_mod(data_query[l:l + 1], fast_weights))
        else:
            if self.args.cross_att:
                label_one_hot = self.one_hot(label).to(label.device)
                _, _, logits = self.model((data_shot, label_shot, data_query),
                                          ytest=label_one_hot)
            else:
                logits = self.model((data_shot, label_shot, data_query))

        acc = count_acc(logits, label)
        ave_acc.add(acc)
        test_acc_record[i - 1] = acc

    # Calculate the confidence interval, update the logs
    m, pm = compute_confidence_interval(test_acc_record)
    if trlog is not None:
        print('Val Best Epoch {}, Acc {:.4f}, Test Acc {:.4f}'.format(
            trlog['max_acc_epoch'], trlog['max_acc'], ave_acc.item()))
    print('Test Acc {:.4f} + {:.4f}'.format(m, pm))
    return m
image = imagenet_utils.preprocess_input(image)

# use the network to make predictions on the input image and find
# the class label index with the largest corresponding probability
preds = model.predict(image)
i = np.argmax(preds[0])

# decode the ImageNet predictions to obtain the human-readable label
decoded = imagenet_utils.decode_predictions(preds)
(imagenetID, label, prob) = decoded[0][0]
label = "{}: {:.2f}%".format(label, prob * 100)
print("[INFO] {}".format(label))

# initialize our gradient class activation map and build the heatmap
if args['layer'] == 'None':
    cam = GradCAM(model, i)
else:
    cam = GradCAM(model, i, args['layer'])
heatmap = cam.compute_heatmap(image)

# resize the resulting heatmap to the original input image dimensions
# and then overlay heatmap on top of the image
heatmap = cv2.resize(heatmap, (orig.shape[1], orig.shape[0]))
(heatmap, output) = cam.overlay_heatmap(heatmap, orig, alpha=0.5)

# draw the predicted label on the output image
cv2.rectangle(output, (0, 0), (340, 40), (0, 0, 0), -1)
cv2.putText(output, label, (10, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.8,
            (255, 255, 255), 2)
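# Sketch of a typical display step after the overlay above: stack the
# original, heatmap, and annotated output vertically and save the result
# (the file name is illustrative, not from the original script).
stacked = cv2.vconcat([orig, heatmap, output])
cv2.imwrite('gradcam_result.jpg', stacked)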
def main():
    parser = argparse.ArgumentParser(
        description='Explainable VAE MNIST Example')
    parser.add_argument('--result_dir', type=str, default='test_results',
                        metavar='DIR', help='output directory')
    parser.add_argument('--batch_size', type=int, default=128, metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    # model options
    parser.add_argument('--latent_size', type=int, default=32, metavar='N',
                        help='latent vector size of encoder')
    parser.add_argument('--model_path', type=str,
                        default='./ckpt/model_best.pth', metavar='DIR',
                        help='pretrained model directory')
    parser.add_argument('--one_class', type=int, default=8, metavar='N',
                        help='outlier digit for one-class VAE testing')
    args = parser.parse_args()

    torch.manual_seed(args.seed)

    kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {}
    one_class = args.one_class  # Choose the current outlier digit to be 8
    one_mnist_test_dataset = OneClassMnist.OneMNIST(
        './data', one_class, train=False, transform=transforms.ToTensor())

    test_loader = torch.utils.data.DataLoader(one_mnist_test_dataset,
                                              batch_size=args.batch_size,
                                              shuffle=False, **kwargs)

    model = ConvVAE(args.latent_size).to(device)
    checkpoint = torch.load(args.model_path)
    model.load_state_dict(checkpoint['state_dict'])

    mu_avg, logvar_avg = 0, 1
    gcam = GradCAM(model, target_layer='encoder.2', cuda=True)
    test_index = 0
    for batch_idx, (x, _) in enumerate(test_loader):
        model.eval()
        x = x.to(device)
        x_rec, mu, logvar = gcam.forward(x)

        model.zero_grad()
        gcam.backward(mu, logvar, mu_avg, logvar_avg)
        gcam_map = gcam.generate()

        ## Visualize and save attention maps ##
        x = x.repeat(1, 3, 1, 1)
        for i in range(x.size(0)):
            raw_image = x[i] * 255.0
            ndarr = raw_image.permute(1, 2, 0).cpu().byte().numpy()
            im = Image.fromarray(ndarr.astype(np.uint8))
            im_path = args.result_dir
            if not os.path.exists(im_path):
                os.mkdir(im_path)
            im.save(
                os.path.join(
                    im_path,
                    "{}-{}-origin.png".format(test_index, str(one_class))))

            file_path = os.path.join(
                im_path, "{}-{}-attmap.png".format(test_index, str(one_class)))
            r_im = np.asarray(im)
            save_cam(r_im, file_path, gcam_map[i].squeeze().cpu().data.numpy())
            test_index += 1
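# Entry-point guard plus an invocation sketch; the script file name below is
# an assumption, not taken from the original:
#   python explain_vae_mnist.py --one_class 8 --model_path ./ckpt/model_best.pth
if __name__ == '__main__':
    main()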