def vis_attention(img, q, ans, att_map):
    """Visualize an attention map over the input image.

    Args:
        img: 3 x 448 x 448 image tensor
        q: question tensor of length 23
        ans: answer index (scalar)

    Returns:
        att_map overlaid on the image, the question in English,
        and the answer in English.
    """
    # allow_pickle=True is required to load dict objects with numpy >= 1.16.4
    q_dict = np.load('q_dict.npy', allow_pickle=True).item()
    a_dict = np.load('a_dict.npy', allow_pickle=True).item()
    unorm = utils.UnNormalize(mean=(0.485, 0.456, 0.406),
                              std=(0.229, 0.224, 0.225))
    sent = sent_from_que(q, q_dict)
    anss = a_dict[ans]
    # Resize the attention map to full resolution (cv2 expects
    # (width, height)); the factor of 5 makes the map more salient
    rsz_att_map = cv2.resize(5 * att_map.data.cpu().numpy(),
                             (img.size(2), img.size(1)))
    # Convert to the 0-255 range
    final_att = np.uint8(255 * rsz_att_map)
    img_np1 = unorm(img.data).cpu().numpy()
    # Convert the image to HWC (OpenCV) format
    img_cv = np.transpose(img_np1, (1, 2, 0))
    img_cv = cv2.convertScaleAbs(img_cv.reshape(448, 448, 3) * 255)
    att_over_img = save_class_activation_on_image(img_cv, final_att)
    return att_over_img, sent, anss
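# A hypothetical usage sketch for vis_attention. The loader `val_loader`
# and a model returning (logits, attention maps) are assumptions made only
# to show the call pattern; they are not part of the original code.
img, q, _ = next(iter(val_loader))                       # one batch
logits, att_maps = model(Variable(img.cuda()), Variable(q.cuda()))
pred = int(logits.data.max(1)[1][0])                     # top answer index
overlay, question, answer = vis_attention(img[0], q[0], pred, att_maps[0])
print(question, '->', answer)
cv2.imwrite('att_overlay.png', overlay)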
def __init__(self, VQA_model, targetted=False):
    # Save a global VQA model
    self.VQA_model = VQA_model
    # Define an attacker net
    self.attack_model = AttackNet()
    # Put it into train mode
    self.attack_model.train()
    # Transfer to GPU
    self.attack_model.cuda()
    # Optimize all the learnable parameters of the attacker
    self.optimizer = optim.Adam(
        [p for p in self.attack_model.parameters() if p.requires_grad])
    # Define softmax (dim=1: normalize over the answer dimension)
    self.log_softmax = nn.LogSoftmax(dim=1).cuda()
    self.scaller_const = Variable(torch.Tensor([10000]).float()).cuda()
    # Define unnormalizer
    self.unorm = utils.UnNormalize(mean=(0.485, 0.456, 0.406),
                                   std=(0.229, 0.224, 0.225))
    # Is the attack targeted? If so, flip the sign of the loss
    self.targetted_const = 1
    if targetted:
        self.targetted_const = -1
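# A single optimization step for the attacker above, sketched under two
# assumptions that are not in the original code: self.attack_model maps an
# image to an additive perturbation, and self.VQA_model(img, que) returns
# answer logits. The method name attack_step is hypothetical.
def attack_step(self, img, que, ans_idx):
    adv_img = img + self.attack_model(img)      # perturbed input
    logits = self.VQA_model(adv_img, que)
    log_probs = self.log_softmax(logits)
    picked = log_probs.gather(1, ans_idx.view(-1, 1)).mean()
    # targetted_const = +1 (untargeted): push the true answer's
    # log-probability down; -1 (targeted): pull the target's up
    loss = self.targetted_const * picked
    self.optimizer.zero_grad()
    loss.backward()
    self.optimizer.step()
    return loss.item()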
def __init__(self, VQA_model, targetted=False):
    # Save a global VQA model
    self.VQA_model = VQA_model
    self.vocab = VQA_model.get_vocab()
    self.ans_vocab_inv = {b: a for a, b in self.vocab['answer'].items()}
    self.tanh = nn.Tanh().cuda()
    self.targetted = targetted
    self.confidence = 20
    self.scalar_const = Variable(torch.Tensor([1000]).float()).cuda()
    self.unorm = utils.UnNormalize(mean=(0.485, 0.456, 0.406),
                                   std=(0.229, 0.224, 0.225))
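# The fields above (a tanh box constraint, a confidence margin, a scalar
# constant) suggest a Carlini-Wagner style objective. A minimal sketch of
# that loss, assuming `logits` come from self.VQA_model and `target` is an
# answer index; the method name cw_loss is hypothetical.
def cw_loss(self, logits, target, orig_img, adv_img):
    one_hot = torch.zeros_like(logits)
    one_hot[:, target] = 1.0
    target_logit = (one_hot * logits).sum(dim=1)
    # largest logit among all non-target answers
    other_logit = ((1.0 - one_hot) * logits - one_hot * 1e4).max(dim=1)[0]
    if self.targetted:
        # make the target answer beat every other by at least `confidence`
        f = torch.clamp(other_logit - target_logit + self.confidence, min=0.0)
    else:
        # make some other answer beat the true one by at least `confidence`
        f = torch.clamp(target_logit - other_logit + self.confidence, min=0.0)
    l2_dist = ((adv_img - orig_img) ** 2).sum()
    return l2_dist + self.scalar_const * f.sum()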
log_path = os.path.join(saving_root, 'log_' + model_setting + '/')
utils.mkdir(log_path)
tb_logger = utils.Logger(log_path)

##
if chnum_in_ == 1:
    norm_mean = [0.5]
    norm_std = [0.5]
elif chnum_in_ == 3:
    norm_mean = (0.5, 0.5, 0.5)
    norm_std = (0.5, 0.5, 0.5)
frame_trans = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize(norm_mean, norm_std)])
unorm_trans = utils.UnNormalize(mean=norm_mean, std=norm_std)

###### data
video_dataset = data.VideoDataset(tr_data_idx_dir, tr_data_frame_dir,
                                  transform=frame_trans)
tr_data_loader = DataLoader(video_dataset,
                            batch_size=batch_size_in,
                            shuffle=True,
                            num_workers=opt.NumWorker)

###### model
if opt.ModelName == 'MemAE':
    model = AutoEncoderCov3DMem(chnum_in_, mem_dim_in,
                                shrink_thres=sparse_shrink_thres)
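# A minimal sketch of the training loop these objects feed into. The epoch
# count, learning rate, the assumption that the loader yields frame tensors
# directly, and the assumption that the model's forward pass returns the
# reconstruction (or a dict holding it under 'output') are all
# illustrative, not taken from the original code.
device = torch.device('cuda')
model.to(device).train()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
for epoch in range(60):
    for frames in tr_data_loader:
        frames = frames.to(device)
        out = model(frames)
        recon = out['output'] if isinstance(out, dict) else out
        loss = torch.mean((recon - frames) ** 2)    # reconstruction error
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print('epoch %d  recon loss %.6f' % (epoch, loss.item()))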
if args.target_dataset == "Avenue":
    data_dir = os.path.join(args.dataset_path, "Avenue/frames/testing/")
elif "UCSD" in args.target_dataset:
    data_dir = os.path.join(args.dataset_path,
                            "%s/Test_jpg/" % args.target_dataset)
else:
    print("The dataset is not available.")

frame_trans = transforms.Compose([
    transforms.Resize([height, width]),
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Normalize([0.5], [0.5]),
])
# Single-channel input, so un-normalize with one mean/std entry to match
# the Normalize above
unorm_trans = utils.UnNormalize(mean=(0.5,), std=(0.5,))

print("------Data folder", data_dir)
print("------Model folder", model_dir)
print("------Restored ckpt", ckpt_dir)

data_loader = data_utils.DataLoader(data_dir, frame_trans,
                                    time_step=num_frame - 1, num_pred=1)
video_data_loader = DataLoader(data_loader, batch_size=batch_size,
                               shuffle=False)

chnum_in_ = 1
mem_dim_in = 2000
sparse_shrink_thres = 0.0025
model = AutoEncoderCov3DMem(chnum_in_, mem_dim_in,
                            shrink_thres=sparse_shrink_thres)
model_para = torch.load(ckpt_dir)
model.load_state_dict(model_para)
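# A minimal evaluation sketch, assuming the anomaly score of a clip is its
# mean reconstruction error; the model's return format and the loader
# yielding frame tensors directly are the same assumptions as in the
# training sketch above.
device = torch.device('cuda')
model.to(device).eval()
recon_errors = []
with torch.no_grad():
    for frames in video_data_loader:
        frames = frames.to(device)
        out = model(frames)
        recon = out['output'] if isinstance(out, dict) else out
        # per-clip error over channel, time, height, width
        err = torch.mean((recon - frames) ** 2, dim=(1, 2, 3, 4))
        recon_errors.append(err.cpu())
scores = torch.cat(recon_errors)    # higher error -> more anomalous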
def __init__(self, model, optimizer, all_loaders, args, resume_epoch):
    self.resume_epoch = resume_epoch
    self.args = args
    # Note: this SGD optimizer is overridden below by the `optimizer`
    # argument passed in by the caller
    self.optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                     momentum=args.momentum,
                                     weight_decay=args.weight_decay)
    self.layer_list_all = args.layers
    self.layers_dict = {
        'layer2': {'name': 'layer2', 'depth': 512, 'size': 4},
        'layer3': {'name': 'layer3', 'depth': 512, 'size': 8},
        'layer4': {'name': 'layer4', 'depth': 512, 'size': 8},
        'layer5': {'name': 'layer5', 'depth': 256, 'size': 16},
        'layer6': {'name': 'layer6', 'depth': 256, 'size': 16},
    }
    self.generator = gantest.GanTester(args.path_model_gan,
                                       self.layer_list_all,
                                       device=torch.device('cuda'))
    self.z = self.generator.standard_z_sample(200000)
    self.model = model
    self.optimizer = optimizer
    self.loaders = all_loaders
    self.loss_type = args.loss_type

    # Other parameters
    self.margin = args.margin
    self.clustering = args.clustering
    self.epoch = 0
    self.unorm = utils.UnNormalize(mean=(0.485, 0.456, 0.406),
                                   std=(0.229, 0.224, 0.225))

    output_size = 32 if 'large' in args.audio_model else 256
    if args.active_learning:
        active_learning.get_clusterer(self, args, output_size, model)
    else:
        if args.clustering:
            print('Creating cluster from scratch')
            cluster_path = os.path.join(
                self.args.results, 'clusters',
                args.name_checkpoint + '_' + str(time.time()))
            self.clusterer = Clusterer(
                self.loaders['train'], model,
                path_store=cluster_path,
                model_dim=args.embedding_dim,
                save_results=True,
                output_size=output_size,
                args=self.args,
                path_cluster_load=args.path_cluster_load)
            self.epochs_clustering = self.args.epochs_clustering
            self.clusters = self.mean_clust = self.std_clust = self.cluster_counts = self.clusters_unit = None
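# Every snippet in this section relies on utils.UnNormalize, which is never
# shown. A minimal sketch of what such a helper typically looks like,
# assuming it inverts torchvision's Normalize channel by channel in place:
class UnNormalize(object):
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        """Revert Normalize on a CHW tensor: t_c = t_c * std_c + mean_c."""
        for t, m, s in zip(tensor, self.mean, self.std):
            t.mul_(s).add_(m)
        return tensor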
import numpy as np
import os
import copy
import cv2
import torch
from torch.autograd import Variable
from torchvision import models

import utils

unorm = utils.UnNormalize(mean=(0.485, 0.456, 0.406),
                          std=(0.229, 0.224, 0.225))


def softmax(x):
    """Compute softmax values for each set of scores in x."""
    # Subtracting the max is the only difference from the textbook
    # formula; it keeps the exponentials numerically stable
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)


def save_class_activation_on_image(img_cv, activation_map, path_to_file=None):
    """Saves and returns the CAM activation map overlaid on the original image.

    Args:
        img_cv (numpy arr): original image in OpenCV (HWC) format
        activation_map (numpy arr): grayscale activation map in [0, 255]
        path_to_file (str): path to store the visualization map to
    """
    # Heatmap of activation map
    activation_heatmap = cv2.applyColorMap(activation_map, cv2.COLORMAP_HSV)