Example #1
def vis_attention(img, q, ans, att_map):
    '''
    Visualize an attention map over the input image.
    img: 3 x 448 x 448 normalized image tensor
    q: question as a vector of 23 token indices
    ans: answer index (scalar)

    returns: the attention map overlaid on the image, the question
    decoded to English, and the answer decoded to English
    '''

    # the dictionaries are stored as pickled object arrays, so newer
    # numpy versions need allow_pickle=True to load them
    q_dict = np.load('q_dict.npy', allow_pickle=True).item()
    a_dict = np.load('a_dict.npy', allow_pickle=True).item()
    unorm = utils.UnNormalize(mean=(0.485, 0.456, 0.406),
                              std=(0.229, 0.224, 0.225))

    sent = sent_from_que(q, q_dict)
    anss = a_dict[ans]

    # Resize the attention map to full image resolution; scale by 5x to
    # make the map more salient. cv2.resize expects (width, height).
    rsz_att_map = cv2.resize(5 * att_map.data.cpu().numpy(),
                             (img.size(2), img.size(1)))
    # Convert to the 0-255 range, clipping so the 5x scaling cannot
    # overflow uint8
    final_att = np.uint8(255 * np.clip(rsz_att_map, 0, 1))

    img_np1 = unorm(img.data).cpu().numpy()
    # Convert the image from CHW to HWC (OpenCV) layout and to uint8
    img_cv = np.transpose(img_np1, (1, 2, 0))
    img_cv = cv2.convertScaleAbs(img_cv * 255)

    att_over_img = save_class_activation_on_image(img_cv, final_att)

    return att_over_img, sent, anss
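
Every example on this page relies on a utils.UnNormalize helper whose source is not shown. A minimal sketch, assuming it simply inverts torchvision's Normalize channel by channel:

class UnNormalize:
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        # tensor: C x H x W image normalized as (t - mean) / std;
        # undo the normalization in place, channel by channel
        for t, m, s in zip(tensor, self.mean, self.std):
            t.mul_(s).add_(m)
        return tensor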
Example #2
    def __init__(self, VQA_model, targetted=False):

        # keep a reference to the global VQA model under attack
        self.VQA_model = VQA_model
        # define the attacker network
        self.attack_model = AttackNet()
        # put it in training mode
        self.attack_model.train()
        # move it to the GPU
        self.attack_model.cuda()
        # optimize all learnable (requires_grad) parameters
        self.optimizer = optim.Adam(
            [p for p in self.attack_model.parameters() if p.requires_grad])
        # log-softmax over the answer logits
        self.log_softmax = nn.LogSoftmax(dim=1).cuda()
        self.scaller_const = Variable(torch.Tensor([10000]).float()).cuda()

        # Define unnormalizer
        self.unorm = utils.UnNormalize(mean=(0.485, 0.456, 0.406),
                                       std=(0.229, 0.224, 0.225))

        # targeted attacks flip the sign of the loss
        self.targetted_const = 1
        if targetted:
            self.targetted_const = -1
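
A hedged sketch of how a sign constant like targetted_const is typically applied (the names here are hypothetical, not from the original code): the attacker minimizes sign * log p(answer), so +1 pushes the true answer's probability down (untargeted) and -1 pulls the target answer's probability up (targeted).

import torch

def attack_loss(log_probs: torch.Tensor, answer_idx: int, sign: float) -> torch.Tensor:
    # sign = +1: untargeted, drive down log p(true answer)
    # sign = -1: targeted, drive up log p(target answer)
    return sign * log_probs[0, answer_idx]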
Example #3
    def __init__(self, VQA_model, targetted=False):
        # keep a reference to the global VQA model under attack
        self.VQA_model = VQA_model
        self.vocab = VQA_model.get_vocab()
        # inverse answer vocabulary: index -> answer string
        self.ans_vocab_inv = {b: a for a, b in self.vocab['answer'].items()}
        self.tanh = nn.Tanh().cuda()
        self.targetted = targetted
        self.confidence = 20
        self.scalar_const = Variable(torch.Tensor([1000]).float()).cuda()
        self.unorm = utils.UnNormalize(mean=(0.485, 0.456, 0.406),
                                       std=(0.229, 0.224, 0.225))
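
The tanh and confidence fields suggest a Carlini-Wagner-style attack. As an illustrative sketch (an assumption, not the original code), optimizing in tanh space keeps the adversarial image inside the valid pixel range:

import torch

image = torch.rand(3, 448, 448)                  # placeholder input in [0, 1]
w = torch.zeros_like(image, requires_grad=True)  # unconstrained variable
adv_image = 0.5 * (torch.tanh(w) + 1)            # always stays in [0, 1]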
Example #4
    log_path = os.path.join(saving_root, 'log_' + model_setting + '/')
    utils.mkdir(log_path)
    tb_logger = utils.Logger(log_path)

##
if chnum_in_ == 1:
    norm_mean = [0.5]
    norm_std = [0.5]
elif chnum_in_ == 3:
    norm_mean = [0.5, 0.5, 0.5]
    norm_std = [0.5, 0.5, 0.5]

frame_trans = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize(norm_mean, norm_std)])
unorm_trans = utils.UnNormalize(mean=norm_mean, std=norm_std)

###### data
video_dataset = data.VideoDataset(tr_data_idx_dir,
                                  tr_data_frame_dir,
                                  transform=frame_trans)
tr_data_loader = DataLoader(video_dataset,
                            batch_size=batch_size_in,
                            shuffle=True,
                            num_workers=opt.NumWorker)

###### model
if opt.ModelName == 'MemAE':
    model = AutoEncoderCov3DMem(chnum_in_,
                                mem_dim_in,
                                shrink_thres=sparse_shrink_thres)
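
For context, a small usage sketch (assuming clips come out of the loader as N x C x T x H x W, as is typical for a 3D-conv autoencoder): un-normalizing a single frame brings it back to a displayable [0, 1] range.

frames = next(iter(tr_data_loader))  # N x C x T x H x W (assumed)
first_frame = frames[0, :, 0]        # C x H x W
vis = unorm_trans(first_frame)       # back to [0, 1] for display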
Example #5
if args.target_dataset == "Avenue":
    data_dir = os.path.join(args.dataset_path, "Avenue/frames/testing/")
elif "UCSD" in args.target_dataset:
    data_dir = os.path.join(args.dataset_path, "%s/Test_jpg/" % args.target_dataset)
else:
    # fail fast; otherwise data_dir is undefined and the code crashes later
    raise ValueError("The dataset %s is not available" % args.target_dataset)
    
frame_trans = transforms.Compose([
        transforms.Resize([height, width]),
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5]),
    ])
unorm_trans = utils.UnNormalize(mean=[0.5], std=[0.5])  # single channel, matching the Normalize above

print("------Data folder", data_dir)
print("------Model folder", model_dir)
print("------Restored ckpt", ckpt_dir)

data_loader = data_utils.DataLoader(data_dir, frame_trans, time_step=num_frame-1, num_pred=1)
video_data_loader = DataLoader(data_loader, batch_size=batch_size, shuffle=False)

chnum_in_ = 1
mem_dim_in = 2000
sparse_shrink_thres = 0.0025

model = AutoEncoderCov3DMem(chnum_in_, mem_dim_in, shrink_thres=sparse_shrink_thres)
model_para = torch.load(ckpt_dir)
model.load_state_dict(model_para)
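
A typical follow-up before scoring the test frames (an assumption; not shown in the snippet) is to move the model to the GPU and switch to eval mode:

model = model.cuda().eval()
with torch.no_grad():
    for frames in video_data_loader:  # assuming the loader yields raw frame tensors
        out = model(frames.cuda())    # output format depends on the MemAE implementation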
Example #6
    def __init__(self, model, optimizer, all_loaders, args, resume_epoch):

        self.resume_epoch = resume_epoch
        self.args = args

        self.layer_list_all = args.layers
        self.layers_dict = {
            'layer2': {
                'name': 'layer2',
                'depth': 512,
                'size': 4
            },
            'layer3': {
                'name': 'layer3',
                'depth': 512,
                'size': 8
            },
            'layer4': {
                'name': 'layer4',
                'depth': 512,
                'size': 8
            },
            'layer5': {
                'name': 'layer5',
                'depth': 256,
                'size': 16
            },
            'layer6': {
                'name': 'layer6',
                'depth': 256,
                'size': 16
            },
        }

        self.generator = gantest.GanTester(args.path_model_gan,
                                           self.layer_list_all,
                                           device=torch.device('cuda'))
        self.z = self.generator.standard_z_sample(200000)

        self.model = model
        self.optimizer = optimizer  # use the optimizer supplied by the caller
        self.loaders = all_loaders
        self.loss_type = args.loss_type

        # Other parameters
        self.margin = args.margin
        self.clustering = args.clustering

        self.epoch = 0
        self.unorm = utils.UnNormalize(mean=(0.485, 0.456, 0.406),
                                       std=(0.229, 0.224, 0.225))

        output_size = 32 if 'large' in args.audio_model else 256

        if args.active_learning:
            active_learning.get_clusterer(self, args, output_size, model)
        else:
            if args.clustering:
                print('Creating cluster from scratch')
                cluster_path = os.path.join(
                    self.args.results, 'clusters',
                    args.name_checkpoint + '_' + str(time.time()))
                self.clusterer = Clusterer(
                    self.loaders['train'],
                    model,
                    path_store=cluster_path,
                    model_dim=args.embedding_dim,
                    save_results=True,
                    output_size=output_size,
                    args=self.args,
                    path_cluster_load=args.path_cluster_load)

        self.epochs_clustering = self.args.epochs_clustering
        self.clusters = self.mean_clust = self.std_clust = None
        self.cluster_counts = self.clusters_unit = None
Example #7
import numpy as np
import os
import copy
import cv2

import torch
from torch.autograd import Variable
from torchvision import models
import utils

unorm = utils.UnNormalize(mean=(0.485, 0.456, 0.406),
                          std=(0.229, 0.224, 0.225))


def softmax(x):
    """Compute softmax values for each sets of scores in x."""
    e_x = np.exp(x - np.max(x))
    return e_x / e_x.sum(axis=0)  # only difference
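
Subtracting the maximum leaves the result unchanged but keeps np.exp from overflowing on large scores. For example:

scores = np.array([1.0, 2.0, 3.0])
softmax(scores)  # -> array([0.09003057, 0.24472847, 0.66524096])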


def save_class_activation_on_image(img_cv, activation_map, path_to_file=None):
    """
    Overlays the CAM activation map on the original image, optionally
    saving the result, and returns it.
    Args:
        img_cv (numpy arr): original image in HWC (OpenCV) layout
        activation_map (numpy arr): grayscale activation map in 0-255
        path_to_file (str): optional path to write the visualization to
    """

    # Heatmap of activation map
    activation_heatmap = cv2.applyColorMap(activation_map, cv2.COLORMAP_HSV)
    # Blend heatmap and image (a simple 50/50 blend; the exact weights
    # are an assumption, as the original snippet ends here)
    att_over_img = cv2.addWeighted(img_cv, 0.5, activation_heatmap, 0.5, 0)
    if path_to_file is not None:
        cv2.imwrite(path_to_file, att_over_img)
    return att_over_img
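
A quick self-contained check of the helper with random data (illustrative only):

dummy_img = np.uint8(np.random.rand(448, 448, 3) * 255)  # fake HWC image
dummy_map = np.uint8(np.random.rand(448, 448) * 255)     # fake 0-255 map
overlay = save_class_activation_on_image(dummy_img, dummy_map)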