Example #1
def train_lfl_epoch(old_model, model, criterion, optimizer, dataloader):
    # the old model stays frozen in eval mode; it only provides reference anchor
    # embeddings that `criterion` compares against
    old_model.eval()
    model.train()
    total_loss = 0
    total_metrics = 0
    for idx, data_items in enumerate(tqdm(dataloader)):
        optimizer.zero_grad()
        data_items = send_to_device(data_items, device)

        # negatives are stacked along the channel dimension; unfold them into
        # (batch * neg_samples) separate images
        b, c, h, w = data_items["neg"].size()
        data_items["neg"] = data_items["neg"].view(
            b * args.neg_samples, c // args.neg_samples, h, w)

        anchor, pos, neg = model(
            data_items["anchor"], data_items["pos"], data_items["neg"])
        # reference anchor embedding from the frozen old model; no gradients needed
        with torch.no_grad():
            old_anchor = old_model.get_embedding(data_items["anchor"])
        loss, metric = criterion(old_anchor=old_anchor, anchor=anchor,
                                 pos=pos, neg=neg, targets=data_items["anchor_target"])

        total_loss += loss.item()
        loss.backward()
        total_metrics += metric

        torch.nn.utils.clip_grad_norm_(model.parameters(), 10)
        optimizer.step()

    total_loss /= len(dataloader)
    if args.task_method == "regression":
        metric = total_metrics/len(dataloader)
    else:
        metric = total_metrics/len(dataloader.dataset)
    return total_loss, metric
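The `criterion` called above is project code that is not shown in these examples. As a purely hypothetical sketch of what an LFL-style objective could look like (a triplet margin loss plus an L2 term tying the new anchor embedding to the frozen old model's embedding; the class name, margin, and weighting are illustrative assumptions, not the repository's actual loss):

import torch.nn as nn
import torch.nn.functional as F

class TripletWithLessForgetting(nn.Module):
    # Hypothetical sketch only; the real `criterion` in this project may differ.
    def __init__(self, margin=0.2, distill_weight=1.0):
        super().__init__()
        self.triplet = nn.TripletMarginLoss(margin=margin)
        self.distill_weight = distill_weight

    def forward(self, old_anchor, anchor, pos, neg, targets=None):
        # negatives may arrive flattened as (batch * neg_samples, dim); repeat the
        # anchor and positive so every negative has a matching pair
        repeats = neg.size(0) // anchor.size(0)
        triplet_loss = self.triplet(anchor.repeat_interleave(repeats, dim=0),
                                    pos.repeat_interleave(repeats, dim=0), neg)
        # keep the new anchor embedding close to the frozen old model's embedding
        distill_loss = F.mse_loss(anchor, old_anchor)
        loss = triplet_loss + self.distill_weight * distill_loss
        return loss, distill_loss.item()  # second value returned only for monitoring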
Example #2
def get_flat_full_features(loader, args):

    # pre-allocate (N, D, E) buffers for the averaged question and answer word
    # embeddings and fill them batch by batch below
    avg_fn = loader.dataset.get_avg_embedding
    E = loader.dataset.dictionary.emb_size
    questions = torch.FloatTensor(loader.dataset.N, args.D, E)
    answers = torch.FloatTensor(loader.dataset.N, args.D, E)
    for i, batch in enumerate(loader):

        sys.stdout.write('\r{}/{} --> {:3.1f}%'.format(str(i+1), str(len(loader)), (i+1)/float(len(loader))*100))
        sys.stdout.flush()

        batch = utils.send_to_device(batch, args.gpu)
        bsz = batch['questions_ids'].size(0)
        questions[i*loader.batch_size:i*loader.batch_size+bsz] = avg_fn(batch['questions_ids'], batch['questions_length']).cpu()
        answers[i*loader.batch_size:i*loader.batch_size+bsz] = avg_fn(batch['answers_ids'], batch['answers_length']).cpu()
        
    sys.stdout.write("\n")
 
    return [ answers.view(-1, E), questions.view(-1, E)]
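`get_avg_embedding` is a dataset/utils helper not shown here. Assuming it does length-masked average pooling of word vectors (the exact implementation may differ), a minimal self-contained sketch with illustrative names:

import torch

def avg_embedding_sketch(word_vectors, token_ids, lengths):
    # word_vectors: (vocab, E) lookup table; token_ids: (..., S); lengths: (...)
    emb = word_vectors[token_ids]                                  # (..., S, E)
    S = token_ids.size(-1)
    mask = (torch.arange(S, device=token_ids.device)
            < lengths.unsqueeze(-1)).unsqueeze(-1)                 # (..., S, 1)
    summed = (emb * mask).sum(dim=-2)
    return summed / lengths.clamp(min=1).unsqueeze(-1).float()    # (..., E)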
Example #3
    def load_dictionary(self, shared_dictionary=None):

        if shared_dictionary:
            self.dictionary = shared_dictionary
        else:
            print('dictionary and word vectors loading...')
            if os.path.exists(self.saved_dictionary_path):
                self.dictionary = torch.load(self.saved_dictionary_path)
            else:
                self.build_dictionary()
            self.dictionary.load_word_vectors(self.fast_text_model,
                                              self.saved_wordvectors_path)
            self.dictionary.word_vectors = send_to_device(
                self.dictionary.word_vectors, self.device)

            print('dictionary loaded: words: {:d}'.format(len(
                self.dictionary)))
            print('word vectors loaded: words: {:d}; {:d}-dim'.format(
                len(self.dictionary.word_vectors), self.dictionary.emb_size))
Example #4
def get_flat_human_features(loader, args):
    
    avg_fn = loader.dataset.get_avg_embedding
    E = loader.dataset.dictionary.emb_size
    questions, answers = [], []
    for i, batch in enumerate(loader):

        sys.stdout.write('\r{}/{} --> {:3.1f}%'.format(str(i+1), str(len(loader)), (i+1)/float(len(loader))*100))
        sys.stdout.flush()

        # keep only the examples that belong to the human-scored subset
        mask = get_mask(batch['in_human_set'])

        if isinstance(mask, torch.Tensor):
            bsz = int(mask.sum())
            batch = utils.send_to_device(batch, args.gpu)
            # an answer option joins the "cluster" of good answers if it received a
            # positive human score; the ground-truth option is always included
            human_scores = batch['answer_options_scores'][mask].view(bsz, -1, 100)
            cluster_mask = (human_scores > 0)
            cluster_mask.scatter_(2, batch['gtidxs'][mask].view(bsz, -1, 1), 1)
            cluster_sizes = cluster_mask.sum(dim=2).view(bsz)

            # average word embeddings for the question and for each of the 100
            # candidate answers, then keep only the answers inside the cluster
            emb_question = avg_fn(batch['questions_ids'][mask].view(bsz, -1, args.S),
                                  batch['questions_length'][mask].view(bsz, -1)).cpu()
            emb_answer_set = avg_fn(batch['answer_options_ids'][mask].view(-1, 100, args.S),
                                    batch['answer_options_length'][mask].view(-1, 100))
            emb_answer_set = emb_answer_set.view(bsz, -1, 100, E)
            emb_cluster_set = emb_answer_set[cluster_mask].cpu()

            # pair every clustered answer with its source question: walk the flat
            # list of cluster answers and advance the example index once its
            # cumulative cluster size is reached
            batch_idx, counter = 0, 1
            acc_cluster_sizes = torch.cumsum(cluster_sizes, dim=0)
            for emb_answer in emb_cluster_set:
                questions.append(emb_question[batch_idx])
                answers.append(emb_answer)
                if counter == acc_cluster_sizes[batch_idx]:
                    batch_idx += 1
                counter += 1
             
    sys.stdout.write("\n")
    questions = torch.stack(questions)
    answers = torch.stack(answers)
 
    return [ answers.view(-1, E), questions.view(-1, E)]
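A toy illustration (made-up numbers) of the cluster-mask construction used above: candidate answers with a positive human score are kept, and the ground-truth index is always added via scatter_.

import torch

scores = torch.tensor([[0.0, 0.5, 0.0, 0.9]])   # 1 example, 4 candidate answers
gtidx = torch.tensor([[2]])                     # ground-truth candidate index
cluster_mask = scores > 0                       # [[False, True, False, True]]
cluster_mask.scatter_(1, gtidx, True)           # [[False, True, True, True]]
cluster_sizes = cluster_mask.sum(dim=1)         # tensor([3])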
Example #5
def candidate_answers_recall(test_loader, lambdas, proj_mtxs, train_projections, proj_train_mus, dictionary, opt, log, set, train_loader=None):

    log.info('computing ranks with on-the-fly candidates = ' + str(bool(opt.on_the_fly)))
   
    torch.autograd.set_grad_enabled(False)

    # set up meters and buffers
    meters, ranks_to_save = {}, []
    if opt.on_the_fly:
        # create buffers for projected candidates
        topk_idx_buffer = torch.zeros(test_loader.batch_size, opt.exchangesperimage, opt.on_the_fly).long()
        topk_idx_buffer = utils.send_to_device(topk_idx_buffer, opt.gpu)

        proj_opts_buffer = torch.zeros(test_loader.batch_size, opt.exchangesperimage, opt.on_the_fly, opt.k)
        proj_opts_buffer = utils.send_to_device(proj_opts_buffer, opt.gpu)

        # mean centred train question projections
        proj_q_train = cca_utils.mean_center(train_projections[1], proj_train_mus[1])
        proj_q_train = utils.send_to_device(proj_q_train, opt.gpu)
        train_projections[0] = utils.send_to_device(train_projections[0], opt.gpu)
    else: # only compute ranks when not on_the_fly since gtidxs are meaningless
        kvals = [1, 5, 10]
        meters['mrank'] = utils.AverageMeter()
        for k in kvals:
            meters['recall@' + str(k)] = utils.AverageMeter()
        meters['rrank'] = utils.AverageMeter()
    
    if opt.threshold:
        meters['ge_thresh'] = utils.AverageMeter()
        meters['ge_thresh_std'] = utils.AverageMeter()
        meters['ge_thresh_size'] = utils.AverageMeter()
    
    proj_mtxs = utils.send_to_device(proj_mtxs, opt.gpu)
    proj_train_mus = utils.send_to_device(proj_train_mus, opt.gpu)
    lambdas = utils.send_to_device(lambdas, opt.gpu)
    
    b1 = proj_mtxs[0]
    b2 = proj_mtxs[1]
        
    N = len(test_loader.dataset)

    for i, batch in enumerate(test_loader):

        # current batch size
        bsz = batch['img'].size(0)
        batch = utils.send_to_device(batch, opt.gpu)

        # averaged word vectors for question
        emb_question = utils.get_avg_embedding(batch['questions_ids'], batch['questions_length'], dictionary) # bsz x nexchanges x 300

        # project question to joint space using b2
        proj_q = cca_utils.get_projection(emb_question.view(-1, opt.k), b2, lambdas, opt.p)
        proj_q = cca_utils.mean_center(proj_q, proj_train_mus[1]) # center by projected train question mean
        proj_q = proj_q.view(bsz, opt.exchangesperimage, 1, opt.k)
        
        # compute candidate answer set
        if opt.on_the_fly: # >0
            topk_train_question_idxs = topk_idx_buffer[0:bsz].view(-1, opt.on_the_fly).fill_(0)
            proj_opts = proj_opts_buffer[0:bsz].view(-1, opt.on_the_fly, opt.k).fill_(0)
            for q_i, q in enumerate(proj_q.view(-1, opt.k)): # flatten bsz and opt.exchangesperimage
                # get top-k questions from train set
                topk_train_question_idxs[q_i] = cca_utils.topk_corr_distance(proj_q_train, q.unsqueeze(0), k=opt.on_the_fly)[1] # k indices
                # get their corresponding answers projections
                proj_opts[q_i] = train_projections[0].index_select(0, topk_train_question_idxs[q_i])
            topk_train_question_idxs = topk_train_question_idxs.view(bsz, opt.exchangesperimage, opt.on_the_fly)
            proj_opts = proj_opts.view(bsz, opt.exchangesperimage, opt.on_the_fly, opt.k) 
        else:
            emb_opts = utils.get_avg_embedding(batch['answer_options_ids'].view(-1, 100, opt.seqlen), batch['answer_options_length'].view(-1, 100), dictionary)
            emb_opts = emb_opts.view(bsz, opt.exchangesperimage, 100, emb_opts.size(-1)) # bsz x nexchanges x 100 x opt.k
            
            # project answer candidates to joint space using b1
            proj_opts = cca_utils.get_projection(emb_opts.view(-1, opt.k), b1, lambdas, opt.p)
            proj_opts = cca_utils.mean_center(proj_opts, proj_train_mus[0]) # center by projected train answer mean
            proj_opts = proj_opts.view(bsz, opt.exchangesperimage, 100, opt.k) 

        # compute (sorted) correlation between 100 candidates & 1 test question
        denom = torch.norm(proj_opts, p=2, dim=3) * torch.norm(proj_q.expand_as(proj_opts), p=2, dim=3)
        corrs = torch.matmul(proj_opts, proj_q.transpose(2,3)).squeeze(-1).div_(denom) # bsz x nexchanges x 100/opt.on_the_fly
        sorted_corrs, indices = torch.sort(corrs, dim=2, descending=True) # indices: bsz x nexchanges x 100/opt.on_the_fly

        # compute ranks: scatter 1..C (C = opt.on_the_fly or 100) through the sort
        # indices so that ranks[b, e, j] is the rank of candidate j
        n_cands = opt.on_the_fly if opt.on_the_fly else 100
        ranks = torch.zeros(sorted_corrs.size()).type_as(sorted_corrs)
        ranks.scatter_(2, indices,
                       torch.arange(1, n_cands + 1).type_as(sorted_corrs).view(1, 1, n_cands).expand_as(sorted_corrs))
        if opt.save_ranks and not opt.on_the_fly:
            ranks_to_save = utils.process_ranks_to_save(ranks_to_save, batch['img_name'], ranks, batch['gtidxs'], set)
        
        if set != 'test':  # ground-truth ranks are only available outside the test split
            gt_ranks = ranks.gather(2, batch['gtidxs'].unsqueeze(-1))
            meters = utils.process_ranks_for_meters(meters, gt_ranks, sorted_corrs if opt.threshold else None, opt.on_the_fly) 
            utils.log_iteration_stats(log, meters, i+1, len(test_loader)) 
        
        # interactive mode
        if opt.interactive:
            randint = random.randint(0, bsz-1)
            print ('Image: {}'.format(batch['img_name'][randint]))
            for ex in range(opt.exchangesperimage):
                worded_q = test_loader.dataset.all_questions[batch['questions'][randint][ex]]
                worded_gt_a = test_loader.dataset.all_answers[batch['answers'][randint][ex]]
                print ('Question #{:d}/{:d}: {}'.format(ex+1, opt.exchangesperimage, worded_q))
                print ('Ground-truth answer #{:d}/{:d}: {}'.format(ex+1, opt.exchangesperimage, worded_gt_a))
                print ('Ranked (by correlation) candidate answers (on-the-fly=' + str(opt.on_the_fly) + '):')
                if opt.on_the_fly:
                    ranked_idxs = topk_train_question_idxs[randint][ex][ indices[randint][ex] ]
                    idxs = zip(ranked_idxs // opt.exchangesperimage, ranked_idxs % opt.exchangesperimage)
                    candidates = [train_loader.dataset[idx]['answers'][exchange_idx] for idx, exchange_idx in idxs]
                    worded_candidates = [train_loader.dataset.all_answers[c] for c in candidates]
                else:
                    candidates = batch['answer_options'][randint][ex][ indices[randint][ex] ]
                    worded_candidates = [ test_loader.dataset.all_answers[c] for c in candidates ]
                print (worded_candidates)
                input()

    # save meters and (optionally) the per-image ranks
    resultsdir = os.path.join(opt.resultsdir, 'experiment_id' + str(opt.id))
    save_path = os.path.join(resultsdir, 'exp' + str(opt.id) + '_' + set + '_' + opt.input_vars + '_' + opt.condition_vars + '_k_' + str(opt.k) + '_p_' + str(opt.p))
    utils.save_meters(meters, save_path)
    if opt.save_ranks:
        with open(save_path + '_ranks.json', 'w') as outfile:
            json.dump(ranks_to_save, outfile)
            log.info('Ranks saved to ' + save_path + '_ranks.json')
 
    torch.autograd.set_grad_enabled(True)

    return meters
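The rank bookkeeping above (sort, scatter 1..C through the sort indices, gather the ground-truth rank) can be isolated in a small self-contained sketch with made-up scores; recall@k then follows directly from the gathered rank:

import torch

corrs = torch.tensor([[0.1, 0.9, 0.4, 0.7]])    # 1 question, 4 candidates
sorted_corrs, indices = torch.sort(corrs, dim=1, descending=True)
ranks = torch.zeros_like(corrs)
ranks.scatter_(1, indices,
               torch.arange(1., corrs.size(1) + 1).expand_as(corrs))
# ranks == tensor([[4., 1., 3., 2.]])
gt_rank = ranks.gather(1, torch.tensor([[3]]))  # ground truth is candidate 3 -> rank 2
recall_at_1 = (gt_rank <= 1).float().mean()     # 0.0
recall_at_5 = (gt_rank <= 5).float().mean()     # 1.0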
Example #6
def nearest_neighbour_baselines(test_loader, dictionary, opt, log, set, train_views, test_views):

    kvals, meters = [1, 5, 10], {}
    meters['mrank'] = utils.AverageMeter()
    for k in kvals:
        meters['recall@' + str(k)] = utils.AverageMeter()
    meters['rrank'] = utils.AverageMeter()
    
    log.info('computing nearest-neighbour baseline with ' + str(opt.condition_vars))

    # retrieve the k nearest train questions first, then (optionally) re-rank to kp with image features
    k = 100
    kp = 20

    emb_train_answers = utils.send_to_device(train_views[0], opt.gpu)
    emb_train_questions = utils.send_to_device(train_views[-1], opt.gpu)
    if len(train_views) == 3:
        emb_train_images = utils.send_to_device(train_views[1], opt.gpu)
        emb_test_images = utils.send_to_device(test_views[1], opt.gpu)
        img_feat_size = emb_train_images.size(1)
    else:
        emb_train_images = None
    
    N = emb_train_answers.size(0)

    for i, batch in enumerate(test_loader):

        # current batch size
        bsz = batch['img'].size(0)
        batch = utils.send_to_device(batch, opt.gpu)
        
        # averaged word vectors for question
        emb_question = utils.get_avg_embedding(batch['questions_ids'], batch['questions_length'], dictionary) # bsz x nexchanges x 300

        # averaged word vectors for answer candidates
        emb_opts = utils.get_avg_embedding(batch['answer_options_ids'].view(-1, 100, opt.seqlen), batch['answer_options_length'].view(-1, 100), dictionary)
      
        dists = torch.norm( emb_train_questions.unsqueeze(1).unsqueeze(1).expand(N, bsz, opt.exchangesperimage, opt.emsize) 
                - emb_question.unsqueeze(0).expand(N, bsz, opt.exchangesperimage, opt.emsize), dim=3, p=2) # N x bsz x opt.exchangesperimage
        topk_train_question_idxs = torch.topk(dists, k=k, dim=0, largest=False)[1] # k x bsz x opt.exchangesperimage
        topk_train_emb_answers = emb_train_answers.index_select(0, topk_train_question_idxs.view(-1)).view(k, bsz, opt.exchangesperimage, opt.emsize)
        mean_train_answer = topk_train_emb_answers.mean(dim=0)

        if len(train_views) == 3: # further filter ids with image features
            test_emb_images = emb_test_images[i*opt.exchangesperimage*opt.batch_size : i*opt.exchangesperimage*opt.batch_size+bsz*opt.exchangesperimage]
            test_emb_images = test_emb_images.view(bsz, opt.exchangesperimage, img_feat_size)
            dists = torch.norm( emb_train_images.index_select(0, topk_train_question_idxs.view(-1)).view(k, bsz, opt.exchangesperimage, img_feat_size) - test_emb_images.unsqueeze(0).expand(k, bsz, opt.exchangesperimage,
                img_feat_size), p=2, dim=3)
            topkp_train_question_idxs = torch.topk(dists, k=kp, dim=0, largest=False)[1] # kp x bsz x opt.exchangesperimage

            topkp_train_question_idxs = topkp_train_question_idxs.unsqueeze(-1).expand(kp, bsz, opt.exchangesperimage, opt.emsize)
            mean_train_answer = topk_train_emb_answers.gather(0, topkp_train_question_idxs).view(kp, bsz, opt.exchangesperimage, opt.emsize).mean(dim=0)

        dists = torch.norm( emb_opts - mean_train_answer.unsqueeze(2).expand(bsz, opt.exchangesperimage, 100, opt.emsize), p=2, dim=3) # bsz x opt.exchangesperimage x 100
        sorted_dists, indices_dists = torch.sort(dists, dim=2, descending=False)
        
        # compute ranks 
        ranks = torch.zeros(sorted_dists.size()).type_as(sorted_dists)
        ranks.scatter_(2, indices_dists, torch.arange(1,101).type_as(sorted_dists).view(1,1,100).expand_as(sorted_dists)) # bsz x nexchanges
            
        gt_ranks = ranks.gather(2, batch['gtidxs'].unsqueeze(-1))
        # pass sorted_dists here (sorted_corrs is not defined in this function)
        meters = utils.process_ranks_for_meters(meters, gt_ranks, sorted_dists if opt.threshold else None, opt.on_the_fly)
        utils.log_iteration_stats(log, meters, i+1, len(test_loader))

    return meters
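A minimal, self-contained sketch of the nearest-neighbour idea above, using random stand-in features and torch.cdist instead of the explicit broadcast-and-norm:

import torch

torch.manual_seed(0)
E, k = 16, 5
train_q, train_a = torch.randn(1000, E), torch.randn(1000, E)
test_q = torch.randn(1, E)
candidates = torch.randn(100, E)

# k train questions closest to the test question
nn_idx = torch.cdist(test_q, train_q).topk(k, largest=False).indices[0]
# average their answers, then rank candidates by distance to that average
mean_answer = train_a[nn_idx].mean(dim=0, keepdim=True)     # 1 x E
cand_dists = torch.cdist(mean_answer, candidates)[0]        # 100 distances
ranking = cand_dists.argsort()                              # best candidate first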
Example #7
def get_features(dictionary, opt, log, set):
 
    loader = get_dataloader(set, opt, dictionary, log)
    
    featuredir = os.path.join(opt.datasetdir, str(opt.datasetversion), 'features')
    img_feature_file = os.path.join(featuredir, set + '_' + opt.imagemodel + '_img_feats.pt')
    cap_feature_file = os.path.join(featuredir, set + '_' + os.path.basename(opt.wordmodel) + '_cap_feats.pt')
    quest_feature_file = os.path.join(featuredir, set + '_' + os.path.basename(opt.wordmodel) + '_quest_feats.pt')
    ans_feature_file = os.path.join(featuredir, set + '_' + os.path.basename(opt.wordmodel) + '_ans_feats.pt')
    
    # load cached features from disk if they all exist
    if (os.path.exists(img_feature_file) and os.path.exists(cap_feature_file)
            and os.path.exists(quest_feature_file) and os.path.exists(ans_feature_file)):
        
        V1 = torch.load(img_feature_file)
        V2 = torch.load(cap_feature_file)
        V3 = torch.load(quest_feature_file)
        V4 = torch.load(ans_feature_file)

    else: #get features on the fly

        log.info('getting pre-trained features for ' + set + ' images, questions and answers...')

        # build image feature network
        img_model = torchvision.models.__dict__[opt.imagemodel](pretrained=True) # use pre-trained weights
        if 'resnet' in opt.imagemodel:
            img_feat_net = nn.Sequential(*list(img_model.children())[:-1])
        else:
            img_feat_net = nn.ModuleList([img_model.features, nn.Sequential(*list(img_model.classifier.children())[:-1])])
        img_feat_net.eval()
        for p in img_feat_net.parameters():
            p.requires_grad = False
        if opt.gpu>=0:
            img_feat_net.to('cuda:' + str(opt.gpu))
        
        V1 = torch.zeros(len(loader.dataset), 512 if 'resnet' in opt.imagemodel else 4096) # image feature dim (resnet18/34 vs. VGG-style fc)
        V2 = torch.zeros(len(loader.dataset), opt.emsize) # avg fasttext caption dim
        V3 = torch.zeros(len(loader.dataset), opt.exchangesperimage, opt.emsize) # avg fasttext question dim
        V4 = torch.zeros(len(loader.dataset), opt.exchangesperimage, opt.emsize) # avg fasttext answer dim
         
        for i, batch in enumerate(loader):

            sys.stdout.write('\r{}/{} --> {:3.1f}%'.format(str(i+1), str(len(loader)), (i+1)/float(len(loader))*100))
            sys.stdout.flush()

            bsz = batch['img'].size(0)
            batch = utils.send_to_device(batch, opt.gpu)
            
            # bsz x 512 (resnet) or bsz x 4096 (vgg-style) image features
            img_feat = img_feat_net(batch['img']) if 'resnet' in opt.imagemodel else img_feat_net[1](img_feat_net[0](batch['img']).view(bsz, -1))
            V1[i*loader.batch_size:i*loader.batch_size+bsz] = img_feat.detach().squeeze().cpu()

            # bsz x opt.emsize average caption embeddings
            V2[i*loader.batch_size:i*loader.batch_size+bsz] = utils.get_avg_embedding(batch['caption_ids'].unsqueeze(1), batch['caption_length'].unsqueeze(1), dictionary).squeeze(1).cpu()

            # bsz x opt.emsize average question embeddings
            V3[i*loader.batch_size:i*loader.batch_size+bsz] = utils.get_avg_embedding(batch['questions_ids'], batch['questions_length'], dictionary).cpu()
             
            # bsz x opt.emsize average answer embeddings
            V4[i*loader.batch_size:i*loader.batch_size+bsz] = utils.get_avg_embedding(batch['answers_ids'], batch['answers_length'], dictionary).cpu()

        sys.stdout.write("\n")
        os.makedirs(featuredir, exist_ok=True)
        
        img_feat_net.to('cpu')
        torch.save(V1, img_feature_file)
        torch.save(V2, cap_feature_file)
        torch.save(V3, quest_feature_file)
        torch.save(V4, ans_feature_file)
        log.info('-' * 100)

    return loader, {'img': V1, 'caption': V2, 'question': V3, 'answer': V4}
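For the ResNet case, the image feature extractor above boils down to dropping the final fully-connected layer and using the pooled activations; a minimal stand-alone sketch (resnet18 chosen purely for illustration):

import torch
import torch.nn as nn
import torchvision

resnet = torchvision.models.resnet18(pretrained=True)
feat_net = nn.Sequential(*list(resnet.children())[:-1]).eval()
for p in feat_net.parameters():
    p.requires_grad = False

with torch.no_grad():
    images = torch.randn(2, 3, 224, 224)    # dummy batch
    feats = feat_net(images).flatten(1)     # 2 x 512 feature vectors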