def train_lfl_epoch(old_model, model, criterion, optimizer, dataloader):
    # Freeze the source-task model; only the new model is updated.
    old_model.eval()
    model.train()
    total_loss = 0
    total_metrics = 0
    for idx, data_items in enumerate(tqdm(dataloader)):
        optimizer.zero_grad()
        data_items = send_to_device(data_items, device)
        # Unpack negatives: the channel dim packs args.neg_samples images.
        b, c, h, w = data_items["neg"].size()
        data_items["neg"] = data_items["neg"].view(
            b * args.neg_samples, int(c / args.neg_samples), h, w)
        anchor, pos, neg = model(
            data_items["anchor"], data_items["pos"], data_items["neg"])
        # The old model's anchor embedding is the "less-forgetting" target.
        with torch.no_grad():
            old_anchor = old_model.get_embedding(data_items["anchor"])
        loss, metric = criterion(old_anchor=old_anchor, anchor=anchor, pos=pos,
                                 neg=neg, targets=data_items["anchor_target"])
        total_loss += loss.item()
        loss.backward()
        total_metrics += metric
        torch.nn.utils.clip_grad_norm_(model.parameters(), 10)
        optimizer.step()
    total_loss /= len(dataloader)
    if args.task_method == "regression":
        metric = total_metrics / len(dataloader)
    else:
        metric = total_metrics / len(dataloader.dataset)
    return total_loss, metric
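# A minimal usage sketch for train_lfl_epoch, assuming `criterion`,
# `train_loader`, and an `args` namespace with `epochs` and `lr` exist
# elsewhere in the repo; the deepcopy-based snapshot below is illustrative,
# not the repo's own setup code.
def run_lfl_training_sketch(model, criterion, train_loader, args):
    import copy
    # Snapshot and freeze the source-task model; it supplies old_anchor targets.
    old_model = copy.deepcopy(model)
    old_model.eval()
    for p in old_model.parameters():
        p.requires_grad = False
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    for epoch in range(args.epochs):
        loss, metric = train_lfl_epoch(
            old_model, model, criterion, optimizer, train_loader)
        print('epoch {:d}: loss={:.4f}, metric={:.4f}'.format(epoch, loss, metric))
    return model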
def get_flat_full_features(loader, args):
    avg_fn = loader.dataset.get_avg_embedding
    E = loader.dataset.dictionary.emb_size
    # Pre-allocate: N images x D exchanges x E-dim embeddings.
    questions = torch.FloatTensor(loader.dataset.N, args.D, E)
    answers = torch.FloatTensor(loader.dataset.N, args.D, E)
    for i, batch in enumerate(loader):
        sys.stdout.write('\r{}/{} --> {:3.1f}%'.format(
            str(i + 1), str(len(loader)), (i + 1) / float(len(loader)) * 100))
        sys.stdout.flush()
        batch = utils.send_to_device(batch, args.gpu)
        bsz = batch['questions_ids'].size(0)
        questions[i * loader.batch_size:i * loader.batch_size + bsz] = avg_fn(
            batch['questions_ids'], batch['questions_length']).cpu()
        answers[i * loader.batch_size:i * loader.batch_size + bsz] = avg_fn(
            batch['answers_ids'], batch['answers_length']).cpu()
    sys.stdout.write("\n")
    # Flatten to (N*D, E); answers first, then questions.
    return [answers.view(-1, E), questions.view(-1, E)]
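# `get_avg_embedding` comes from the dataset object; the sketch below shows
# the standard length-masked averaging such a function typically performs
# over padded token embeddings. Toy shapes only; this is an assumption about
# its behaviour, not the repo's verified implementation.
def _avg_embedding_sketch():
    emb = torch.randn(2, 4, 300)        # bsz x seqlen x emsize token vectors
    lengths = torch.tensor([3, 2])      # true sequence lengths before padding
    mask = (torch.arange(4).unsqueeze(0) < lengths.unsqueeze(1)).float()  # 2 x 4
    summed = (emb * mask.unsqueeze(-1)).sum(dim=1)      # zero out padding
    return summed / lengths.unsqueeze(-1).float()       # 2 x 300 averages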
def load_dictionary(self, shared_dictionary=None):
    if shared_dictionary:
        self.dictionary = shared_dictionary
    else:
        print('dictionary and word vectors loading...')
        if os.path.exists(self.saved_dictionary_path):
            self.dictionary = torch.load(self.saved_dictionary_path)
        else:
            self.build_dictionary()
        self.dictionary.load_word_vectors(
            self.fast_text_model, self.saved_wordvectors_path)
        self.dictionary.word_vectors = send_to_device(
            self.dictionary.word_vectors, self.device)
        print('dictionary loaded: words: {:d}'.format(len(self.dictionary)))
        print('word vectors loaded: words: {:d}; {:d}-dim'.format(
            len(self.dictionary.word_vectors), self.dictionary.emb_size))
def get_flat_human_features(loader, args):
    avg_fn = loader.dataset.get_avg_embedding
    E = loader.dataset.dictionary.emb_size
    questions, answers = [], []
    for i, batch in enumerate(loader):
        sys.stdout.write('\r{}/{} --> {:3.1f}%'.format(
            str(i + 1), str(len(loader)), (i + 1) / float(len(loader)) * 100))
        sys.stdout.flush()
        # Keep only exchanges that carry human relevance annotations.
        mask = get_mask(batch['in_human_set'])
        if isinstance(mask, torch.Tensor):
            bsz = mask.sum()
            batch = utils.send_to_device(batch, args.gpu)
            human_scores = batch['answer_options_scores'][mask].view(bsz, -1, 100)
            # Cluster = options with positive human score, plus the ground truth.
            cluster_mask = (human_scores > 0)
            cluster_mask.scatter_(2, batch['gtidxs'][mask].view(bsz, -1, 1), 1)
            cluster_sizes = cluster_mask.sum(dim=2).view(bsz)
            emb_question = avg_fn(
                batch['questions_ids'][mask].view(bsz, -1, args.S),
                batch['questions_length'][mask].view(bsz, -1)).cpu()
            emb_answer_set = avg_fn(
                batch['answer_options_ids'][mask].view(-1, 100, args.S),
                batch['answer_options_length'][mask].view(-1, 100))
            emb_answer_set = emb_answer_set.view(bsz, -1, 100, E)
            emb_cluster_set = emb_answer_set[cluster_mask].cpu()
            # Pair each question with every answer in its cluster, walking the
            # flattened cluster answers via cumulative cluster sizes.
            batch_idx, counter = 0, 1
            acc_cluster_sizes = torch.cumsum(cluster_sizes, dim=0)
            for emb_answer in emb_cluster_set:
                questions.append(emb_question[batch_idx])
                answers.append(emb_answer)
                if counter == acc_cluster_sizes[batch_idx]:
                    batch_idx += 1
                counter += 1
    sys.stdout.write("\n")
    questions = torch.stack(questions)
    answers = torch.stack(answers)
    return [answers.view(-1, E), questions.view(-1, E)]
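# Toy check of the cumulative-size bookkeeping in get_flat_human_features:
# cluster_sizes [2, 1] means flattened answers 0-1 belong to question 0 and
# answer 2 to question 1, which is exactly what the counter walk recovers.
def _cluster_pairing_demo():
    cluster_sizes = torch.tensor([2, 1])
    acc_cluster_sizes = torch.cumsum(cluster_sizes, dim=0)  # [2, 3]
    owners, batch_idx, counter = [], 0, 1
    for _ in range(int(cluster_sizes.sum())):
        owners.append(batch_idx)
        if counter == acc_cluster_sizes[batch_idx]:
            batch_idx += 1
        counter += 1
    assert owners == [0, 0, 1]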
def candidate_answers_recall(test_loader, lambdas, proj_mtxs, train_projections,
                             proj_train_mus, dictionary, opt, log, set,
                             train_loader=None):
    log.info('computing ranks with on-the-fly candidates = ' + str(bool(opt.on_the_fly)))
    torch.autograd.set_grad_enabled(False)
    # set up meters and buffers
    meters, ranks_to_save = {}, []
    if opt.on_the_fly:
        # create buffers for projected candidates
        topk_idx_buffer = torch.zeros(test_loader.batch_size, opt.exchangesperimage, opt.on_the_fly).long()
        topk_idx_buffer = utils.send_to_device(topk_idx_buffer, opt.gpu)
        proj_opts_buffer = torch.zeros(test_loader.batch_size, opt.exchangesperimage, opt.on_the_fly, opt.k)
        proj_opts_buffer = utils.send_to_device(proj_opts_buffer, opt.gpu)
        # mean-centred train question projections
        proj_q_train = cca_utils.mean_center(train_projections[1], proj_train_mus[1])
        proj_q_train = utils.send_to_device(proj_q_train, opt.gpu)
        train_projections[0] = utils.send_to_device(train_projections[0], opt.gpu)
    else:
        # only compute ranks when not on_the_fly, since gtidxs are meaningless
        # for on-the-fly candidates
        kvals = [1, 5, 10]
        meters['mrank'] = utils.AverageMeter()
        for k in kvals:
            meters['recall@' + str(k)] = utils.AverageMeter()
        meters['rrank'] = utils.AverageMeter()
        if opt.threshold:
            meters['ge_thresh'] = utils.AverageMeter()
            meters['ge_thresh_std'] = utils.AverageMeter()
            meters['ge_thresh_size'] = utils.AverageMeter()
    proj_mtxs = utils.send_to_device(proj_mtxs, opt.gpu)
    proj_train_mus = utils.send_to_device(proj_train_mus, opt.gpu)
    lambdas = utils.send_to_device(lambdas, opt.gpu)
    b1 = proj_mtxs[0]
    b2 = proj_mtxs[1]
    N = len(test_loader.dataset)
    for i, batch in enumerate(test_loader):
        # current batch size
        bsz = batch['img'].size(0)
        batch = utils.send_to_device(batch, opt.gpu)
        # averaged word vectors for questions: bsz x nexchanges x 300
        emb_question = utils.get_avg_embedding(batch['questions_ids'], batch['questions_length'], dictionary)
        # project questions to the joint space using b2
        proj_q = cca_utils.get_projection(emb_question.view(-1, opt.k), b2, lambdas, opt.p)
        proj_q = cca_utils.mean_center(proj_q, proj_train_mus[1])  # center by projected train question mean
        proj_q = proj_q.view(bsz, opt.exchangesperimage, 1, opt.k)
        # compute candidate answer set
        if opt.on_the_fly:  # > 0
            topk_train_question_idxs = topk_idx_buffer[0:bsz].view(-1, opt.on_the_fly).fill_(0)
            proj_opts = proj_opts_buffer[0:bsz].view(-1, opt.on_the_fly, opt.k).fill_(0)
            for q_i, q in enumerate(proj_q.view(-1, opt.k)):  # flatten bsz and opt.exchangesperimage
                # get the top-k most correlated questions from the train set
                topk_train_question_idxs[q_i] = cca_utils.topk_corr_distance(
                    proj_q_train, q.unsqueeze(0), k=opt.on_the_fly)[1]  # k indices
                # get their corresponding answer projections
                proj_opts[q_i] = train_projections[0].index_select(0, topk_train_question_idxs[q_i])
            topk_train_question_idxs = topk_train_question_idxs.view(bsz, opt.exchangesperimage, opt.on_the_fly)
            proj_opts = proj_opts.view(bsz, opt.exchangesperimage, opt.on_the_fly, opt.k)
        else:
            emb_opts = utils.get_avg_embedding(
                batch['answer_options_ids'].view(-1, 100, opt.seqlen),
                batch['answer_options_length'].view(-1, 100), dictionary)
            emb_opts = emb_opts.view(bsz, opt.exchangesperimage, 100, emb_opts.size(-1))  # bsz x nexchanges x 100 x opt.k
            # project answer candidates to the joint space using b1
            proj_opts = cca_utils.get_projection(emb_opts.view(-1, opt.k), b1, lambdas, opt.p)
            proj_opts = cca_utils.mean_center(proj_opts, proj_train_mus[0])  # center by projected train answer mean
            proj_opts = proj_opts.view(bsz, opt.exchangesperimage, 100, opt.k)
        # compute (sorted) correlation between the candidates and the test question
        denom = torch.norm(proj_opts, p=2, dim=3) * torch.norm(proj_q.expand_as(proj_opts), p=2, dim=3)
        corrs = torch.matmul(proj_opts, proj_q.transpose(2, 3)).squeeze(-1).div_(denom)  # bsz x nexchanges x 100/opt.on_the_fly
        sorted_corrs, indices = torch.sort(corrs, dim=2, descending=True)  # indices: bsz x nexchanges x 100/opt.on_the_fly
        # compute ranks
        ranks = torch.zeros(sorted_corrs.size()).type_as(sorted_corrs)
        ranks.scatter_(2, indices,
                       torch.arange(1, opt.on_the_fly + 1 if opt.on_the_fly else 101).type_as(sorted_corrs)
                       .view(1, 1, opt.on_the_fly if opt.on_the_fly else 100).expand_as(sorted_corrs))
        if opt.save_ranks and not opt.on_the_fly:
            ranks_to_save = utils.process_ranks_to_save(ranks_to_save, batch['img_name'], ranks, batch['gtidxs'], set)
        if set != 'test':
            gt_ranks = ranks.gather(2, batch['gtidxs'].unsqueeze(-1))
            meters = utils.process_ranks_for_meters(meters, gt_ranks, sorted_corrs if opt.threshold else None, opt.on_the_fly)
            utils.log_iteration_stats(log, meters, i + 1, len(test_loader))
        # interactive mode
        if opt.interactive:
            randint = random.randint(0, bsz - 1)
            print('Image: {}'.format(batch['img_name'][randint]))
            for ex in range(opt.exchangesperimage):
                worded_q = test_loader.dataset.all_questions[batch['questions'][randint][ex]]
                worded_gt_a = test_loader.dataset.all_answers[batch['answers'][randint][ex]]
                print('Question #{:d}/{:d}: {}'.format(ex + 1, opt.exchangesperimage, worded_q))
                print('Ground-truth answer #{:d}/{:d}: {}'.format(ex + 1, opt.exchangesperimage, worded_gt_a))
                print('Ranked (by correlation) candidate answers (on-the-fly=' + str(opt.on_the_fly) + '):')
                if opt.on_the_fly:
                    ranked_idxs = topk_train_question_idxs[randint][ex][indices[randint][ex]]
                    idxs = zip(ranked_idxs // opt.exchangesperimage, ranked_idxs % opt.exchangesperimage)
                    candidates = [train_loader.dataset[idx]['answers'][exchange_idx] for idx, exchange_idx in idxs]
                    worded_candidates = [train_loader.dataset.all_answers[c] for c in candidates]
                else:
                    candidates = batch['answer_options'][randint][ex][indices[randint][ex]]
                    worded_candidates = [test_loader.dataset.all_answers[c] for c in candidates]
                print(worded_candidates)
                input()
    # set up logging mechanisms
    resultsdir = os.path.join(opt.resultsdir, 'experiment_id' + str(opt.id))
    save_path = os.path.join(resultsdir,
                             'exp' + str(opt.id) + '_' + set + '_' + opt.input_vars + '_' +
                             opt.condition_vars + '_k_' + str(opt.k) + '_p_' + str(opt.p))
    utils.save_meters(meters, save_path)
    if opt.save_ranks:
        with open(save_path + '_ranks.json', 'w') as outfile:
            json.dump(ranks_to_save, outfile)
        log.info('Ranks saved to ' + save_path + '_ranks.json')
    torch.autograd.set_grad_enabled(True)
    return meters
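# Self-contained illustration of the scatter-based ranking used above (and in
# nearest_neighbour_baselines below): sorting yields the candidate indices in
# score order, and scattering 1..n back through those indices places each
# candidate's rank in its original slot. Toy shapes; the real tensors are
# bsz x nexchanges x 100.
def _rank_scatter_demo():
    corrs = torch.tensor([[[0.2, 0.9, 0.5]]])   # 1 x 1 x 3 candidate scores
    _, indices = torch.sort(corrs, dim=2, descending=True)
    ranks = torch.zeros_like(corrs)
    ranks.scatter_(2, indices,
                   torch.arange(1., 4.).view(1, 1, 3).expand_as(corrs))
    assert ranks.tolist() == [[[3., 1., 2.]]]   # 0.9 -> rank 1, 0.2 -> rank 3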
def nearest_neighbour_baselines(test_loader, dictionary, opt, log, set, train_views, test_views):
    kvals, meters = [1, 5, 10], {}
    meters['mrank'] = utils.AverageMeter()
    for k in kvals:
        meters['recall@' + str(k)] = utils.AverageMeter()
    meters['rrank'] = utils.AverageMeter()
    log.info('computing nearest-neighbour baseline with ' + str(opt.condition_vars))
    k = 100  # neighbours retrieved by question similarity
    kp = 20  # neighbours kept after image-based filtering
    emb_train_answers = utils.send_to_device(train_views[0], opt.gpu)
    emb_train_questions = utils.send_to_device(train_views[-1], opt.gpu)
    if len(train_views) == 3:
        emb_train_images = utils.send_to_device(train_views[1], opt.gpu)
        emb_test_images = utils.send_to_device(test_views[1], opt.gpu)
        img_feat_size = emb_train_images.size(1)
    else:
        emb_train_images = None
    N = emb_train_answers.size(0)
    for i, batch in enumerate(test_loader):
        # current batch size
        bsz = batch['img'].size(0)
        batch = utils.send_to_device(batch, opt.gpu)
        # averaged word vectors for questions: bsz x nexchanges x 300
        emb_question = utils.get_avg_embedding(batch['questions_ids'], batch['questions_length'], dictionary)
        # averaged word vectors for answer candidates
        emb_opts = utils.get_avg_embedding(batch['answer_options_ids'].view(-1, 100, opt.seqlen),
                                           batch['answer_options_length'].view(-1, 100), dictionary)
        # L2 distance from every train question to every test question: N x bsz x opt.exchangesperimage
        dists = torch.norm(
            emb_train_questions.unsqueeze(1).unsqueeze(1).expand(N, bsz, opt.exchangesperimage, opt.emsize)
            - emb_question.unsqueeze(0).expand(N, bsz, opt.exchangesperimage, opt.emsize),
            dim=3, p=2)
        topk_train_question_idxs = torch.topk(dists, k=k, dim=0, largest=False)[1]  # k x bsz x opt.exchangesperimage
        topk_train_emb_answers = emb_train_answers.index_select(
            0, topk_train_question_idxs.view(-1)).view(k, bsz, opt.exchangesperimage, opt.emsize)
        mean_train_answer = topk_train_emb_answers.mean(dim=0)
        if len(train_views) == 3:
            # further filter the k neighbours with image features
            test_emb_images = emb_test_images[i * opt.exchangesperimage * opt.batch_size:
                                              i * opt.exchangesperimage * opt.batch_size + bsz * opt.exchangesperimage]
            test_emb_images = test_emb_images.view(bsz, opt.exchangesperimage, img_feat_size)
            dists = torch.norm(
                emb_train_images.index_select(0, topk_train_question_idxs.view(-1)).view(k, bsz, opt.exchangesperimage, img_feat_size)
                - test_emb_images.unsqueeze(0).expand(k, bsz, opt.exchangesperimage, img_feat_size),
                p=2, dim=3)
            topkp_train_question_idxs = torch.topk(dists, k=kp, dim=0, largest=False)[1]  # kp x bsz x opt.exchangesperimage
            topkp_train_question_idxs = topkp_train_question_idxs.unsqueeze(-1).expand(kp, bsz, opt.exchangesperimage, opt.emsize)
            mean_train_answer = topk_train_emb_answers.gather(0, topkp_train_question_idxs).view(
                kp, bsz, opt.exchangesperimage, opt.emsize).mean(dim=0)
        # distance from each of the 100 candidates to the mean neighbour answer: bsz x opt.exchangesperimage x 100
        dists = torch.norm(
            emb_opts - mean_train_answer.unsqueeze(2).expand(bsz, opt.exchangesperimage, 100, opt.emsize),
            p=2, dim=3)
        sorted_dists, indices_dists = torch.sort(dists, dim=2, descending=False)
        # compute ranks (rank 1 = closest candidate)
        ranks = torch.zeros(sorted_dists.size()).type_as(sorted_dists)
        ranks.scatter_(2, indices_dists,
                       torch.arange(1, 101).type_as(sorted_dists).view(1, 1, 100).expand_as(sorted_dists))
        gt_ranks = ranks.gather(2, batch['gtidxs'].unsqueeze(-1))  # bsz x nexchanges
        meters = utils.process_ranks_for_meters(meters, gt_ranks,
                                                sorted_dists if opt.threshold else None, opt.on_the_fly)
        utils.log_iteration_stats(log, meters, i + 1, len(test_loader))
    return meters
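# Toy-scale sketch of the core retrieval logic above, minus dataloaders and
# image-based filtering (all shapes and data are made up): take the k train
# questions nearest a test question, average their answers, then pick the
# candidate answer closest to that mean embedding.
def _nn_baseline_sketch():
    torch.manual_seed(0)
    E, N, k = 8, 50, 5
    emb_train_questions = torch.randn(N, E)
    emb_train_answers = torch.randn(N, E)
    emb_question = torch.randn(E)
    emb_opts = torch.randn(100, E)  # 100 candidate answers
    dists = torch.norm(emb_train_questions - emb_question, p=2, dim=1)
    idx = torch.topk(dists, k=k, largest=False)[1]  # k nearest train questions
    mean_train_answer = emb_train_answers.index_select(0, idx).mean(dim=0)
    cand_dists = torch.norm(emb_opts - mean_train_answer, p=2, dim=1)
    return torch.argmin(cand_dists)  # index of the predicted answer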
def get_features(dictionary, opt, log, set):
    loader = get_dataloader(set, opt, dictionary, log)
    featuredir = os.path.join(opt.datasetdir, str(opt.datasetversion), 'features')
    img_feature_file = os.path.join(featuredir, set + '_' + opt.imagemodel + '_img_feats.pt')
    cap_feature_file = os.path.join(featuredir, set + '_' + os.path.basename(opt.wordmodel) + '_cap_feats.pt')
    quest_feature_file = os.path.join(featuredir, set + '_' + os.path.basename(opt.wordmodel) + '_quest_feats.pt')
    ans_feature_file = os.path.join(featuredir, set + '_' + os.path.basename(opt.wordmodel) + '_ans_feats.pt')
    # load from saved features if all four files exist
    if (os.path.exists(img_feature_file) and os.path.exists(cap_feature_file)
            and os.path.exists(quest_feature_file) and os.path.exists(ans_feature_file)):
        V1 = torch.load(img_feature_file)
        V2 = torch.load(cap_feature_file)
        V3 = torch.load(quest_feature_file)
        V4 = torch.load(ans_feature_file)
    else:
        # otherwise compute the features on the fly
        log.info('getting pre-trained features for ' + set + ' images, questions and answers...')
        # build the image feature network from pre-trained weights
        img_model = torchvision.models.__dict__[opt.imagemodel](pretrained=True)
        if 'resnet' in opt.imagemodel:
            img_feat_net = nn.Sequential(*list(img_model.children())[:-1])
        else:
            img_feat_net = nn.ModuleList([img_model.features,
                                          nn.Sequential(*list(img_model.classifier.children())[:-1])])
        img_feat_net.eval()
        for p in img_feat_net.parameters():
            p.requires_grad = False
        if opt.gpu >= 0:
            img_feat_net.to('cuda:' + str(opt.gpu))
        V1 = torch.zeros(len(loader.dataset), 512 if 'resnet' in opt.imagemodel else 4096)  # image feature dim
        V2 = torch.zeros(len(loader.dataset), opt.emsize)  # avg fastText dim (captions)
        V3 = torch.zeros(len(loader.dataset), opt.exchangesperimage, opt.emsize)  # avg fastText dim (questions)
        V4 = torch.zeros(len(loader.dataset), opt.exchangesperimage, opt.emsize)  # avg fastText dim (answers)
        for i, batch in enumerate(loader):
            sys.stdout.write('\r{}/{} --> {:3.1f}%'.format(
                str(i + 1), str(len(loader)), (i + 1) / float(len(loader)) * 100))
            sys.stdout.flush()
            bsz = batch['img'].size(0)
            batch = utils.send_to_device(batch, opt.gpu)
            # bsz x 512 (resnet) or bsz x 4096 (VGG-style) image features
            img_feat = (img_feat_net(batch['img']) if 'resnet' in opt.imagemodel
                        else img_feat_net[1](img_feat_net[0](batch['img']).view(bsz, -1)))
            V1[i * loader.batch_size:i * loader.batch_size + bsz] = img_feat.detach().squeeze().cpu()
            # bsz x opt.emsize average caption embeddings
            V2[i * loader.batch_size:i * loader.batch_size + bsz] = utils.get_avg_embedding(
                batch['caption_ids'].unsqueeze(1), batch['caption_length'].unsqueeze(1), dictionary).squeeze(1).cpu()
            # bsz x opt.exchangesperimage x opt.emsize average question embeddings
            V3[i * loader.batch_size:i * loader.batch_size + bsz] = utils.get_avg_embedding(
                batch['questions_ids'], batch['questions_length'], dictionary).cpu()
            # bsz x opt.exchangesperimage x opt.emsize average answer embeddings
            V4[i * loader.batch_size:i * loader.batch_size + bsz] = utils.get_avg_embedding(
                batch['answers_ids'], batch['answers_length'], dictionary).cpu()
        sys.stdout.write("\n")
        os.makedirs(featuredir, exist_ok=True)
        img_feat_net.to('cpu')
        torch.save(V1, img_feature_file)
        torch.save(V2, cap_feature_file)
        torch.save(V3, quest_feature_file)
        torch.save(V4, ans_feature_file)
    log.info('-' * 100)
    return loader, {'img': V1, 'caption': V2, 'question': V3, 'answer': V4}
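# Usage sketch for the feature cache above (the `opt`, `log`, and `dictionary`
# objects are built elsewhere in the repo): the first call per split computes
# and saves the .pt files under <datasetdir>/<datasetversion>/features/;
# later calls with the same split, image model, and word model load them
# directly instead of recomputing.
#
#   train_loader, train_views = get_features(dictionary, opt, log, 'train')
#   val_loader, val_views = get_features(dictionary, opt, log, 'val')
#   V_img, V_question = train_views['img'], train_views['question']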