def main():
    args = parse_args()
    dataset = args.dataset
    if dataset == 'cpv1':
        dictionary = Dictionary.load_from_file('data/dictionary_v1.pkl')
    elif dataset in ('cpv2', 'v2'):
        dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    else:
        raise ValueError('unknown dataset: %s' % dataset)

    print("Building train dataset...")
    train_dset = VQAFeatureDataset('train', dictionary, dataset=dataset,
                                   cache_image_features=args.cache_features)
    print("Building test dataset...")
    eval_dset = VQAFeatureDataset('val', dictionary, dataset=dataset,
                                  cache_image_features=args.cache_features)
    label2answer = eval_dset.label2ans

    # Inspect the learned answer prior for one question type.
    bias_p = get_bias(train_dset, eval_dset)
    bias_color = bias_p['what color is']
    bias_color_top5 = bias_color.argsort()[::-1][:5]
    bias_color_p = []
    bias_color_word = []
    for i in bias_color_top5:
        bias_color_p.append(bias_color[i])
        bias_color_word.append(label2answer[i])
    print(bias_color_p)
    print(bias_color_word)
def main():
    args = parse_args()
    dataset = args.dataset
    with open('util/qid2type_%s.json' % args.dataset, 'r') as f:
        qid2type = json.load(f)
    if dataset == 'cpv1':
        dictionary = Dictionary.load_from_file('data/dictionary_v1.pkl')
    elif dataset in ('cpv2', 'v2'):
        dictionary = Dictionary.load_from_file('data/dictionary.pkl')

    print("Building test dataset...")
    eval_dset = VQAFeatureDataset('val', dictionary, dataset=dataset,
                                  cache_image_features=args.cache_features)

    # Build the model using the original constructor
    constructor = 'build_%s' % args.model
    model = getattr(CCB_model, constructor)(eval_dset, args.num_hid).cuda()
    # model = getattr(base_model, constructor)(eval_dset, args.num_hid).cuda()

    if args.debias == "bias_product":
        model.debias_loss_fn = BiasProduct()
    elif args.debias == "none":
        model.debias_loss_fn = Plain()
    elif args.debias == "reweight":
        model.debias_loss_fn = ReweightByInvBias()
    elif args.debias == "learned_mixin":
        model.debias_loss_fn = LearnedMixin(args.entropy_penalty)
    elif args.debias == "CCB_loss":
        model.debias_loss_fn = CCB_loss(args.entropy_penalty)
    else:
        raise RuntimeError(args.debias)

    model_state = torch.load(args.model_state)
    model.load_state_dict(model_state)
    model = model.cuda()

    batch_size = args.batch_size
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    # Multiple workers, as in the original version (though this can be
    # slower on some setups).
    eval_loader = DataLoader(eval_dset, batch_size, shuffle=False, num_workers=5)
    print("Starting eval...")
    evaluate(model, eval_loader, qid2type)
def create_dictionary2(dataroot):
    dictionary = Dictionary()
    files = ['train/questions.txt']
    for path in files:
        question_path = os.path.join(dataroot, path)
        with open(question_path) as f:
            qs = f.read().split("\n")
        for q in qs:
            dictionary.tokenize(q, True)
    return dictionary
def create_dictionary(dataroot):
    dictionary = Dictionary()
    files = ['allwords4verbq1.json']
    for path in files:
        question_path = os.path.join(dataroot, path)
        q_data = json.load(open(question_path))
        for label, eng_name in q_data.items():
            dictionary.tokenize(eng_name, True)
    return dictionary
def create_dictionary(dataroot):
    dictionary = Dictionary()
    files = ['VQA_caption_traindataset.pkl', 'VQA_caption_valdataset.pkl']
    for path in files:
        question_path = os.path.join(dataroot, path)
        dataset = cPickle.load(open(question_path, 'rb'))
        for idx in range(len(dataset)):
            captions = dataset[idx]['caption']
            for cap in captions:
                dictionary.tokenize(cap, True)
    return dictionary
def create_dictionary(dataroot):
    dictionary = Dictionary()
    files = [
        'v2_OpenEnded_mscoco_train2014_questions.json',
        'v2_OpenEnded_mscoco_val2014_questions.json',
        'v2_OpenEnded_mscoco_test2015_questions.json',
        'v2_OpenEnded_mscoco_test-dev2015_questions.json'
    ]
    for path in files:
        question_path = os.path.join(dataroot, path)
        qs = json.load(open(question_path))['questions']
        for q in qs:
            dictionary.tokenize(q['question'], True)
    print('words coming from vqa ', len(dictionary))

    # Add all collected words from imSitu; contains both overlaps with VQA
    # and new words.
    imsitu_words_path = os.path.join(
        dataroot, 'allnverbs_imsitu_words_nl2vqamatching.json')
    imsitu_words = json.load(open(imsitu_words_path))
    for label, eng_name in imsitu_words.items():
        dictionary.tokenize(eng_name, True)
    print('with words coming from imsitu ', len(dictionary))
    return dictionary
def create_dictionary(dataroot):
    dictionary = Dictionary()
    # General role questions
    files = ['imsitu_questions_prev.json']
    for path in files:
        question_path = os.path.join(dataroot, path)
        q_data = json.load(open(question_path))
        for verb, values in q_data.items():
            roles = values['roles']
            for role, info in roles.items():
                question = info['question']
                dictionary.tokenize(question, True)

    # Templated verb descriptions
    with open(os.path.join(dataroot, 'role_abstracts.txt')) as f:
        content = f.readlines()
    verb_desc = [x.strip() for x in content]
    for desc in verb_desc:
        dictionary.tokenize(desc, True)

    # Labels
    question_path = os.path.join(dataroot, 'all_label_mapping.json')
    q_data = json.load(open(question_path))
    for label, eng_name in q_data.items():
        dictionary.tokenize(eng_name, True)
    return dictionary
def create_dictionary(dataroot):
    dictionary = Dictionary()
    files = [
        'v2_OpenEnded_mscoco_train2014_questions.json',
        'v2_OpenEnded_mscoco_val2014_questions.json',
        'v2_OpenEnded_mscoco_test2015_questions.json',
        'v2_OpenEnded_mscoco_test-dev2015_questions.json'
    ]
    for path in files:
        question_path = os.path.join(dataroot, path)
        qs = json.load(open(question_path))['questions']
        for q in qs:
            dictionary.tokenize(q['question'], True)
    return dictionary
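The create_dictionary variants above and below all assume the same small Dictionary interface: tokenize(sentence, add_word) maps a sentence to token ids (optionally growing the vocabulary), and instances expose word2idx/idx2word plus pickle round-tripping via dump_to_file/load_from_file. A minimal sketch of that assumed interface follows; the real class differs per repo (e.g. the KVQA variant passes an external tokenizer to tokenize), so this is illustrative only.

import pickle


class Dictionary(object):
    """Minimal word/index mapping assumed by the create_dictionary helpers."""

    def __init__(self, word2idx=None, idx2word=None):
        self.word2idx = word2idx if word2idx is not None else {}
        self.idx2word = idx2word if idx2word is not None else []

    def add_word(self, word):
        if word not in self.word2idx:
            self.idx2word.append(word)
            self.word2idx[word] = len(self.idx2word) - 1
        return self.word2idx[word]

    def tokenize(self, sentence, add_word):
        # Crude whitespace/punctuation normalization; real implementations vary.
        words = sentence.lower().replace(',', '').replace('?', '') \
                        .replace("'s", " 's").split()
        if add_word:
            return [self.add_word(w) for w in words]
        return [self.word2idx[w] for w in words if w in self.word2idx]

    def __len__(self):
        return len(self.idx2word)

    def dump_to_file(self, path):
        with open(path, 'wb') as f:
            pickle.dump((self.word2idx, self.idx2word), f)

    @classmethod
    def load_from_file(cls, path):
        with open(path, 'rb') as f:
            word2idx, idx2word = pickle.load(f)
        return cls(word2idx, idx2word)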
def __init__(self, args, logger):
    self.args = args
    self.logger = logger
    Dict = Dictionary(data_path=os.path.join(args.data_path, args.dataset),
                      task_type=args.task_type)
    self.dict = Dict.dict
    self.attr_len = Dict.attr_len
    self.all_the_poss = reduce(mul, Dict.attr_len, 1)
    self.logger.info("Experiment initializing . . . ")

    # Build models
    device = torch.device("cuda" if torch.cuda.is_available()
                          and not args.no_cuda else "cpu")
    if args.model_type == 'POP':
        self.model = 'POP'
    elif args.model_type in ('ETN', 'ETNA'):
        self.model = ETNADemoPredictor(logger, args.model_type,
                                       len(self.dict), args.item_emb_size,
                                       Dict.attr_len, args.no_cuda).to(device)
    else:
        sys.exit()
    if args.model_type != 'POP':
        self.select_optimizer(self.model)
    self.logger.info(self.model)
    self.step_count = 0
def create_dictionary(dataroot):
    dictionary = Dictionary()
    files = ['imsitu_questions_prev.json']
    for path in files:
        question_path = os.path.join(dataroot, path)
        q_data = json.load(open(question_path))
        for verb, values in q_data.items():
            roles = values['roles']
            for role, info in roles.items():
                question = info['question']
                dictionary.tokenize(question, True)
    return dictionary
def load_model_data(config, is_train=True, eval_name="val"):
    # Load data
    dictionary = Dictionary()
    embedding_weight = dictionary.create_glove_embedding_init(
        pre=True, pre_dir='../data/vocabs/embedding_weight.npy')
    if is_train:
        train_dset = TextVQA('train', dictionary)
        eval_dset = TextVQA('val', dictionary)
        test_dset = None
        if eval_name == "test":
            test_dset = TextVQA('test', dictionary)
        model = build_model(train_dset, config['model_attributes'])
        return model, train_dset, eval_dset, embedding_weight, test_dset
    else:
        eval_dset = TextVQA(eval_name, dictionary)
        model = build_model(eval_dset, config['model_attributes'])
        return model, eval_dset
def create_dictionary(dataroot):
    dictionary = Dictionary()
    files = [
        'OpenEnded_abstract_v002_test2015_questions.json',
        'OpenEnded_abstract_v002_train2015_questions.json',
        'OpenEnded_abstract_v002_val2015_questions.json',
        'MultipleChoice_abstract_v002_test2015_questions.json',
        'MultipleChoice_abstract_v002_train2015_questions.json',
        'MultipleChoice_abstract_v002_val2015_questions.json'
    ]
    for path in files:
        question_path = os.path.join(dataroot, path)
        qs = json.load(open(question_path))['questions']
        for q in qs:
            dictionary.tokenize(q['question'], True)
    return dictionary
def create_dictionary(dataroot, tk='mecab'):
    dictionary = Dictionary()
    if tk == 'mecab':
        tokenizer = Mecab()
    elif tk == 'kkma':
        tokenizer = Kkma()
    else:
        raise ValueError('unknown tokenizer: %s' % tk)
    files = [
        'KVQA_annotations_train.json',
        'KVQA_annotations_val.json',
        'KVQA_annotations_test.json'
    ]
    for path in files:
        question_path = os.path.join(dataroot, path)
        qs = json.load(open(question_path, encoding='utf-8'))
        for q in qs:
            dictionary.tokenize(tokenize_kvqa(q['question']), True,
                                tokenizer.morphs)
    return dictionary
def main(args):
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu
    net = Question_Classifier(args.bert_mode, args.bert_pretrain, num_classes=3)
    net.load_state_dict(
        torch.load(args.load_path, map_location=lambda storage, loc: storage))
    torch.cuda.set_device(device=0)
    net.cuda()

    dictionary = Dictionary.load_from_file(args.dictionary_path)
    valset = Question_Dataset('val', dictionary, args.data_root,
                              question_len=12)
    testset = Question_Dataset('test', dictionary, args.data_root,
                               question_len=12)
    valloader = DataLoader(valset, batch_size=args.batch_size,
                           shuffle=False, num_workers=2)
    testloader = DataLoader(testset, batch_size=args.batch_size,
                            shuffle=False, num_workers=2)

    net.eval()
    val_acc = 0.0
    test_acc = 0.0
    with torch.no_grad():
        for ii, sample_batched in enumerate(valloader):
            question, label = sample_batched['question'], sample_batched['label']
            question, label = question.cuda(), label.cuda()
            out = net.forward(question)
            tmp_acc = utils.cal_acc(out, label)
            val_acc += tmp_acc * question.shape[0]
        val_acc /= len(valset)
        for ii, sample_batched in enumerate(testloader):
            question, label = sample_batched['question'], sample_batched['label']
            question, label = question.cuda(), label.cuda()
            out = net.forward(question)
            tmp_acc = utils.cal_acc(out, label)
            test_acc += tmp_acc * question.shape[0]
        test_acc /= len(testset)
    print('valset  || questions: %d acc: %.4f' % (len(valset), val_acc))
    print('testset || questions: %d acc: %.4f' % (len(testset), test_acc))
def create_dictionary(dataroot):
    dictionary = Dictionary()
    files = ['imsitu_questions_prev.json']
    for path in files:
        question_path = os.path.join(dataroot, path)
        q_data = json.load(open(question_path))
        for verb, values in q_data.items():
            roles = values['roles']
            for role, info in roles.items():
                question = info['question']
                dictionary.tokenize(question, True)

    # Add all collected words from imSitu; contains both overlaps with VQA
    # and new words.
    imsitu_words_path = os.path.join(
        dataroot, 'allnverbsall_imsitu_words_nl2glovematching.json')
    imsitu_words = json.load(open(imsitu_words_path))
    for label, eng_name in imsitu_words.items():
        dictionary.tokenize(eng_name, True)
    print('with words coming from imsitu ', len(dictionary))
    return dictionary
def create_dictionary(dataroot):
    dictionary = Dictionary()
    role_name_corrector = 'data/roles_namecorrected.json'
    role_name_dict = json.load(open(role_name_corrector))
    files = ['imsitu_questions_prev.json']
    for path in files:
        question_path = os.path.join(dataroot, path)
        q_data = json.load(open(question_path))
        for verb, values in q_data.items():
            roles = values['roles']
            for role, info in roles.items():
                # Tokenize the corrected role name rather than the raw question.
                question = role_name_dict[role]
                dictionary.tokenize(question, True)
    return dictionary
def main():
    parser = argparse.ArgumentParser(
        "Save a model's predictions for the VQA-CP test set")
    parser.add_argument("model", help="Directory of the model")
    parser.add_argument("output_file", help="File to write json output to")
    args = parser.parse_args()
    path = args.model

    print("Loading data...")
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    train_dset = VQAFeatureDataset('train', dictionary, cp=True)
    eval_dset = VQAFeatureDataset('val', dictionary, cp=True)
    eval_loader = DataLoader(eval_dset, 256, shuffle=False, num_workers=0)

    constructor = 'build_%s' % 'baseline0_newatt'
    model = getattr(base_model, constructor)(train_dset, 1024).cuda()

    print("Loading state dict for %s..." % path)
    state_dict = torch.load(join(path, "model.pth"))
    # Strip a DataParallel "module." prefix if present.
    if all(k.startswith("module.") for k in state_dict):
        filtered = {}
        for k in state_dict:
            filtered[k[len("module."):]] = state_dict[k]
        state_dict = filtered
    # The debiasing loss parameters are not needed at inference time.
    for k in list(state_dict):
        if k.startswith("debias_loss_fn"):
            del state_dict[k]
    model.load_state_dict(state_dict)

    model.cuda()
    model.eval()
    print("Done")

    predictions = []
    for v, q, a, b in tqdm(eval_loader, ncols=100, total=len(eval_loader),
                           desc="eval"):
        v = Variable(v, volatile=True).cuda()
        q = Variable(q, volatile=True).cuda()
        factor = model(v, None, q, None, None, True)[0]
        prediction = torch.max(factor, 1)[1].data.cpu().numpy()
        for p in prediction:
            predictions.append(train_dset.label2ans[p])

    out = []
    for p, e in zip(predictions, eval_dset.entries):
        out.append(dict(answer=p, question_id=e["question_id"]))
    with open(join(path, args.output_file), "w") as f:
        json.dump(out, f)
def create_dictionary(dataroot):
    dictionary = Dictionary()
    files = [
        'v2_OpenEnded_mscoco_train2014_questions.json',
        'v2_OpenEnded_mscoco_val2014_questions.json',
        'v2_OpenEnded_mscoco_test2015_questions.json',
        'v2_OpenEnded_mscoco_test-dev2015_questions.json',
        'how_many_qa/HowMany-QA/qzcreate.json'
    ]
    for path in files:
        question_path = os.path.join(dataroot, path)
        qs = json.load(open(question_path))
        # The VQA files wrap the list in a 'questions' key;
        # the HowMany-QA file is already a list.
        if "HowMany-QA" not in path:
            qs = qs['questions']
        for q in qs:
            if 'question' in q:
                dictionary.tokenize(q['question'], True)
        print(path, "is ok")
    return dictionary
def create_dictionary(dataroot, task='vqa'):
    dictionary = Dictionary()
    if task == 'vqa':
        files = [
            'v2_OpenEnded_mscoco_train2014_questions.json',
            'v2_OpenEnded_mscoco_val2014_questions.json',
            'v2_OpenEnded_mscoco_test2015_questions.json',
            'v2_OpenEnded_mscoco_test-dev2015_questions.json'
        ]
        for path in files:
            question_path = os.path.join(dataroot, path)
            qs = json.load(open(question_path))['questions']
            for q in qs:
                dictionary.tokenize(q['question'], True)
    elif task == 'flickr':
        files = [
            'train_ids.pkl',
            'val_ids.pkl',
            'test_ids.pkl',
        ]
        sentence_dir = os.path.join(dataroot, 'Flickr30kEntities/Sentences')
        for path in files:
            ids_file = os.path.join(dataroot, path)
            with open(ids_file, 'rb') as f:
                imgids = cPickle.load(f)
            for image_id in imgids:
                question_path = os.path.join(sentence_dir, '%d.txt' % image_id)
                phrases = get_sent_data(question_path)
                for phrase in phrases:
                    dictionary.tokenize(phrase, True)
    return dictionary
def create_dictionary(dataroot, dataset, old_dictionary=None, args=None):
    dictionary = Dictionary()
    if old_dictionary is not None:
        print("Copying old dictionary to new dictionary")
        dictionary.word2idx = old_dictionary.word2idx
        dictionary.idx2word = old_dictionary.idx2word

    file_names = [
        'train_questions.json',
        'val_questions.json',
        'test_questions.json'
    ]
    if dataset.lower() == 'vqa2':
        file_names.append('test_dev_questions.json')
    files = []
    for f in file_names:
        files.append(os.path.join(dataroot, 'vqa2', f))
    if args.combine_with is not None:
        for cs in args.combine_with_splits:
            files.append(
                os.path.join(args.combine_with_dataroot, 'vqa2',
                             cs + "_questions.json"))
    print("files to process {}".format(files))
    for question_path in files:
        qs = json.load(open(question_path))['questions']
        for q in qs:
            dictionary.tokenize(q['question'], True)
    return dictionary
def main():
    logger.info("Creating vocabulary dictionary...")
    vocab = Dictionary.from_corpus(train_data, unk='<unk>')
    logger.info("Creating tag dictionary...")
    vocab_tags = Dictionary.from_corpus_tags(train_data, unk='<unk>')
    vocab.add_word('<s>')
    vocab.add_word('</s>')
    V = vocab.size()
    vocab_tags.add_word('<s>')
    vocab_tags.add_word('</s>')
    V_tag = vocab_tags.size()

    # Binary feature matrix: one row per tag, one column per sub-tag,
    # with reserved columns for <unk>, <s> and </s>.
    feature_matrix = np.zeros((vocab_tags.size(), vocab_tags.num_sub_tags))
    feature_matrix[(0, 0)] = 1  # unk encoding
    for tag, tag_id in vocab_tags:
        if tag == "<s>":
            feature_matrix[(tag_id, 1)] = 1
        elif tag == "</s>":
            feature_matrix[(tag_id, 2)] = 1
        else:
            for sub_tag in vocab_tags.map_tag_to_sub_tags[tag]:
                val = vocab_tags.map_sub_to_ids[sub_tag]
                feature_matrix[(tag_id, val)] = 1

    # Load trained word representations and print the 20 nearest
    # neighbours of the query word by cosine distance.
    Q = cPickle.load(open(sys.argv[4], 'rb'))
    print "START COMPARING"
    word = sys.argv[5]
    word_id = vocab.lookup_id(word)
    words = []
    for j, q in enumerate(Q):
        words.append((j, vocab.lookup_word(j), cosine(Q[word_id], q)))
    words.sort(key=lambda x: x[2])
    print words[:20]
def dispatch(cls, key, request):
    if key is None or request is None:
        raise Exception('key and request are required')
    kwargs = get_params(request)
    params = Dictionary()
    for k in kwargs:
        params.set(k, kwargs[k])
    params.filter()
    return cls.hand_logic(params, key, request)
def evalFromImages(args):
    # Fetch data.
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    print "Fetching eval data"
    imageLoader = imageModel.ImageLoader("data/val2014img", "val")
    eval_dset = VQAFeatureDataset('valSample', args.evalset_name, dictionary,
                                  imageLoader=imageLoader)

    # Fetch model.
    model = imageModel.getCombinedModel(args, eval_dset)
    model = nn.DataParallel(model).cuda()

    # Evaluate.
    eval_loader = DataLoader(eval_dset, args.batch_size, shuffle=True)
    print "Evaluating..."
    model.train(False)
    eval_score, bound = train.evaluate(model, eval_loader)
    print "eval score: %.2f (%.2f)" % (100 * eval_score, 100 * bound)
def trainNormal(args):
    # Fetch data.
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    print "Fetching train data"
    train_dset = VQAFeatureDataset('train', 'train', dictionary)
    print "Fetching eval data"
    eval_dset = VQAFeatureDataset('valSample', args.evalset_name, dictionary)

    # Fetch model.
    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_dset, args.num_hid).cuda()
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')
    model = nn.DataParallel(model).cuda()
    if args.load_path:
        load_path = os.path.join(args.load_path, 'model.pth')
        print "Loading model from {}".format(load_path)
        model.load_state_dict(torch.load(load_path))

    # Train.
    train_loader = DataLoader(train_dset, args.batch_size, shuffle=True)
    eval_loader = DataLoader(eval_dset, args.batch_size, shuffle=True)
    train.train(model, train_loader, eval_loader, args.epochs, args.output)
def evalNormal(args):
    # Fetch data.
    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    print "Fetching eval data"
    eval_dset = VQAFeatureDataset('val', args.evalset_name, dictionary)

    # Fetch model.
    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(eval_dset, args.num_hid).cuda()
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')
    model = nn.DataParallel(model).cuda()
    if args.load_path:
        load_path = os.path.join(args.load_path, 'model.pth')
        print "Loading model from {}".format(load_path)
        model.load_state_dict(torch.load(load_path))

    # Evaluate.
    eval_loader = DataLoader(eval_dset, args.batch_size, shuffle=True)
    print "Evaluating..."
    model.train(False)
    eval_score, bound = train.evaluate(model, eval_loader)
    print "eval score: %.2f (%.2f)" % (100 * eval_score, 100 * bound)
def create_dictionary(dataroot, only_image_questions):
    dictionary = Dictionary()
    files = [
        'official_aaai_split_train_data.json',
        'v2_OpenEnded_mscoco_train2014_questions.json'
    ]
    for path in files:
        question_path = os.path.join(dataroot, path)
        if path == 'official_aaai_split_train_data.json':
            if only_image_questions:
                qs = [example for example in json.load(open(question_path))
                      if example['q_type'] == 'image']
            else:
                qs = [example for example in json.load(open(question_path))
                      if example['image'] is not None]
        else:
            qs = json.load(open(question_path))['questions']
            # Tokenize the VisDial captions once, alongside the VQA questions.
            caps = [dia['caption'] for dia in
                    json.load(open(os.path.join(
                        dataroot, 'visdial_1.0_train.json')))['data']['dialogs']]
            for cap in caps:
                dictionary.tokenize(cap, True)
        for example in qs:
            dictionary.tokenize(example['question'], True)
            if path == 'official_aaai_split_train_data.json':
                dictionary.tokenize(example['image']['caption'], True)
    return dictionary
def create_dictionary(dataroot):
    dictionary = Dictionary()
    files = [
        'vqacp_v2_train_questions.json',
        'vqacp_v2_test_questions.json'
    ]
    for path in files:
        question_path = os.path.join(dataroot, path)
        qs = json.load(open(question_path))
        for q in qs:
            dictionary.tokenize(q['question'], True)
            if 'train' in path:
                # Some train entries also carry the original (pre-CP) question.
                try:
                    dictionary.tokenize(q['orig_question'], True)
                except KeyError:
                    continue
    return dictionary
def create_glove_embedding_init(idx2word, glove_file):
    word2emb = {}
    with open(glove_file, 'r', encoding='utf-8') as f:
        entries = f.readlines()
    emb_dim = len(entries[0].split(' ')) - 1
    print('embedding dim is %d' % emb_dim)
    weights = np.zeros((len(idx2word), emb_dim), dtype=np.float32)

    for entry in entries:
        vals = entry.split(' ')
        word = vals[0]
        vals = list(map(float, vals[1:]))
        word2emb[word] = np.array(vals)
    for idx, word in enumerate(idx2word):
        if word not in word2emb:
            continue
        weights[idx] = word2emb[word]
    return weights, word2emb


if __name__ == '__main__':
    d = create_dictionary(config.data_path)
    d.dump_to_file('./data/dictionary.pkl')

    d = Dictionary.load_from_file('./data/dictionary.pkl')
    emb_dim = 300
    # glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
    # Picks the third file in the directory; note os.listdir order is arbitrary.
    glove_file = os.path.join(config.data_glove_path,
                              os.listdir(config.data_glove_path)[2])
    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
    np.save('data/glove6b_init_%dd.npy' % emb_dim, weights)
def create_glove_embedding_init(idx2word, glove_file):
    word2emb = {}
    with open(glove_file, 'r', encoding='utf-8') as f:
        entries = f.readlines()
    emb_dim = len(entries[0].split(' ')) - 1
    print('embedding dim is %d' % emb_dim)
    weights = np.zeros((len(idx2word), emb_dim), dtype=np.float32)

    for entry in entries:
        vals = entry.split(' ')
        word = vals[0]
        vals = list(map(float, vals[1:]))
        word2emb[word] = np.array(vals)
    for idx, word in enumerate(idx2word):
        if word not in word2emb:
            continue
        weights[idx] = word2emb[word]
    return weights, word2emb


if __name__ == '__main__':
    args = parse_args()
    dataroot = 'data' if args.task == 'vqa' else 'data/flickr30k'
    dictionary_path = os.path.join(dataroot, 'dictionary.pkl')
    d = create_dictionary(dataroot, args.task)
    d.dump_to_file(dictionary_path)

    d = Dictionary.load_from_file(dictionary_path)
    emb_dim = 300
    glove_file = 'data/glove/glove.6B.%dd.txt' % emb_dim
    weights, word2emb = create_glove_embedding_init(d.idx2word, glove_file)
    np.save(os.path.join(dataroot, 'glove6b_init_%dd.npy' % emb_dim), weights)
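Both create_glove_embedding_init fragments assume the standard GloVe text format: one token per line followed by its vector components, whitespace-separated, so emb_dim is the token count of the first line minus one. A quick self-contained sanity check of that parsing logic, using a made-up four-dimensional line:

# Minimal check of the assumed GloVe line format
# ("word v1 v2 ... vD", whitespace-separated, one entry per line).
sample = "the 0.418 0.24968 -0.41242 0.1217"
vals = sample.split(' ')
assert vals[0] == "the"
emb_dim = len(vals) - 1          # 4 in this toy example; 300 for glove.6B.300d
vector = list(map(float, vals[1:]))
print(emb_dim, vector)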
    parser.add_argument('--num_hid', type=int, default=1024)
    parser.add_argument('--model', type=str, default='baseline0_newatt')
    parser.add_argument('--output', type=str, default='saved_models/exp0')
    parser.add_argument('--batch_size', type=int, default=512)
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.backends.cudnn.benchmark = True

    dictionary = Dictionary.load_from_file('data/dictionary.pkl')
    train_dset = VQAFeatureDataset('train', dictionary)
    eval_dset = VQAFeatureDataset('val', dictionary)
    batch_size = args.batch_size

    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(train_dset, args.num_hid).cuda()
    model.w_emb.init_embedding('data/glove6b_init_300d.npy')
    model = nn.DataParallel(model).cuda()

    train_loader = DataLoader(train_dset, batch_size, shuffle=True,
                              num_workers=1)
    eval_loader = DataLoader(eval_dset, batch_size, shuffle=True,
                             num_workers=1)
    train(model, train_loader, eval_loader, args.epochs, args.output)
def train_lbl(train_data, dev_data, test_data=[], K=20, context_sz=2,
              learning_rate=1.0, rate_update='simple', epochs=10,
              batch_size=100, rng=None, patience=None, patience_incr=2,
              improvement_thrs=0.995, validation_freq=1000,
              noise_data_ratio=25):
    """
    Train log-bilinear model with noise contrastive estimation
    """
    # create vocabulary from train data, plus <s>, </s>
    vocab = Dictionary.from_corpus(train_data, unk='<unk>')
    vocab.add_word('<s>')
    vocab.add_word('</s>')
    V = vocab.size()
    print vocab.vocab
    logger.debug("Vocabulary size: %d" % V)

    # initialize random generator if not provided
    rng = np.random.RandomState() if not rng else rng

    # generate (context, target) pairs of word ids
    train_set_x, train_set_y = make_instances(train_data, vocab, context_sz)
    dev_set_x, dev_set_y = make_instances(dev_data, vocab, context_sz)
    test_set_x, test_set_y = make_instances(test_data, vocab, context_sz)

    # generate noise samples from a unigram model, k per data point
    noise_model = UnigramLanguageModel(train_data, vocab)
    data_sz = train_set_x.shape.eval()[0]
    noise_set = theano.shared(
        np.asarray(noise_model.samples(noise_data_ratio * data_sz),
                   dtype=np.int32), borrow=True)

    # number of minibatches for training
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_dev_batches = dev_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    # build the model
    logger.info("Build the model ...")
    index = T.lscalar()
    x = T.imatrix('x')
    y = T.ivector('y')
    noise = T.ivector('noise')

    # create log-bilinear model
    lbl = LogBilinearLanguageModelNCE(x, V, K, context_sz, rng)

    # cost function is the unnormalized log-probability
    cost = lbl.unnormalized_neg_log_likelihood(y)
    noise_cost = lbl.unnormalized_neg_log_likelihood(noise)
    cost_normalized = lbl.negative_log_likelihood(y)

    # compute gradients w.r.t. data and noise samples
    gparams = []
    noise_gparams = []
    for param in lbl.params:
        gparams.append(T.grad(cost, param))
        noise_gparams.append(T.grad(noise_cost, param))

    # specify NCE objective update step for model parameters
    updates = []
    for param, gparam, noise_gparam in zip(lbl.params, gparams, noise_gparams):
        # data term; full NCE would scale this by the posterior weight
        # k * P_n(w) / (P_h(w) + k * P_n(w))
        update = gparam
        # gradient approximation with noise samples; full NCE would scale
        # this by P_h(w) / (P_h(w) + k * P_n(w))
        noise_update = noise_gparam
        # sum over k noise samples
        noise_update.reshape((noise_data_ratio, y.shape[0])).sum(axis=0)
        # overall update step on objective function J
        updates.append((param, param - learning_rate * (update - noise_update)))

    # function that computes normalized log-probability of the dev set
    logprob_dev = theano.function(
        inputs=[index], outputs=cost_normalized,
        givens={x: dev_set_x[index * batch_size: (index + 1) * batch_size],
                y: dev_set_y[index * batch_size: (index + 1) * batch_size]})

    # function that computes normalized log-probability of the test set
    logprob_test = theano.function(
        inputs=[index], outputs=cost_normalized,
        givens={x: test_set_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    # function that returns the unnormalized cost and updates the parameters
    train_model = theano.function(
        inputs=[index], outputs=cost, updates=updates,
        givens={x: train_set_x[index * batch_size: (index + 1) * batch_size],
                y: train_set_y[index * batch_size: (index + 1) * batch_size],
                noise: noise_set[index * batch_size * noise_data_ratio:
                                 (index + 1) * batch_size * noise_data_ratio]},
        on_unused_input='warn')

    # perplexity helpers
    def compute_dev_logp():
        return np.mean([logprob_dev(i) for i in xrange(n_dev_batches)])

    def compute_test_logp():
        return np.mean([logprob_test(i) for i in xrange(n_test_batches)])

    def ppl(neg_logp):
        return np.power(2.0, neg_logp)

    # train model
    logger.info("training model...")
    best_params = None
    last_epoch_dev_ppl = np.inf
    best_dev_ppl = np.inf
    test_ppl = np.inf
    start_time = time.clock()
    done_looping = False
    for epoch in xrange(epochs):
        if done_looping:
            break
        logger.debug('epoch %i' % epoch)
        for minibatch_index in xrange(n_train_batches):
            itr = epoch * n_train_batches + minibatch_index
            train_logp = train_model(minibatch_index)
            logger.debug('epoch %i, minibatch %i/%i, train minibatch log prob %.4f ppl %.4f'
                         % (epoch, minibatch_index + 1, n_train_batches,
                            train_logp, ppl(train_logp)))
            if (itr + 1) % validation_freq == 0:
                # compute perplexity on dev set, lower is better
                dev_logp = compute_dev_logp()
                dev_ppl = ppl(dev_logp)
                logger.debug('epoch %i, minibatch %i/%i, dev log prob %.4f ppl %.4f'
                             % (epoch, minibatch_index + 1, n_train_batches,
                                dev_logp, ppl(dev_logp)))
                # if we got the lowest perplexity until now
                if dev_ppl < best_dev_ppl:
                    # improve patience if loss improvement is good enough
                    if patience and dev_ppl < best_dev_ppl * improvement_thrs:
                        patience = max(patience, itr * patience_incr)
                    best_dev_ppl = dev_ppl
                    test_logp = compute_test_logp()
                    test_ppl = ppl(test_logp)
                    logger.debug('epoch %i, minibatch %i/%i, test log prob %.4f ppl %.4f'
                                 % (epoch, minibatch_index + 1, n_train_batches,
                                    test_logp, ppl(test_logp)))
            # stop learning if no improvement was seen for a long time
            if patience and patience <= itr:
                done_looping = True
                break
        # adapt learning rate
        if rate_update == 'simple':
            # set learning rate to 1 / (epoch+1)
            learning_rate = 1.0 / (epoch + 1)
        elif rate_update == 'adaptive':
            # halve learning rate if perplexity increased at end of epoch
            # (Mnih and Teh 2012)
            this_epoch_dev_ppl = ppl(compute_dev_logp())
            if this_epoch_dev_ppl > last_epoch_dev_ppl:
                learning_rate /= 2.0
            last_epoch_dev_ppl = this_epoch_dev_ppl
        elif rate_update == 'constant':
            # keep learning rate constant
            pass
        else:
            raise ValueError("Unknown learning rate update strategy: %s"
                             % rate_update)
    end_time = time.clock()
    total_time = end_time - start_time
    logger.info('Optimization complete with best dev ppl of %.4f and test ppl %.4f'
                % (best_dev_ppl, test_ppl))
    logger.info('Training took %d epochs, with %.1f epochs/sec'
                % (epoch + 1, float(epoch + 1) / total_time))
    logger.info("Total training time %d days %d hours %d min %d sec."
                % (total_time / 60 / 60 / 24, total_time / 60 / 60 % 24,
                   total_time / 60 % 60, total_time % 60))
    # return model
    return lbl
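The weights mentioned in the update-loop comments above are the standard NCE posterior terms from Mnih and Teh (2012): with k noise samples per data point, the gradient of a data word is scaled by k*P_n(w) / (P_h(w) + k*P_n(w)) and that of a noise sample by P_h(w) / (P_h(w) + k*P_n(w)). A small NumPy sketch of just those two weights, assuming P_h and P_n are plain probabilities (the names nce_weights, p_model, and p_noise are illustrative, not from the code above):

import numpy as np

def nce_weights(p_model, p_noise, k):
    """NCE posterior weights (Mnih and Teh 2012).

    p_model: model probabilities P_h(w) for a batch of words
    p_noise: noise (unigram) probabilities P_n(w) for the same words
    k: number of noise samples per data point
    Returns (data_weight, noise_weight): the data gradient is scaled by
    the first, each noise-sample gradient by the second.
    """
    p_model = np.asarray(p_model, dtype=np.float64)
    p_noise = np.asarray(p_noise, dtype=np.float64)
    denom = p_model + k * p_noise
    data_weight = k * p_noise / denom   # P(noise | w): pushes real words up
    noise_weight = p_model / denom      # P(data | w): pushes noise words down
    return data_weight, noise_weight

# Toy check: a word the model already scores highly gets a small data weight.
print(nce_weights(p_model=[0.5, 0.01], p_noise=[0.02, 0.02], k=25))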
def main():
    import argparse
    parser = argparse.ArgumentParser(
        description="imsitu VSRL. Training, evaluation and prediction.")
    parser.add_argument("--gpuid", default=-1, type=int,
                        help="put GPU id > -1 in GPU mode")
    # parser.add_argument("--command", choices=["train", "eval", "resume", 'predict'], required=True)
    parser.add_argument('--resume_training', action='store_true',
                        help='Resume training from the model [resume_model]')
    parser.add_argument('--resume_model', type=str, default='',
                        help='The model we resume')
    parser.add_argument('--pretrained_buatt_model', type=str, default='',
                        help='pretrained verb module')
    parser.add_argument('--train_role', action='store_true',
                        help='cnn fix, verb fix, role train from scratch')
    parser.add_argument('--use_pretrained_buatt', action='store_true',
                        help='cnn fix, verb finetune, role train from scratch')
    parser.add_argument('--finetune_cnn', action='store_true',
                        help='cnn finetune, verb finetune, role train from scratch')
    parser.add_argument('--output_dir', type=str, default='./trained_models',
                        help='Location to output the model')
    parser.add_argument('--evaluate', action='store_true',
                        help='Only use the dev evaluation mode')
    parser.add_argument('--test', action='store_true',
                        help='Only use the testing mode')
    parser.add_argument('--dataset_folder', type=str, default='./imSitu',
                        help='Location of annotations')
    parser.add_argument('--imgset_dir', type=str, default='./resized_256',
                        help='Location of original images')
    parser.add_argument('--frcnn_feat_dir', type=str,
                        help='Location of output from detectron')
    parser.add_argument('--train_file', default="train_new_2000_all.json",
                        type=str, help='train file name')
    parser.add_argument('--dev_file', default="dev_new_2000_all.json",
                        type=str, help='dev file name')
    parser.add_argument('--test_file', default="test_new_2000_all.json",
                        type=str, help='test file name')
    parser.add_argument('--model_saving_name', type=str,
                        help='save name of the output model')
    parser.add_argument('--epochs', type=int, default=500)
    parser.add_argument('--num_hid', type=int, default=1024)
    parser.add_argument('--model', type=str,
                        default='baseline0grid_imsitu_agent')
    parser.add_argument('--output', type=str, default='saved_models/exp0')
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--num_iter', type=int, default=1)
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    # todo: train role module separately with gt verbs
    args = parser.parse_args()

    clip_norm = 0.25
    n_epoch = args.epochs
    batch_size = args.batch_size
    n_worker = 3

    dataset_folder = args.dataset_folder
    imgset_folder = args.imgset_dir

    print('model spec: top-down attention with role q')

    train_set = json.load(open(dataset_folder + '/' + args.train_file))
    imsitu_roleq = json.load(open("data/imsitu_questions_prev.json"))
    dict_path = 'data/dictionary_imsitu_roleall.pkl'
    dictionary = Dictionary.load_from_file(dict_path)
    w_emb_path = 'data/glove6b_init_imsitu_roleall_300d.npy'
    encoder = imsitu_encoder(train_set, imsitu_roleq, dictionary)

    train_set = imsitu_loader_roleq_buatt_place(
        imgset_folder, train_set, encoder, dictionary, 'train',
        encoder.train_transform)

    constructor = 'build_%s' % args.model
    model = getattr(base_model, constructor)(
        train_set, args.num_hid, len(encoder.place_label_list), encoder)
    model.w_emb.init_embedding(w_emb_path)

    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=batch_size, shuffle=True, num_workers=n_worker)

    dev_set = json.load(open(dataset_folder + '/' + args.dev_file))
    dev_set = imsitu_loader_roleq_buatt_place(
        imgset_folder, dev_set, encoder, dictionary, 'val',
        encoder.dev_transform)
    dev_loader = torch.utils.data.DataLoader(
        dev_set, batch_size=batch_size, shuffle=True, num_workers=n_worker)

    test_set = json.load(open(dataset_folder + '/' + args.test_file))
    test_set = imsitu_loader_roleq_buatt_place(
        imgset_folder, test_set, encoder, dictionary, 'test',
        encoder.dev_transform)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=batch_size, shuffle=True, num_workers=n_worker)

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    torch.manual_seed(1234)
    if args.gpuid >= 0:
        model.cuda()
        torch.cuda.manual_seed(1234)
        torch.backends.cudnn.deterministic = True

    if args.use_pretrained_buatt:
        print('Use pretrained from: {}'.format(args.pretrained_buatt_model))
        if len(args.pretrained_buatt_model) == 0:
            raise Exception('[pretrained buatt module] not specified')
        utils_imsitu.load_net_ban(args.pretrained_buatt_model, [model],
                                  ['module'], ['w_emb', 'classifier'])
        model_name = 'pre_trained_buatt'
    elif args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        args.train_all = True
        if len(args.resume_model) == 0:
            raise Exception('[pretrained module] not specified')
        utils_imsitu.load_net(args.resume_model, [model])
        optimizer_select = 0
        model_name = 'resume_all'
    else:
        print('Training from scratch.')
        model_name = 'train_full'
        utils_imsitu.set_trainable(model, True)

    # Per-module learning rates: the classifier and word embeddings train at
    # the base rate, the question encoder faster than the pretrained
    # attention and fusion networks.
    optimizer = torch.optim.Adamax([
        {'params': model.classifier.parameters()},
        {'params': model.w_emb.parameters()},
        {'params': model.q_emb.parameters(), 'lr': 5e-4},
        {'params': model.v_att.parameters(), 'lr': 5e-5},
        {'params': model.q_net.parameters(), 'lr': 5e-5},
        {'params': model.v_net.parameters(), 'lr': 5e-5},
    ], lr=1e-3)

    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

    if args.evaluate:
        top1, top5, val_loss = eval(model, dev_loader, encoder, args.gpuid,
                                    write_to_file=True)
        top1_avg = top1.get_average_results_nouns()
        top5_avg = top5.get_average_results_nouns()
        avg_score = (top1_avg["verb"] + top1_avg["value"]
                     + top1_avg["value-all"] + top5_avg["verb"]
                     + top5_avg["value"] + top5_avg["value-all"]
                     + top5_avg["value*"] + top5_avg["value-all*"])
        avg_score /= 8
        print('Dev average :{:.2f} {} {}'.format(
            avg_score * 100,
            utils_imsitu.format_dict(top1_avg, '{:.2f}', '1-'),
            utils_imsitu.format_dict(top5_avg, '{:.2f}', '5-')))

        # write results to json files
        role_dict = top1.role_dict
        fail_val_all = top1.value_all_dict
        pass_val_dict = top1.vall_all_correct
        with open('role_pred_data.json', 'w') as fp:
            json.dump(role_dict, fp, indent=4)
        with open('fail_val_all.json', 'w') as fp:
            json.dump(fail_val_all, fp, indent=4)
        with open('pass_val_all.json', 'w') as fp:
            json.dump(pass_val_dict, fp, indent=4)
        print('Writing predictions to file completed!')
    elif args.test:
        top1, top5, val_loss = eval(model, test_loader, encoder, args.gpuid,
                                    write_to_file=True)
        top1_avg = top1.get_average_results_nouns()
        top5_avg = top5.get_average_results_nouns()
        avg_score = (top1_avg["verb"] + top1_avg["value"]
                     + top1_avg["value-all"] + top5_avg["verb"]
                     + top5_avg["value"] + top5_avg["value-all"]
                     + top5_avg["value*"] + top5_avg["value-all*"])
        avg_score /= 8
        print('Test average :{:.2f} {} {}'.format(
            avg_score * 100,
            utils_imsitu.format_dict(top1_avg, '{:.2f}', '1-'),
            utils_imsitu.format_dict(top5_avg, '{:.2f}', '5-')))
    else:
        print('Model training started!')
        train(model, train_loader, dev_loader, None, optimizer, scheduler,
              n_epoch, args.output_dir, encoder, args.gpuid, clip_norm,
              None, model_name, args.model_saving_name, args)
def train_lbl(train_data, dev_data, test_data=[], K=20, word_context_sz=2,
              char_context_sz=2, learning_rate=1.0, rate_update='simple',
              epochs=10, batch_size=100, rng=None, patience=None,
              patience_incr=2, improvement_thrs=0.995, validation_freq=1000):
    """
    Train log-bilinear model
    """
    # create vocabulary from train data, plus <s>, </s>
    vocab = Dictionary.from_corpus(train_data, unk='<unk>')
    vocab.add_word('<s>')
    vocab.add_word('</s>')
    V = vocab.size()

    # initialize random generator if not provided
    rng = np.random.RandomState() if not rng else rng

    # generate (context, target) pairs of word ids
    train_word_x, train_char_x, train_set_y = make_instances(
        train_data, vocab, word_context_sz, char_context_sz)
    dev_word_x, dev_char_x, dev_set_y = make_instances(
        dev_data, vocab, word_context_sz, char_context_sz)
    test_word_x, test_char_x, test_set_y = make_instances(
        test_data, vocab, word_context_sz, char_context_sz)

    # number of minibatches for training
    n_train_batches = train_word_x.get_value(borrow=True).shape[0] / batch_size
    n_dev_batches = dev_word_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_word_x.get_value(borrow=True).shape[0] / batch_size

    # build the model
    logger.info("Build the model ...")
    index = T.lscalar()
    x_word = T.imatrix('x_word')
    x_char = T.imatrix('x_char')
    y = T.ivector('y')

    # create log-bilinear model
    lbl = LogBilinearLanguageModel(x_word, x_char, V, K,
                                   word_context_sz, char_context_sz, rng)

    # cost function is negative log likelihood of the training data
    cost = lbl.negative_log_likelihood(y)

    # compute the gradient
    gparams = []
    for param in lbl.params:
        gparams.append(T.grad(cost, param))

    # specify how to update the parameters of the model
    updates = []
    for param, gparam in zip(lbl.params, gparams):
        updates.append((param, param - learning_rate * gparam))

    # function that computes log-probability of the dev set
    logprob_dev = theano.function(
        inputs=[index], outputs=cost,
        givens={x_word: dev_word_x[index * batch_size: (index + 1) * batch_size],
                x_char: dev_char_x[index * batch_size: (index + 1) * batch_size],
                y: dev_set_y[index * batch_size: (index + 1) * batch_size]})

    # function that computes log-probability of the test set
    logprob_test = theano.function(
        inputs=[index], outputs=cost,
        givens={x_word: test_word_x[index * batch_size: (index + 1) * batch_size],
                x_char: test_char_x[index * batch_size: (index + 1) * batch_size],
                y: test_set_y[index * batch_size: (index + 1) * batch_size]})

    # function that returns the cost and updates the parameters
    train_model = theano.function(
        inputs=[index], outputs=cost, updates=updates,
        givens={x_word: train_word_x[index * batch_size: (index + 1) * batch_size],
                x_char: train_char_x[index * batch_size: (index + 1) * batch_size],
                y: train_set_y[index * batch_size: (index + 1) * batch_size]})

    # perplexity helpers
    def compute_dev_logp():
        return np.mean([logprob_dev(i) for i in xrange(n_dev_batches)])

    def compute_test_logp():
        return np.mean([logprob_test(i) for i in xrange(n_test_batches)])

    def ppl(neg_logp):
        return np.power(2.0, neg_logp)

    # train model
    logger.info("training model...")
    best_params = None
    last_epoch_dev_ppl = np.inf
    best_dev_ppl = np.inf
    test_ppl = np.inf
    start_time = time.clock()
    done_looping = False
    for epoch in xrange(epochs):
        if done_looping:
            break
        logger.debug('epoch %i' % epoch)
        for minibatch_index in xrange(n_train_batches):
            itr = epoch * n_train_batches + minibatch_index
            train_logp = train_model(minibatch_index)
            logger.debug('epoch %i, minibatch %i/%i, train minibatch log prob %.4f ppl %.4f'
                         % (epoch, minibatch_index + 1, n_train_batches,
                            train_logp, ppl(train_logp)))
            if (itr + 1) % validation_freq == 0:
                # compute perplexity on dev set, lower is better
                dev_logp = compute_dev_logp()
                dev_ppl = ppl(dev_logp)
                logger.debug('epoch %i, minibatch %i/%i, dev log prob %.4f ppl %.4f'
                             % (epoch, minibatch_index + 1, n_train_batches,
                                dev_logp, ppl(dev_logp)))
                # if we got the lowest perplexity until now
                if dev_ppl < best_dev_ppl:
                    # improve patience if loss improvement is good enough
                    if patience and dev_ppl < best_dev_ppl * improvement_thrs:
                        patience = max(patience, itr * patience_incr)
                    best_dev_ppl = dev_ppl
                    test_logp = compute_test_logp()
                    test_ppl = ppl(test_logp)
                    logger.debug('epoch %i, minibatch %i/%i, test log prob %.4f ppl %.4f'
                                 % (epoch, minibatch_index + 1, n_train_batches,
                                    test_logp, ppl(test_logp)))
            # stop learning if no improvement was seen for a long time
            if patience and patience <= itr:
                done_looping = True
                break
        # adapt learning rate
        if rate_update == 'simple':
            # set learning rate to 1 / (epoch+1)
            learning_rate = 1.0 / (epoch + 1)
        elif rate_update == 'adaptive':
            # halve learning rate if perplexity increased at end of epoch
            # (Mnih and Teh 2012)
            this_epoch_dev_ppl = ppl(compute_dev_logp())
            if this_epoch_dev_ppl > last_epoch_dev_ppl:
                learning_rate /= 2.0
            last_epoch_dev_ppl = this_epoch_dev_ppl
        elif rate_update == 'constant':
            # keep learning rate constant
            pass
        else:
            raise ValueError("Unknown learning rate update strategy: %s"
                             % rate_update)
    end_time = time.clock()
    total_time = end_time - start_time
    logger.info('Optimization complete with best dev ppl of %.4f and test ppl %.4f'
                % (best_dev_ppl, test_ppl))
    logger.info('Training took %d epochs, with %.1f epochs/sec'
                % (epoch + 1, float(epoch + 1) / total_time))
    logger.info("Total training time %d days %d hours %d min %d sec."
                % (total_time / 60 / 60 / 24, total_time / 60 / 60 % 24,
                   total_time / 60 % 60, total_time % 60))
    # return model
    return lbl