def make_vocab_files():
    """
    Build the question and answer vocabularies.

    Loads data through VQADataProvider.load_data for the spaces named by
    config.QUESTION_VOCAB_SPACE and config.ANSWER_VOCAB_SPACE, then hands
    the question dictionary to make_question_vocab and the answer
    dictionary (together with config.NUM_OUTPUT_UNITS) to make_answer_vocab.
    Nothing is written to disk by this function itself.

    Returns:
        A (question_vocab, answer_vocab) tuple.
    """
    # A single pre-formatted argument prints the same text whether `print`
    # is the Python 2 statement or the Python 3 function; the 1-tuple keeps
    # '%s' safe even if the config value is itself a tuple.
    print('making question vocab... %s' % (config.QUESTION_VOCAB_SPACE,))
    qdic, _ = VQADataProvider.load_data(config.QUESTION_VOCAB_SPACE)
    question_vocab = make_question_vocab(qdic)

    print('making answer vocab... %s' % (config.ANSWER_VOCAB_SPACE,))
    _, adic = VQADataProvider.load_data(config.ANSWER_VOCAB_SPACE)
    answer_vocab = make_answer_vocab(adic, config.NUM_OUTPUT_UNITS)
    return question_vocab, answer_vocab
def make_vocab_files():
    """
    Build the question word/character vocabularies and the answer vocabulary.

    Progress messages are sent to write_log with 'log.txt' as the second
    argument.

    NOTE(review): this unpacks two values from make_question_vocab, i.e. it
    expects a (word_vocab, char_vocab) pair -- confirm against the
    make_question_vocab definition actually in use.

    Returns:
        A (question_vocab, question_char_vocab, answer_vocab) tuple.
    """
    log_file = 'log.txt'

    # Question side: one load feeds both the word and character vocabularies.
    question_space = config.QUESTION_VOCAB_SPACE
    write_log('making question vocab... ' + question_space, log_file)
    write_log('making question character vocab... ' + question_space,
              log_file)
    questions, _ = VQADataProvider.load_data(question_space)
    word_vocab, char_vocab = make_question_vocab(questions)

    # Answer side.
    answer_space = config.ANSWER_VOCAB_SPACE
    write_log('making answer vocab... ' + answer_space, log_file)
    _, answers = VQADataProvider.load_data(answer_space)
    ans_vocab = make_answer_vocab(answers, config.NUM_OUTPUT_UNITS)

    return word_vocab, char_vocab, ans_vocab
def make_vocab_files():
    """
    Build the question, answer, and explanation vocabularies.

    Each vocabulary is built from its own VQADataProvider.load_data call,
    which returns a (questions, answers, explanations) triple; only the
    relevant element of each triple is used.

    Returns:
        A (question_vocab, answer_vocab, explanation_vocab) tuple.
    """
    # Questions.
    question_space = config.QUESTION_VOCAB_SPACE
    print('making question vocab...', question_space)
    questions, _, _ = VQADataProvider.load_data(question_space)
    q_vocab = make_question_vocab(questions)

    # Answers.
    answer_space = config.ANSWER_VOCAB_SPACE
    print('making answer vocab...', answer_space)
    _, answers, _ = VQADataProvider.load_data(answer_space)
    a_vocab = make_answer_vocab(answers, config.NUM_OUTPUT_UNITS)

    # Explanations.
    exp_space = config.EXP_VOCAB_SPACE
    print('making explanation vocab...', exp_space)
    _, _, explanations = VQADataProvider.load_data(exp_space)
    e_vocab = make_exp_vocab(explanations)

    return q_vocab, a_vocab, e_vocab
def make_question_char_vocab(qdic):
    """
    Returns a dictionary that maps characters to indices.

    Args:
        qdic: mapping whose values are dicts holding the question text
            under the key 'qstr'.

    Returns:
        A dict from character to integer index. The empty string is
        always index 0; every other character gets the next free index in
        the order it is first encountered.
    """
    cdict = {'': 0}
    for entry in qdic.values():
        for c in VQADataProvider.seq_to_char_list(entry['qstr']):
            # `in` replaces dict.has_key(), which no longer exists in
            # Python 3; it behaves the same on Python 2.
            if c not in cdict:
                # Indices are dense from 0, so the next free one is the
                # current size of the dictionary.
                cdict[c] = len(cdict)
    return cdict
def make_question_vocab(qdic):
    """
    Returns a dictionary that maps words to indices.

    Args:
        qdic: mapping whose values are dicts holding the question text
            under the key 'qstr'.

    Returns:
        A dict from word to integer index. The empty string is always
        index 0; every other word gets the next free index in the order it
        is first encountered.
    """
    vdict = {'': 0}
    for entry in qdic.values():
        # sequence to list
        q_list = VQADataProvider.seq_to_list(entry['qstr'])
        # create dict
        for w in q_list:
            # `in` replaces dict.has_key(), which no longer exists in
            # Python 3; it behaves the same on Python 2.
            if w not in vdict:
                # Indices are dense from 0, so the next free one is the
                # current size of the dictionary.
                vdict[w] = len(vdict)
    return vdict
def make_question_vocab(qdic):
    """
    Returns a dictionary that maps words to indices.

    Args:
        qdic: mapping whose values are dicts holding the question text
            under the key 'qstr'.

    Returns:
        A dict from word to integer index. The empty string is always
        index 0; every other word gets the next free index in the order it
        is first encountered.
    """
    vdict = {'': 0}
    for entry in qdic.values():
        # sequence to list
        q_list = VQADataProvider.seq_to_list(entry['qstr'])
        # create dict
        for w in q_list:
            # dict.has_key() was removed in Python 3; the `in` operator is
            # the equivalent membership test on both Python 2 and 3.
            if w not in vdict:
                # The dictionary is filled with consecutive ids starting at
                # 0, so its size is always the next unused id.
                vdict[w] = len(vdict)
    return vdict
def make_exp_vocab(exp_dic):
    """
    Returns a dictionary that maps explanation words to indices.

    '<EOS>' is always index 0 and the empty string index 1. Every other
    word found by VQADataProvider.seq_to_list in the explanation strings
    gets the next free index in the order it is first encountered.

    Args:
        exp_dic: mapping whose values are iterables of explanation strings.
    """
    vocab = {'<EOS>': 0, '': 1}
    for explanations in exp_dic.values():
        for sentence in explanations:
            for word in VQADataProvider.seq_to_list(sentence):
                # Ids are consecutive from 0, so the current size is the
                # next unused id; setdefault leaves known words untouched.
                vocab.setdefault(word, len(vocab))
    return vocab
def exec_validation(device_id, mode, it='', visualize=False):
    """
    Run the saved test network over one epoch of `mode` data and write the
    predicted answers to a result file.

    Args:
        device_id: GPU id handed to caffe.set_device.
        mode: data split passed to VQADataProvider. 'val', 'test-dev' and
            'test' each write a result file; any other value only runs the
            forward passes.
        it: value zero-padded to 8 characters and embedded in the
            'test-dev' / 'test' result file names.
        visualize: when true, per-question statistics are collected and
            passed to visualize_failures.

    Returns:
        For mode == 'val': (mean_testloss, acc_overall,
        acc_perQuestionType, acc_perAnswerType) taken from VQAEval.
        For every other mode: None.
    """
    caffe.set_device(device_id)
    caffe.set_mode_gpu()
    net = caffe.Net('./result/proto_test.prototxt',
                    './result/tmp.caffemodel',
                    caffe.TEST)

    dp = VQADataProvider(mode=mode, batchsize=64)
    total_questions = len(dp.getQuesIds())
    # The original test was `mode == 'test-dev' or 'test'`, which is always
    # true because the non-empty string 'test' is truthy; the ground-truth
    # branch below was therefore unreachable.
    is_test_split = mode in ('test-dev', 'test')
    epoch = 0
    pred_list = []
    testloss_list = []
    stat_list = []
    while epoch == 0:
        (t_word, t_cont, t_img_feature, t_answer, t_glove_matrix,
         t_qid_list, t_iid_list, epoch) = dp.get_batch_vec()
        net.blobs['data'].data[...] = np.transpose(t_word, (1, 0))
        net.blobs['cont'].data[...] = np.transpose(t_cont, (1, 0))
        net.blobs['img_feature'].data[...] = t_img_feature
        net.blobs['label'].data[...] = t_answer
        net.blobs['glove'].data[...] = np.transpose(t_glove_matrix, (1, 0, 2))
        net.forward()
        t_pred_list = net.blobs['prediction'].data.argmax(axis=1)
        t_pred_str = [dp.vec_to_answer(pred_symbol)
                      for pred_symbol in t_pred_list]
        # Copy the loss: the blob's array may be a view onto a buffer that
        # the next forward() overwrites, which would make every stored
        # entry equal to the last batch's loss.
        testloss_list.append(net.blobs['loss'].data.copy())
        for qid, iid, ans, pred in zip(t_qid_list, t_iid_list,
                                       t_answer.tolist(), t_pred_str):
            pred_list.append({u'answer': pred,
                              u'question_id': int(dp.getStrippedQuesId(qid))})
            if visualize:
                q_list = dp.seq_to_list(dp.getQuesStr(qid))
                if is_test_split:
                    # No ground truth is looked up for the test splits.
                    ans_str = ''
                    ans_list = [''] * 10
                else:
                    ans_str = dp.vec_to_answer(ans)
                    # range() instead of the Python 2-only xrange().
                    ans_list = [dp.getAnsObj(qid)[i]['answer']
                                for i in range(10)]
                stat_list.append({
                    'qid': qid,
                    'q_list': q_list,
                    'iid': iid,
                    'answer': ans_str,
                    'ans_list': ans_list,
                    'pred': pred,
                })
        # Progress indicator, rewritten in place on one console line.
        percent = 100 * float(len(pred_list)) / total_questions
        sys.stdout.write('\r' + ('%.2f' % percent) + '%')
        sys.stdout.flush()

    mean_testloss = np.array(testloss_list).mean()

    if mode == 'val':
        valFile = './result/val2015_resfile'
        with open(valFile, 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
        annFile = config.DATA_PATHS['val']['ans_file']
        quesFile = config.DATA_PATHS['val']['ques_file']
        vqa = VQA(annFile, quesFile)
        vqaRes = vqa.loadRes(valFile, quesFile)
        vqaEval = VQAEval(vqa, vqaRes, n=2)
        vqaEval.evaluate()
        acc_overall = vqaEval.accuracy['overall']
        acc_perQuestionType = vqaEval.accuracy['perQuestionType']
        acc_perAnswerType = vqaEval.accuracy['perAnswerType']
        return (mean_testloss, acc_overall, acc_perQuestionType,
                acc_perAnswerType)
    elif mode == 'test-dev':
        filename = ('./result/vqa_OpenEnded_mscoco_test-dev2015_v3t'
                    + str(it).zfill(8) + '_results')
        with open(filename + '.json', 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
    elif mode == 'test':
        filename = ('./result/vqa_OpenEnded_mscoco_test2015_v3c'
                    + str(it).zfill(8) + '_results')
        with open(filename + '.json', 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
def exec_validation(device_id, mode, it='', visualize=False):
    """
    Run the saved test network over one epoch of `mode` data and write the
    predicted answers to a result file.

    Args:
        device_id: GPU id handed to caffe.set_device.
        mode: data split passed to VQADataProvider. 'val', 'test-dev' and
            'test' each write a result file; any other value only runs the
            forward passes.
        it: value zero-padded to 8 characters and embedded in the
            'test-dev' / 'test' result file names.
        visualize: when true, per-question statistics are collected and
            passed to visualize_failures.

    Returns:
        For mode == 'val': (mean_testloss, acc_overall,
        acc_perQuestionType, acc_perAnswerType) taken from VQAEval.
        For every other mode: None.
    """
    caffe.set_device(device_id)
    caffe.set_mode_gpu()
    net = caffe.Net('./result/proto_test.prototxt',
                    './result/tmp.caffemodel',
                    caffe.TEST)

    dp = VQADataProvider(mode=mode, batchsize=64)
    total_questions = len(dp.getQuesIds())
    # The original test was `mode == 'test-dev' or 'test'`, which is always
    # true because the non-empty string 'test' is truthy; the ground-truth
    # branch below was therefore unreachable.
    is_test_split = mode in ('test-dev', 'test')
    epoch = 0
    pred_list = []
    testloss_list = []
    stat_list = []
    while epoch == 0:
        (t_word, t_cont, t_img_feature, t_answer,
         t_qid_list, t_iid_list, epoch) = dp.get_batch_vec()
        net.blobs['data'].data[...] = np.transpose(t_word, (1, 0))
        net.blobs['cont'].data[...] = np.transpose(t_cont, (1, 0))
        net.blobs['img_feature'].data[...] = t_img_feature
        net.blobs['label'].data[...] = t_answer
        net.forward()
        t_pred_list = net.blobs['prediction'].data.argmax(axis=1)
        t_pred_str = [dp.vec_to_answer(pred_symbol)
                      for pred_symbol in t_pred_list]
        # Copy the loss: the blob's array may be a view onto a buffer that
        # the next forward() overwrites, which would make every stored
        # entry equal to the last batch's loss.
        testloss_list.append(net.blobs['loss'].data.copy())
        for qid, iid, ans, pred in zip(t_qid_list, t_iid_list,
                                       t_answer.tolist(), t_pred_str):
            pred_list.append({
                'answer': pred,
                'question_id': int(dp.getStrippedQuesId(qid)),
            })
            if visualize:
                q_list = dp.seq_to_list(dp.getQuesStr(qid))
                if is_test_split:
                    # No ground truth is looked up for the test splits.
                    ans_str = ''
                    ans_list = [''] * 10
                else:
                    ans_str = dp.vec_to_answer(ans)
                    ans_list = [dp.getAnsObj(qid)[i]['answer']
                                for i in range(10)]
                stat_list.append({
                    'qid': qid,
                    'q_list': q_list,
                    'iid': iid,
                    'answer': ans_str,
                    'ans_list': ans_list,
                    'pred': pred,
                })
        # Progress indicator, rewritten in place on one console line.
        percent = 100 * float(len(pred_list)) / total_questions
        sys.stdout.write('\r' + ('%.2f' % percent) + '%')
        sys.stdout.flush()

    mean_testloss = np.array(testloss_list).mean()

    if mode == 'val':
        valFile = './result/val2015_resfile'
        with open(valFile, 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
        annFile = config.DATA_PATHS['val']['ans_file']
        quesFile = config.DATA_PATHS['val']['ques_file']
        vqa = VQA(annFile, quesFile)
        vqaRes = vqa.loadRes(valFile, quesFile)
        vqaEval = VQAEval(vqa, vqaRes, n=2)
        vqaEval.evaluate()
        acc_overall = vqaEval.accuracy['overall']
        acc_perQuestionType = vqaEval.accuracy['perQuestionType']
        acc_perAnswerType = vqaEval.accuracy['perAnswerType']
        return (mean_testloss, acc_overall, acc_perQuestionType,
                acc_perAnswerType)
    elif mode == 'test-dev':
        filename = ('./result/vqa_OpenEnded_mscoco_test-dev2015_v3t'
                    + str(it).zfill(8) + '_results')
        with open(filename + '.json', 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
    elif mode == 'test':
        filename = ('./result/vqa_OpenEnded_mscoco_test2015_v3c'
                    + str(it).zfill(8) + '_results')
        with open(filename + '.json', 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
def generate_sentences(args):
    """
    Predict an answer and generate an explanation sentence for every
    question served by the data provider, then dump both to JSON.

    For each batch the VQA net is run once to obtain answer predictions and
    the 'exp_att_feature' blob; the explanation net is then stepped one
    word at a time, feeding each predicted word back in, until every
    sequence in the batch has produced word id 0 or args.exp_max_length
    steps have been taken.

    Args:
        args: namespace providing folder, use_gt, model_path, ques_file,
            ann_file, exp_file, batch_size, data_shape,
            img_feature_prefix, max_length, exp_max_length, gpu, out_dir
            and save_att_map.

    Side effects:
        Creates args.out_dir (and an 'att_maps' sub-directory when
        args.save_att_map is set) if missing, and writes
        'exp_results.json' and 'vqa_results.json' into args.out_dir.
    """
    vqa_proto_path, exp_proto_path, adict_path, vdict_path, exp_vdict_path = \
        verify_folder(args.folder, args.use_gt)
    model_path = args.model_path

    dp = VQADataProvider(args.ques_file, args.ann_file, args.exp_file,
                         vdict_path, adict_path, exp_vdict_path,
                         args.batch_size, args.data_shape,
                         args.img_feature_prefix, args.max_length,
                         args.exp_max_length, mode='val')
    total_questions = len(dp.getQuesIds())
    print(total_questions, 'total questions')

    caffe.set_device(args.gpu)
    caffe.set_mode_gpu()

    vqa_data_provider_layer.CURRENT_DATA_SHAPE = args.data_shape
    vqa_data_provider_layer.MAX_WORDS_IN_QUESTION = args.max_length
    vqa_data_provider_layer.MAX_WORDS_IN_EXP = args.exp_max_length

    exp_data_provider_layer.CURRENT_DATA_SHAPE = args.data_shape[0]
    exp_data_provider_layer.MAX_WORDS_IN_EXP = 1  # predict one by one

    # Both nets are initialised from the same weights file.
    vqa_net = caffe.Net(vqa_proto_path, model_path, caffe.TEST)
    exp_net = caffe.Net(exp_proto_path, model_path, caffe.TEST)
    print('VQA model loaded:', vqa_proto_path, model_path)
    print('EXP model loaded:', exp_proto_path, model_path)

    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)
    if args.save_att_map:
        att_map_save_dir = os.path.join(args.out_dir, 'att_maps')
        if not os.path.exists(att_map_save_dir):
            os.makedirs(att_map_save_dir)

    final_results = {}
    vqa_submit_results = []
    epoch = 0
    while epoch == 0:
        qvec, cvec, ivec, avec, exp_vec, exp_vec_out, exp_cvec_1, \
            exp_cvec_2, qid_list, _, epoch = dp.get_batch_vec()

        # Resize the input blobs only when the incoming batch does not
        # match the shape the net currently holds.
        shape = ivec.shape
        if vqa_net.blobs['img_feature'].data.shape != shape:
            vqa_net.blobs['img_feature'].reshape(*shape)
            vqa_net.blobs['data'].reshape(*np.transpose(qvec, (1, 0)).shape)
            # Was `cont`, a name that is never defined in this function.
            vqa_net.blobs['cont'].reshape(*np.transpose(cvec, (1, 0)).shape)
            vqa_net.blobs['label'].reshape(*avec.shape)
            # Shapes are unpacked so that each dimension is its own
            # argument, matching the reshape calls above.
            vqa_net.blobs['exp'].reshape(*exp_vec.transpose().shape)
            vqa_net.blobs['exp_out'].reshape(*exp_vec_out.transpose().shape)
            vqa_net.blobs['exp_cont_1'].reshape(
                *exp_cvec_1.transpose().shape)
            vqa_net.blobs['exp_cont_2'].reshape(
                *exp_cvec_2.transpose().shape)

        vqa_net.blobs['data'].data[...] = np.transpose(qvec, (1, 0))
        vqa_net.blobs['cont'].data[...] = np.transpose(cvec, (1, 0))
        vqa_net.blobs['img_feature'].data[...] = ivec
        vqa_net.blobs['label'].data[...] = avec
        vqa_net.blobs['exp'].data[...] = exp_vec.transpose()  # not used
        vqa_net.blobs['exp_out'].data[...] = \
            exp_vec_out.transpose()  # not used
        vqa_net.blobs['exp_cont_1'].data[...] = \
            exp_cvec_1.transpose()  # not used
        vqa_net.blobs['exp_cont_2'].data[...] = \
            exp_cvec_2.transpose()  # not used
        vqa_net.forward()

        vqa_predictions = vqa_net.blobs['prediction'].data.copy()
        vqa_preds = vqa_predictions.argmax(axis=1)
        exp_att_feature = vqa_net.blobs['exp_att_feature'].data.copy()
        exp_att_feature = np.squeeze(exp_att_feature)
        vqa_att_map = vqa_net.blobs['att_map'].data.copy()
        exp_att_map = vqa_net.blobs['exp_att_map'].data.copy()
        if args.save_att_map:
            save_att_map(qid_list, exp_att_map, att_map_save_dir)

        # Word-by-word decoding of the explanation.
        finished = np.zeros(args.batch_size)
        predicted_words = []
        conts = []
        t = 0
        prev_word = exp_vec[:, 0].reshape(
            (1, args.batch_size))  # Initialize with <SOS>
        continuation = np.zeros(
            (1, args.batch_size))  # flush out for the first word
        while finished.sum() != args.batch_size and t < args.exp_max_length:
            feature_shape = exp_att_feature.shape
            if exp_net.blobs['exp_att_feature'].data.shape != feature_shape:
                # Was `exp_net.blob[...]`; every other access in this
                # function goes through `blobs`.
                exp_net.blobs['exp_att_feature'].reshape(*feature_shape)
                exp_net.blobs['exp'].reshape(*prev_word.shape)
                exp_net.blobs['exp_out'].reshape(1, args.batch_size)
                exp_net.blobs['exp_cont_1'].reshape(1, args.batch_size)
                exp_net.blobs['exp_cont_2'].reshape(1, args.batch_size)
            exp_net.blobs['exp_att_feature'].data[...] = exp_att_feature
            exp_net.blobs['exp'].data[...] = prev_word
            exp_net.blobs['exp_out'].data[...] = exp_vec_out[:, t].reshape(
                (1, args.batch_size))
            exp_net.blobs['exp_cont_1'].data[...] = continuation
            exp_net.blobs['exp_cont_2'].data[...] = continuation
            exp_net.forward()

            predicted_word = exp_net.blobs['exp_prediction'].data.copy()
            predicted_word = np.squeeze(predicted_word.argmax(axis=2))
            # A predicted word id of 0 marks that sequence as finished.
            completed = np.where(predicted_word == 0)
            finished[completed] = 1
            predicted_words.append(predicted_word)
            conts.append(continuation)
            prev_word = predicted_word.reshape((1, args.batch_size))
            continuation = (finished != 1).astype(np.int32).reshape(
                (1, args.batch_size))
            t += 1

        predicted_words = np.array(predicted_words).transpose()
        conts = np.array(conts).transpose()

        r_vdict = reverse(dp.vdict)
        r_adict = reverse(dp.adict)
        r_exp_vdict = reverse(dp.exp_vdict)
        questions_str = batch_to_str('q', qvec, cvec, r_vdict, r_adict,
                                     r_exp_vdict)
        answers_str = batch_to_str('a', avec, np.ones_like(avec), r_vdict,
                                   r_adict, r_exp_vdict)
        pred_str = batch_to_str('a', vqa_preds, np.ones_like(vqa_preds),
                                r_vdict, r_adict, r_exp_vdict)
        generated_str = batch_to_str('exp', predicted_words, conts, r_vdict,
                                     r_adict, r_exp_vdict)

        # The attention maps are unused in the loop body but stay in the
        # zip so the number of iterations is unchanged.
        for qid, qstr, ans, pred, expl, _vqa_att, _exp_att in zip(
                qid_list, questions_str, answers_str, pred_str,
                generated_str, vqa_att_map, exp_att_map):
            if ans == '':
                # NOTE(review): UNK is not defined in this function;
                # presumably a module-level constant -- confirm.
                ans = UNK
            final_results[qid] = {
                'qstr': qstr,
                'ans': ans,
                'exp': expl,
                'pred': pred,
            }
            vqa_submit_results.append({
                u'answer': pred,
                u'question_id': int(qid),
            })

    with open(os.path.join(args.out_dir, 'exp_results.json'), 'w') as f:
        json.dump(final_results, f)
    with open(os.path.join(args.out_dir, 'vqa_results.json'), 'w') as f:
        json.dump(vqa_submit_results, f)