def ivqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    """Generate one question per test sample and dump them to a JSON file.

    Builds the model selected by ``FLAGS.model_type`` in 'beam' mode,
    restores it from a checkpoint and calls ``model.greedy_inference`` on
    every batch delivered by the test reader.

    Args:
        checkpoint_path: Checkpoint to restore. When None, the checkpoint
            state found under ``FLAGS.checkpoint_dir`` (formatted with
            ``FLAGS.version`` and ``FLAGS.model_type``) is used.
        subset: Data split passed to the reader; also part of the result
            file name.

    Returns:
        Path of the JSON file containing the generated questions.
    """
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s_%s.json' % (
        FLAGS.model_type.upper(), subset)

    build_model = get_model_creation_fn(FLAGS.model_type)
    build_reader = create_reader(FLAGS.model_type, phase='test')

    # Vocabulary used to turn token indices back into text.
    to_sentence = SentenceGenerator(trainset='trainval')

    reader = build_reader(batch_size=100, subset=subset,
                          version=FLAGS.test_version)

    if checkpoint_path is None:
        ckpt_state = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type))
        checkpoint_path = ckpt_state.model_checkpoint_path

    graph = tf.Graph()
    with graph.as_default():
        model = build_model(model_config, 'beam')
        model.build()
        restorer = Restorer(graph)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    for batch_idx in range(num_batches):
        batch = reader.get_test_batch()

        # The last two reader outputs are ids; everything before is fed
        # to the model.
        quest_ids, image_ids = batch[-2:]
        scores, pathes = model.greedy_inference(batch[:-2], sess)
        scores, pathes = post_process_prediction(scores, pathes)

        # Show the first decoded question of the batch as a progress trace.
        preview = to_sentence.index_to_question(pathes[0])
        print('%d/%d: %s' % (batch_idx, num_batches, preview))

        results.extend(
            {
                'image_id': int(image_id),
                'question_id': int(quest_id),
                'question': to_sentence.index_to_question(path),
            }
            for quest_id, image_id, path in zip(quest_ids, image_ids, pathes))

    save_json(res_file, results)
    return res_file
def vaq_decoding_greedy(checkpoint_path=None, subset='kpval'):
    """Greedily decode a question for every test sample and save them.

    The model chosen by ``FLAGS.model_type`` is built in 'greedy' mode and
    restored with a ``tf.train.Saver``.

    Args:
        checkpoint_path: Checkpoint to restore. When None, the checkpoint
            state under ``FLAGS.checkpoint_dir % FLAGS.model_type`` is used.
        subset: Data split passed to the test reader.

    Returns:
        Path of the JSON file containing the generated questions.
    """
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s.json' % FLAGS.model_type.upper()

    make_model = get_model_creation_fn(FLAGS.model_type)
    make_reader = create_reader(FLAGS.model_type, phase='test')

    # Vocabulary used to turn token indices back into text.
    to_sentence = SentenceGenerator(trainset='trainval')

    reader = make_reader(batch_size=32, subset=subset)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % FLAGS.model_type
        checkpoint_path = tf.train.get_checkpoint_state(
            ckpt_dir).model_checkpoint_path

    graph = tf.Graph()
    with graph.as_default():
        model = make_model(model_config, 'greedy')
        model.build()
        saver = tf.train.Saver()

        sess = tf.Session()
        ckpt_name = os.path.basename(checkpoint_path)
        tf.logging.info('Restore from model %s' % ckpt_name)
        saver.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running greedy inference...')
    results = []
    for step in range(num_batches):
        batch = reader.get_test_batch()

        # The trailing two outputs are the question and image ids.
        model_inputs = batch[:-2]
        quest_ids, image_ids = batch[-2:]
        scores, pathes = model.greedy_inference(model_inputs, sess)
        scores, pathes = post_process_prediction(scores, pathes)

        first_question = to_sentence.index_to_question(pathes[0])
        print('%d/%d: %s' % (step, num_batches, first_question))

        for quest_id, image_id, path in zip(quest_ids, image_ids, pathes):
            results.append({
                'image_id': int(image_id),
                'question_id': int(quest_id),
                'question': to_sentence.index_to_question(path),
            })

    save_json(res_file, results)
    return res_file
def sample_cst_questions(checkpoint_path=None, subset='kptrain'):
    """Interactively inspect questions sampled by ContrastQuestionSampler.

    For every test batch the ground-truth question of the first sample is
    printed, followed by the first three sampled questions with their
    answers. Execution then stops in ``pdb`` so the batch can be examined
    before moving on.

    Args:
        checkpoint_path: Checkpoint passed to ``Restorer.restore``. No
            fallback lookup is performed here when it is None.
        subset: Data split passed to the test reader.
    """
    model_config = ModelConfig()
    model_config.convert = FLAGS.convert
    model_config.loss_type = 'pairwise'
    model_config.top_k = 3
    batch_size = 8

    make_reader = create_reader(FLAGS.model_type, phase='test')

    # Vocabulary used to turn token indices back into text.
    to_sentence = SentenceGenerator(trainset='trainval')

    reader = make_reader(batch_size=batch_size, subset=subset,
                         version=FLAGS.test_version)

    graph = tf.Graph()
    with graph.as_default():
        model = ContrastQuestionSampler(model_config)
        model.build()
        restorer = Restorer(graph)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    for i in range(num_batches):
        outputs = reader.get_test_batch()

        # The trailing two outputs are ids; the rest is fed to the model.
        quest_ids, image_ids = outputs[-2:]
        c_ans, c_ans_len, pathes, scores = model.greedy_inference(
            outputs[:-2], sess)
        scores, pathes = post_process_prediction(scores, pathes)

        k = 3
        capt, capt_len = outputs[2:4]
        gt = capt[0, :capt_len[0]]
        gt_question = to_sentence.index_to_question(gt)
        gt_answer = to_sentence.index_to_answer(c_ans[0, :c_ans_len[0]])
        print('gt: %s [%s]' % (gt_question, gt_answer))

        for ix in range(k):
            # Row 0 is labelled 'pre', the remaining rows 'cst'.
            tag = 'pre' if ix == 0 else 'cst'
            question = to_sentence.index_to_question(pathes[ix])
            answer = to_sentence.index_to_answer(c_ans[ix, :c_ans_len[ix]])
            print('%s %d: %s [%s]' % (tag, ix, question, answer))

        # Pause after every batch for manual inspection.
        import pdb
        pdb.set_trace()
def visualise():
    """Render up to 100 randomly chosen replay-buffer entries with ExperimentWriter.

    Loads the ``memory`` table from the replay-buffer JSON file, shuffles
    its keys with a fixed seed, and for each of the first 100 keys that has
    at least one stored path adds the image, the ground-truth
    question/answer and all stored questions (with their two confidence
    values) to the writer.
    """
    mc_ctx = MultiChoiceQuestionManger()
    to_sentence = SentenceGenerator(trainset='trainval')
    # Alternative output: 'latex/examples_replay_buffer_rescore'
    writer = ExperimentWriter('latex/examples_replay_buffer_rescore_prior')
    # Alternative input: 'vqa_replay_buffer/vqa_replay_low_rescore.json'
    d = load_json('vqa_replay_buffer/vqa_replay_low_rescore_prior_05_04.json')
    memory = d['memory']

    # Materialise the keys as a list: np.random.shuffle needs a mutable
    # sequence, and a dict view can be neither deep-copied nor shuffled.
    keys = list(memory.keys())
    np.random.seed(123)
    np.random.shuffle(keys)
    vis_keys = keys[:100]

    for quest_key in vis_keys:
        pathes = memory[quest_key]
        if not pathes:
            # Nothing stored for this question.
            continue

        quest_id = int(quest_key)
        image_id = mc_ctx.get_image_id(quest_id)
        gt_question = mc_ctx.get_question(quest_id)
        answer = mc_ctx.get_gt_answer(quest_id)
        head = 'Q: %s A: %s' % (gt_question, answer)
        im_file = '%s2014/COCO_%s2014_%012d.jpg' % ('val', 'val', image_id)
        im_path = os.path.join(IM_ROOT, im_file)

        questions = []
        for p, (conf1, conf2) in pathes.items():
            # Each key is a space-separated token-index string.
            _tokens = [int(t) for t in p.split(' ')]
            sentence = to_sentence.index_to_question(_tokens)
            questions.append('%s (%0.2f-%0.2f)' % (sentence, conf1, conf2))

        writer.add_result(image_id, quest_id, im_path, head, questions)

    writer.render()
def test_cst_reader():
    """Print four batches from ContrastiveDataReader as a smoke test.

    Each batch is split in two halves: entry ``idx`` is printed as 'Real'
    and entry ``idx + half`` as 'Fake', followed by the batch mask.
    """
    reader = ContrastiveDataReader(batch_size=4)
    to_sentence = SentenceGenerator(trainset='trainval')
    reader.start()
    try:
        for i in range(4):
            images, quest, quest_len, top_ans, mask = reader.pop_batch()
            questions = _parse_gt_questions(quest, quest_len)
            print('\nBatch %d' % i)
            # Floor division keeps the half-batch size an int; true
            # division would hand range() a float.
            this_batch_size = images.shape[0] // 2
            for idx in range(this_batch_size):
                real = to_sentence.index_to_question(questions[idx])
                fake = to_sentence.index_to_question(
                    questions[idx + this_batch_size])
                print('Real: %s' % real)
                print('Fake: %s\n' % fake)
            print('Mask:')
            print(mask.astype(np.int32))
    finally:
        # Stop the reader even if decoding a batch fails.
        reader.stop()
def test():
    """Rescore cached multiple-choice candidates with SequenceMLP and evaluate.

    Every datum in 'data/rescore_dev.pkl' is scored by the model; the
    highest-scoring candidate answer is recorded, the predictions are
    written to 'result/rescore_dev_dev.json', and the accuracy reported by
    ``evaluate_model`` is printed.
    """
    from util import unpickle
    import json
    from inference_utils.question_generator_util import SentenceGenerator
    from w2v_answer_encoder import MultiChoiceQuestionManger

    config = MLPConfig()
    model = SequenceMLP(config, phase='test')
    model.build()
    prob = model.prob

    # Load vocabulary
    to_sentence = SentenceGenerator(trainset='trainval')
    # create multiple choice question manager
    mc_manager = MultiChoiceQuestionManger(subset='trainval',
                                           answer_coding='sequence')

    result = []
    # The context manager releases the session once inference is done.
    with tf.Session() as sess:
        # Load model
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        checkpoint_path = ckpt.model_checkpoint_path
        saver = tf.train.Saver()
        saver.restore(sess, checkpoint_path)

        dataset = unpickle('data/rescore_dev.pkl')
        for itr, datum in enumerate(dataset):
            seq_index, att_mask, label = _process_datum(datum)
            quest_id = datum['quest_id']
            quest = seq_index[0].tolist()
            feed_dict = model.fill_feed_dict([seq_index, att_mask])
            scores = sess.run(prob, feed_dict=feed_dict)
            idx = scores.argmax()

            # parse question and answer
            question = to_sentence.index_to_question([0] + quest)
            mc_ans = mc_manager.get_candidate_answers(quest_id)
            vaq_answer = mc_ans[idx]
            real_answer = mc_ans[label.argmax()]

            result.append({u'answer': vaq_answer, u'question_id': quest_id})

            # Show every 100th example.
            if itr % 100 == 0:
                print('============== %d ============' % itr)
                print('question id: %d' % quest_id)
                print('question\t: %s' % question)
                print('answer\t: %s' % real_answer)
                print('VAQ answer\t: %s (%0.2f)' % (vaq_answer, scores[idx]))

    quest_ids = [res[u'question_id'] for res in result]

    # save results
    tf.logging.info('Saving results')
    res_file = 'result/rescore_dev_dev.json'
    # Close (and therefore flush) the file before the evaluator reads it.
    with open(res_file, 'w') as fs:
        json.dump(result, fs)

    from vqa_eval import evaluate_model
    acc = evaluate_model(res_file, quest_ids)
    print('Over all accuarcy: %0.2f' % acc)
def main(_):
    """Print the per-word likelihood of each ground-truth question.

    Restores a QuestionGenerator from the checkpoint state in
    ``FLAGS.checkpoint_dir`` and, for each record fetched from
    ``FLAGS.input_files``, evaluates ``model.likelihood`` and prints the
    question, the per-word values, the answer and their mean.

    Args:
        _: Unused positional argument.
    """
    config = QuestionGeneratorConfig()
    reader = TFRecordDataFetcher(FLAGS.input_files, config.image_feature_key)

    graph = tf.Graph()
    ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    checkpoint_path = ckpt_state.model_checkpoint_path
    print(checkpoint_path)

    with graph.as_default():
        model = QuestionGenerator(config, phase='evaluate')
        model.build()

    # Vocabulary used to turn token indices back into text.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)

    # Expand every comma-separated pattern; only the count is logged.
    filenames = [fname
                 for file_pattern in FLAGS.input_files.split(",")
                 for fname in tf.gfile.Glob(file_pattern)]
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    with tf.Session(graph=graph) as sess:
        # Load the model from checkpoint.
        saver = tf.train.Saver(var_list=tf.all_variables())
        saver.restore(sess, checkpoint_path)

        itr = 0
        while not reader.eof():
            outputs = reader.pop_batch()
            im_ids, quest_id, _, _, quest_ids, ans_ids = outputs
            inputs = post_processing_data(outputs)
            perplexity = sess.run(model.likelihood,
                                  feed_dict=model.fill_feed_dict(inputs))

            question = to_sentence.index_to_question(quest_ids)
            answer = to_sentence.index_to_answer(ans_ids)

            print('============== %d ============' % itr)
            print('image id: %d, question id: %d' % (im_ids, quest_id))
            print('question\t: %s' % question)

            # Pair every word with its value; the last pair is dropped.
            word_scores = list(zip(question.split(' '),
                                   perplexity.flatten()))[:-1]
            tmp = ' '.join('%s (%0.2f)' % (w, p) for w, p in word_scores)
            print('question\t' + tmp)
            print('answer\t: %s' % answer)
            print('perplexity\t: %0.2f\n' % perplexity.mean())

            itr += 1
def test_rerank_reader():
    """Print the question/answer pairs of one RetrievalDataReader batch.

    Each sequence is cut to its stated length and wrapped in a leading and
    trailing 0 token before being decoded.
    """
    reader = RetrievalDataReader(batch_size=1, n_contrast=10, subset='train')
    reader.start()
    batch = reader.pop_batch()
    im_feat, quest_arr, quest_len, ans_arr, ans_len = batch

    from inference_utils.question_generator_util import SentenceGenerator
    vocab_file = 'data/vqa_trainval_question_answer_word_counts.txt'
    to_sentence = SentenceGenerator(trainset='trainval',
                                    ans_vocab_file=vocab_file,
                                    quest_vocab_file=vocab_file)

    def _wrap(seq, length):
        # Keep the first `length` tokens and surround them with 0 tokens.
        return np.array([0] + seq[:length].tolist() + [0])

    for q_seq, q_len, a_seq, a_len in zip(quest_arr, quest_len,
                                          ans_arr, ans_len):
        q_tokens = _wrap(q_seq, q_len)
        a_tokens = _wrap(a_seq, a_len)
        question = to_sentence.index_to_question(q_tokens)
        answer = to_sentence.index_to_answer(a_tokens)
        print('Q: %s' % question)
        print('A: %s\n' % answer)

    reader.stop()
def main():
    """Produce questions by nearest-neighbour lookup and evaluate them.

    For each validation question id, the first 80 neighbour ids are handed
    to ``QuestionPool.get_candidates``; the returned path is decoded and
    stored. The results are saved to 'result/quest_vaq_nn.json' and passed
    to ``evaluate_question``.
    """
    num_neighbours = 80
    res_file = 'result/quest_vaq_nn.json'

    # sentence generator
    to_sentence = SentenceGenerator(trainset='trainval')

    # load distances
    val_qids, nn_ids = load_image_nn()

    # create nn model
    nn_model = QuestionPool()

    total = len(val_qids)
    results = []
    for pos, (v_qid, v_nn) in enumerate(zip(val_qids, nn_ids)):
        started = time()
        # Only the path is used; the returned training question id is ignored.
        _, tr_path = nn_model.get_candidates(v_nn[:num_neighbours])
        sent = to_sentence.index_to_question(tr_path)
        print(sent)
        elapsed = time() - started
        print('Processing %d/%d, time %0.2f sec.' % (pos, total, elapsed))
        results.append({'question_id': int(v_qid), 'question': sent})

    save_json(res_file, results)
    evaluate_question(res_file, subset='kpval', version='v1')
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kptest'):
    """Sample several questions per test item and save all of them.

    The model is built in 'sampling_beam' mode and the reader uses a batch
    size of 1. Each question kept by
    ``post_process_variation_questions_with_count`` is stored under the
    augmented id ``question_id * 1000 + rank``.

    Args:
        checkpoint_path: Checkpoint to restore. When None, the checkpoint
            state under ``FLAGS.checkpoint_dir`` (formatted with
            ``FLAGS.version`` and ``FLAGS.model_type``) is used.
        subset: Data split passed to the test reader.

    Returns:
        Path of the JSON file containing the augmented questions.
    """
    model_config = ModelConfig()
    res_file = ('result/aug_var_vaq_kl0_greedy_%s.json'
                % FLAGS.model_type.upper())

    make_model = get_model_creation_fn(FLAGS.model_type)
    make_reader = create_reader('VAQ-Var', phase='test')

    # Vocabulary used to turn token indices back into text.
    to_sentence = SentenceGenerator(trainset='trainval')

    reader = make_reader(batch_size=1, subset=subset,
                         version=FLAGS.test_version)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        checkpoint_path = tf.train.get_checkpoint_state(
            ckpt_dir).model_checkpoint_path

    graph = tf.Graph()
    with graph.as_default():
        model = make_model(model_config, 'sampling_beam')
        model.build()
        restorer = Restorer(graph)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    for step in range(num_batches):
        print('iter: %d/%d' % (step, num_batches))
        batch = reader.get_test_batch()

        # The trailing two outputs are ids; the rest is fed to the model.
        quest_ids, image_ids = batch[-2:]
        raw_scores, raw_pathes = model.greedy_inference(batch[:-2], sess)

        # Repeat each score along the sequence axis so that the score
        # array has one column per path column.
        tiled_scores = np.tile(raw_scores[:, np.newaxis],
                               [1, raw_pathes.shape[1]])
        num_sampled = len(raw_pathes)
        scores, pathes, _ = post_process_variation_questions_with_count(
            tiled_scores, raw_pathes, 1)

        question_id = int(quest_ids[0])
        image_id = image_ids[0]
        print('%d/%d' % (len(pathes[0]), num_sampled))

        for rank, (path, sc) in enumerate(zip(pathes[0], scores[0])):
            results.append({
                'image_id': int(image_id),
                'question_id': question_id * 1000 + rank,
                'question': to_sentence.index_to_question(path),
                'question_inds': path,
                # NOTE(review): this is the length of the post-processed
                # outer list, not the per-question counts returned by the
                # post-processing call -- confirm this is intended.
                'counts': len(pathes),
                'probs': float(sc),
            })

    save_json(res_file, results)
    return res_file
def ivqa_decoding_beam_search(checkpoint_path=None, subset=FLAGS.subset):
    """Decode several questions per sample and collect the unique ones.

    Uses the 'VAQ-Var' model in 'sampling' mode when ``FLAGS.use_var`` is
    set and in 'beam' mode otherwise. The decoded paths are treated as
    ``num_sampled`` consecutive groups of one batch each, so the candidates
    of sample ``s`` sit at indices ``s + j * batch_size``.

    NOTE(review): ``res_file``, ``results``, ``extend_questions`` and
    ``extended_question_ids`` are built but never saved or returned in the
    visible body -- confirm whether the tail of this function is missing.

    Args:
        checkpoint_path: Checkpoint to restore. When None, the checkpoint
            state under ``FLAGS.checkpoint_dir`` is used, formatted with
            ``FLAGS.version`` for variational models and 'kprestval'
            otherwise.
        subset: Data split passed to the test reader.
    """
    model_config = ModelConfig()
    _model_suffix = 'var_' if FLAGS.use_var else ''
    res_file = 'data_rl/%sivqa_%s_questions.json' % (_model_suffix,
                                                     FLAGS.subset)

    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-Var', phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    batch_size = 64
    reader = create_fn(batch_size=batch_size, subset=subset,
                       version=FLAGS.test_version)

    if checkpoint_path is None:
        if FLAGS.use_var:  # variational models
            ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version,
                                               FLAGS.model_type)
        else:  # standard models
            ckpt_dir = FLAGS.checkpoint_dir % ('kprestval', FLAGS.model_type)
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    mode = 'sampling' if FLAGS.use_var else 'beam'

    # Build model
    g = tf.Graph()
    with g.as_default():
        model = model_fn(model_config, mode)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    extend_questions = []
    extended_question_ids = []

    for i in range(num_batches):
        print('iter: %d/%d' % (i, num_batches))
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)
        scores, pathes = post_process_prediction(scores, pathes,
                                                 add_start_end=False)

        # process for each sample
        _this_batch_size = quest_ids.shape[0]
        # Floor division gives the same result as int(a / b) for these
        # non-negative counts on both Python 2 and 3.
        num_sampled = len(pathes) // _this_batch_size
        _noise_offset = (np.arange(0, num_sampled, dtype=np.int32)
                         * _this_batch_size)
        for _s_id in range(_this_batch_size):
            _index = _noise_offset + _s_id
            try:
                cur_scores = [scores[_idx] for _idx in _index]
                cur_pathes = [pathes[_idx] for _idx in _index]
            except Exception as e:  # "as" form parses on Python 2.6+ and 3
                # Debugging aid kept from the original: report the error
                # and drop into the debugger.
                print(str(e))
                pdb.set_trace()

            cur_scores, cur_pathes = find_unique_pathes(cur_scores,
                                                        cur_pathes)
            question_id = int(quest_ids[_s_id])
            image_id = image_ids[_s_id]

            for _pid, path in enumerate(cur_pathes):
                sentence = to_sentence.index_to_question(path)
                extended_question_ids.append([question_id, _pid])
                aug_quest_id = question_id * 1000 + _pid
                res_i = {
                    'image_id': int(image_id),
                    'question_id': aug_quest_id,
                    'question': sentence
                }
                results.append(res_i)
            extend_questions += cur_pathes
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kptest'):
    """Sample question variations per test item and save them as JSON.

    When ``FLAGS.mode`` is 'full', every post-processed question is saved
    under the augmented id ``question_id * 1000 + rank``. Otherwise
    ``pick_question`` chooses a single question per item, which is saved
    under the original question id.

    Args:
        checkpoint_path: Checkpoint to restore. When None, the checkpoint
            state under ``FLAGS.checkpoint_dir`` (formatted with
            ``FLAGS.version`` and ``FLAGS.model_type``) is used.
        subset: Data split passed to the test reader.

    Returns:
        Path of the JSON file containing the generated questions.
    """
    model_config = ModelConfig()
    res_file = 'result/var_vaq_beam_%s_%s.json' % (FLAGS.model_type.upper(),
                                                   FLAGS.mode)

    make_model = get_model_creation_fn(FLAGS.model_type)
    make_reader = create_reader(FLAGS.model_type, phase='test')

    # Vocabulary used to turn token indices back into text.
    to_sentence = SentenceGenerator(trainset='trainval')

    reader = make_reader(batch_size=50, subset=subset,
                         version=FLAGS.test_version)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        checkpoint_path = tf.train.get_checkpoint_state(
            ckpt_dir).model_checkpoint_path

    graph = tf.Graph()
    with graph.as_default():
        model = make_model(model_config, 'sampling_beam')
        model.build()
        restorer = Restorer(graph)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    def _record(image_id, out_id, path):
        # One output entry for a decoded path.
        return {
            'image_id': int(image_id),
            'question_id': out_id,
            'question': to_sentence.index_to_question(path),
        }

    print('Running beam search inference...')
    results = []
    for step in range(num_batches):
        print('iter: %d/%d' % (step, num_batches))
        batch = reader.get_test_batch()

        # The trailing two outputs are ids; the rest is fed to the model.
        quest_ids, image_ids = batch[-2:]
        scores, pathes = model.greedy_inference(batch[:-2], sess)

        # Repeat each score along the sequence axis so that the score
        # array has one column per path column.
        this_batch_size = quest_ids.size
        dummy_scores = np.tile(scores[:, np.newaxis], [1, pathes.shape[1]])
        ivqa_scores, ivqa_pathes, ivqa_counts = \
            post_process_variation_questions_with_count(
                dummy_scores, pathes, this_batch_size)

        grouped = zip(ivqa_pathes, ivqa_scores, ivqa_counts)
        for q_idx, (cands, cand_scores, cand_counts) in enumerate(grouped):
            image_id = image_ids[q_idx]
            question_id = int(quest_ids[q_idx])
            if FLAGS.mode == 'full':
                for rank, cand in enumerate(cands):
                    results.append(
                        _record(image_id, question_id * 1000 + rank, cand))
            else:
                chosen = pick_question(cand_scores, cands, cand_counts)
                results.append(_record(image_id, question_id, chosen))

    save_json(res_file, results)
    return res_file
def test(checkpoint_path=None):
    """Run CaptionGenerator beam search over the 'kptest' split.

    For each test item the ground-truth question and up to three generated
    questions are printed; the first generated question is saved.

    Args:
        checkpoint_path: Checkpoint to restore. When None, the checkpoint
            state under ``FLAGS.checkpoint_dir % FLAGS.model_type`` is used.

    Returns:
        Path of the JSON file containing the generated questions.
    """
    subset = 'kptest'
    config = ModelConfig()
    config.phase = 'other'
    use_answer_type = FLAGS.model_type in ('VAQ-IAS', 'VQG')
    config.model_type = FLAGS.model_type
    mc_ctx = MultiChoiceQuestionManger(subset='val')

    # build data reader
    reader_options = dict(batch_size=1, subset=subset,
                          output_attr=True, output_im=False,
                          output_qa=True, output_capt=False,
                          output_ans_seq=True, attr_type='res152')
    reader = Reader(**reader_options)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % FLAGS.model_type
        checkpoint_path = tf.train.get_checkpoint_state(
            ckpt_dir).model_checkpoint_path

    res_file = 'result/quest_vaq_%s_%s.json' % (FLAGS.model_type.upper(),
                                                subset)
    print(res_file)

    # build and restore model
    model = load_model_inferencer()
    restore_fn = model.build_graph_from_config(config, checkpoint_path)
    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s'
                    % os.path.basename(checkpoint_path))
    restore_fn(sess)

    # Vocabulary used to turn token indices back into text.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)
    generator = caption_generator.CaptionGenerator(
        model, to_sentence.question_vocab)

    results = []
    print('Running inference on split %s...' % subset)
    num_batches = reader.num_batches
    for step in range(num_batches):
        inputs, info, quest_gt_vis = pre_process_inputs(
            reader.get_test_batch(), mc_ctx, use_answer_type)
        quest_id, image_id = info
        captions = generator.beam_search(sess, inputs)
        gt_question = to_sentence.index_to_question(quest_gt_vis)

        print('============== %d ============' % step)
        print('image id: %d, question id: %d' % (image_id, quest_id))
        print('question\t: %s' % gt_question)

        # Show at most the first three beam-search results.
        for rank, cand in enumerate(captions[0:3]):
            print('<question %d>\t: %s'
                  % (rank, to_sentence.index_to_question(cand.sentence)))

        # Only the first result is written to the result file.
        best = captions[0]
        results.append({
            'image_id': image_id,
            'question_id': quest_id,
            'question': to_sentence.index_to_question(best.sentence),
        })

    save_json(res_file, results)
    return res_file
def test(T=3.0, num_cands=10):
    """Re-weight candidate VQA answers by question likelihood and save the top one.

    For each record, the candidate answers from 'result/vqa_cands.json' are
    fed through the model; each candidate's mean loss ``l`` becomes a
    weight ``exp(-l * T)`` (normalised to sum to one) that multiplies the
    candidate's own score. The best of the first ``num_cands`` products is
    the prediction.

    Args:
        T: Factor applied to the loss inside the exponential.
        num_cands: Number of leading candidates considered when picking
            the top answer.

    Returns:
        Tuple ``(res_file, quest_ids)``: the path of the written JSON
        result file and the list of processed question ids.
    """
    # Build the inference graph.
    cand_file = 'result/vqa_cands.json'
    config = QuestionGeneratorConfig()
    reader = TFRecordDataFetcher(FLAGS.input_files, config.image_feature_key)

    # Create model creator
    model_creator = create_model_fn(FLAGS.model_type)

    # create candidate answer manager
    oe_manager = CandidateAnswerManager(cand_file, max_num_cands=10)

    # Create reader post-processing function
    reader_post_proc_fn = build_mc_reader_proc_fn(model_creator.ans_coding)

    g = tf.Graph()
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)
    with g.as_default():
        model = model_creator(config, phase='evaluate')
        model.build()

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)

    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    result = []
    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        saver = tf.train.Saver(var_list=tf.all_variables())
        saver.restore(sess, checkpoint_path)

        itr = 0
        while not reader.eof():
            outputs = reader.pop_batch()
            im_ids, quest_id, im_feat, ans_w2v, quest_ids, ans_ids = outputs
            oe_ans, oe_coding, scores = oe_manager.get_answer_sequence(
                quest_id)
            inputs = reader_post_proc_fn(outputs, oe_coding)

            perplexity, state = sess.run(
                [model.likelihood, model.final_decoder_state],
                feed_dict=model.fill_feed_dict(inputs))
            perplexity = perplexity.reshape(inputs[-1].shape)
            # Mean over all but the last position of each row.
            loss = perplexity[:, :-1].mean(axis=1)

            weight = np.exp(-loss * T)
            weight = weight / weight.sum()  # l1 normalise
            score = scores * weight
            score = score[:num_cands]

            question = to_sentence.index_to_question(quest_ids)
            answer = to_sentence.index_to_answer(ans_ids)
            top1_ans = oe_ans[score.argmax()]
            result.append({u'answer': top1_ans, u'question_id': quest_id})

            # Show every 100th example.
            if itr % 100 == 0:
                print('============== %d ============' % itr)
                print('image id: %d, question id: %d' % (im_ids, quest_id))
                print('question\t: %s' % question)
                print('answer\t: %s' % answer)
                top_k_ids = (-score).argsort()[:3].tolist()
                print('VQA answer\t: %s' % oe_ans[0])
                for i, idx in enumerate(top_k_ids):
                    t_mc_ans = oe_ans[idx]
                    print('VAQ answer <%d>\t: %s (%0.2f)'
                          % (i, t_mc_ans, weight[idx]))

            itr += 1

    quest_ids = [res[u'question_id'] for res in result]

    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_file % get_model_iteration(checkpoint_path)
    # Close (and flush) the file before handing the path to the caller.
    with open(res_file, 'w') as fs:
        json.dump(result, fs)
    return res_file, quest_ids
from readers.vqa_irrelevance_data_fetcher import AttentionDataReader as Reader
from post_process_variation_questions import _parse_gt_questions
from inference_utils.question_generator_util import SentenceGenerator

# Smoke test: print five batches of questions together with their labels.
reader = Reader(batch_size=10,
                subset='trainval',
                model_name='something',
                epsilon=0.5,
                feat_type='res5c',
                version='v1',
                counter_sampling=False)
to_sentence = SentenceGenerator(trainset='trainval')

reader.start()
for i in range(5):
    print('--------- BATCH %d ---------' % i)
    res5c, quest, quest_len, labels = reader.pop_batch()
    pathes = _parse_gt_questions(quest, quest_len)
    # One line per sample: decoded question followed by its label.
    for _p, lbl in zip(pathes, labels):
        sentence = to_sentence.index_to_question(_p)
        print('%s %d' % (sentence, lbl))
reader.stop()
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    """Debug helper that scores ground-truth questions of one batch against CIDEr.

    No model is built or restored (that code is disabled). Each iteration
    draws two batches from the reader: the questions of the first are kept
    as references, those of the second are passed, as token strings, to
    ``compute_cider_token_1vsall`` together with the first batch's question
    ids. Execution then stops in ``pdb``; the intermediate locals exist for
    inspection there.

    Args:
        checkpoint_path: Unused while model decoding is disabled.
        subset: Data split passed to the test reader.

    Returns:
        Path of the JSON result file; the saved list is always empty
        because the code that fills it is disabled.
    """
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s.json' % FLAGS.model_type.upper()

    # Model creation, checkpoint lookup and restoring are disabled here.
    make_reader = create_reader(FLAGS.model_type, phase='test')

    # Vocabulary used to turn token indices back into text.
    to_sentence = SentenceGenerator(trainset='trainval')

    reader = make_reader(batch_size=80, subset=subset,
                         version=FLAGS.test_version)

    def _token_strings(paths):
        # Space-joined token ids, one string per path.
        return [' '.join(map(str, path)) for path in paths]

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()
        (im, capt, capt_len, ans_seq, ans_seq_len,
         quest_ids, image_ids) = outputs
        # A second batch supplies the candidate questions.
        _, res, res_len, _, _, _, _ = reader.get_test_batch()

        pathes = parse_gt_questions(capt, capt_len)
        question = to_sentence.index_to_question(pathes[0])
        gts = [to_sentence.index_to_question(q) for q in pathes]
        gts_token = _token_strings(pathes)

        respathes = parse_gt_questions(res, res_len)
        res = [to_sentence.index_to_question(q) for q in respathes]
        res_token = _token_strings(respathes)

        scores = compute_cider_token_1vsall(quest_ids, res_token)

        # Pause after every iteration for manual inspection.
        import pdb
        pdb.set_trace()

    save_json(res_file, results)
    return res_file
class RerankAnalysiser(object):
    """Accumulates VQA / re-ranking predictions and reports their accuracy.

    Call ``update`` once per batch, then ``compute_accuracy`` to print the
    summary, and ``close`` to release the log file.
    """

    # Labels at or above this value are logged as 'UNK' and excluded from
    # the accuracy statistics.
    _NUM_KNOWN_ANSWERS = 2000

    def __init__(self):
        self.labels = []
        self.rerank_preds = []
        self.vqa_top_scores = []
        self.vqa_top_preds = []
        self.vqa_cands = []
        self.to_sentence = SentenceGenerator(trainset='trainval')
        # Closed by close().
        self.file_stream = open('result/rerank_analysis.txt', 'w')

    @staticmethod
    def _as_array(batches):
        """Return `batches` as one array, concatenating a list of batches.

        An ndarray is returned unchanged, which makes repeated conversion
        of the same accumulator safe.
        """
        if isinstance(batches, np.ndarray):
            return batches
        return np.concatenate(batches)

    def update(self, reader_outputs, model_prediction):
        """Store one batch of predictions and log one random sample of it.

        Args:
            reader_outputs: 9-tuple from the reader; the entries used are
                question, question length, label, question id and image id
                (positions 2, 3, 4, 7 and 8).
            model_prediction: 4-tuple ``(score, reranked, vqa_cands,
                vqa_scores)``; column 0 of the last two is taken as the
                top VQA prediction and its score.
        """
        _, _, quest, quest_len, label, _, _, quest_id, image_id = \
            reader_outputs
        score, reranked, vqa_cands, vqa_scores = model_prediction
        # save vqa predictions
        self.vqa_top_preds.append(vqa_cands[:, 0])
        self.vqa_top_scores.append(vqa_scores[:, 0])
        self.vqa_cands.append(vqa_cands)
        # save ivqa predictions
        self.rerank_preds.append(reranked)
        self.labels.append(label)
        self.update_log(quest, quest_len, vqa_cands, vqa_scores, reranked,
                        label, image_id, quest_id)

    def update_log(self, quest, quest_len, vqa_cands, vqa_scores, rerank,
                   label, image_id, quest_id):
        """Write one randomly chosen sample of the batch to the log file.

        The candidate that equals the re-ranked prediction is marked
        with '<<'.
        """
        idx = nr.randint(len(quest))
        quest_seq = quest[idx][:quest_len[idx]]
        _log = '-------- image_id: %d, quest_id: %d --------\n' % (
            image_id[idx], quest_id[idx])
        self.file_stream.write(_log)

        question = self.to_sentence.index_to_question(quest_seq)
        gt_label = label[idx]
        if gt_label < self._NUM_KNOWN_ANSWERS:
            answer = self.to_sentence.index_to_top_answer(label[idx])
        else:
            answer = 'UNK'
        self.file_stream.write('Q: %s\n' % question)
        self.file_stream.write('A: %s\n' % answer)

        r_id = rerank[idx]
        for i, (c_id, c_score) in enumerate(zip(vqa_cands[idx],
                                                vqa_scores[idx])):
            cand_answer = self.to_sentence.index_to_top_answer(c_id)
            if c_id == r_id:
                _log = '[%d]: %s (%0.2f)\t<<\n' % (i, cand_answer, c_score)
            else:
                _log = '[%d]: %s (%0.2f)\n' % (i, cand_answer, c_score)
            self.file_stream.write(_log)

    def refine_prediction(self, thresh=0.2):
        """Replace low-confidence VQA predictions by the re-ranked ones.

        Works both before and after ``compute_accuracy``; the stored
        accumulators are not modified.

        Args:
            thresh: Top VQA predictions scoring below this are replaced.

        Returns:
            A new array of predictions.
        """
        top_scores = self._as_array(self.vqa_top_scores)
        top_preds = self._as_array(self.vqa_top_preds)
        rerank_preds = self._as_array(self.rerank_preds)
        rep_tab = top_scores < thresh
        preds = top_preds.copy()
        preds[rep_tab] = rerank_preds[rep_tab]
        return preds

    def compute_accuracy(self):
        """Print accuracy statistics over everything passed to ``update``.

        As a side effect the accumulators become concatenated arrays, so
        ``update`` must not be called afterwards. Calling this method
        again is safe.
        """
        self.vqa_cands = self._as_array(self.vqa_cands)
        self.vqa_top_preds = self._as_array(self.vqa_top_preds)
        self.vqa_top_scores = self._as_array(self.vqa_top_scores)
        self.labels = self._as_array(self.labels)
        self.rerank_preds = self._as_array(self.rerank_preds)

        valid_tab = self.labels < self._NUM_KNOWN_ANSWERS

        def _ratio(num, den):
            # 0/0 is reported as nan without a numpy RuntimeWarning.
            if den == 0:
                return float('nan')
            return num / float(den)

        def compute_recall(preds, cond_tab=None):
            # A 1-D prediction vector is treated as a single column.
            top_k = 1 if len(preds.shape) == 1 else preds.shape[1]
            preds = preds.reshape([-1, top_k])
            # A sample is correct when any of its columns equals the label.
            correct = np.equal(preds, self.labels[:, np.newaxis]).any(axis=1)
            if cond_tab is None:
                cond_tab = valid_tab
            else:
                cond_tab = np.logical_and(valid_tab, cond_tab)
            valid_correct = correct[cond_tab]
            acc = _ratio(valid_correct.sum(), valid_correct.size)
            prop = _ratio(cond_tab.sum(), valid_tab.sum())
            return acc * 100, prop * 100

        print('\n')
        print('VQA acc@1: %0.2f [%0.1f%%]'
              % compute_recall(self.vqa_top_preds))
        print('VQA acc@3: %0.2f [%0.1f%%]' % compute_recall(self.vqa_cands))
        print('iVQA acc@1: %0.2f [%0.1f%%]'
              % compute_recall(self.rerank_preds))
        print('VQA and iVQA acc@1: %0.2f [%0.1f%%]' % compute_recall(
            self.vqa_top_preds,
            np.equal(self.vqa_top_preds, self.rerank_preds)))

        thresh = np.arange(0.1, 1, 0.1, np.float32)
        for t in thresh:
            acc, p = compute_recall(self.vqa_top_preds,
                                    np.greater(self.vqa_top_scores, t))
            print('VQA acc@1 [t=%0.1f]: %0.2f [%0.1f%%]' % (t, acc, p))

        print('\nRefine:')
        thresh = np.arange(0.05, 1, 0.05, np.float32)
        for t in thresh:
            acc, p = compute_recall(self.refine_prediction(t))
            print('Refine VQA acc@1 [t=%0.2f]: %0.2f [%0.1f%%]' % (t, acc, p))

    def close(self):
        """Close the analysis log file."""
        self.file_stream.close()
def test():
    """Pick multiple-choice answers by question likelihood and cache decoder states.

    For each record, every candidate answer is fed through the model; the
    candidate with the lowest mean loss is the prediction. Predictions go
    to a JSON result file, and the final decoder states with their binary
    labels are pickled to 'data/rescore_state_dev.pkl'.

    Returns:
        Tuple ``(res_file, quest_ids)``: the path of the written JSON
        result file and the list of processed question ids.
    """
    # Build the inference graph.
    config = QuestionGeneratorConfig()
    reader = TFRecordDataFetcher(FLAGS.input_files, config.image_feature_key)

    # Create model creator
    model_creator = create_model_fn(FLAGS.model_type)

    # create multiple choice question manager
    mc_manager = MultiChoiceQuestionManger(
        subset='trainval', answer_coding=model_creator.ans_coding)

    # Create reader post-processing function
    reader_post_proc_fn = build_mc_reader_proc_fn(model_creator.ans_coding)

    g = tf.Graph()
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)
    with g.as_default():
        model = model_creator(config, phase='evaluate')
        model.build()

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)

    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    result, rescore_data, state_rescore_data = [], [], []
    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        saver = tf.train.Saver(var_list=tf.all_variables())
        saver.restore(sess, checkpoint_path)

        itr = 0
        while not reader.eof():
            # NOTE(review): the intent is "cache at most 50k questions",
            # but `>` lets item 50000 through (50001 items) -- confirm.
            if itr > 50000:
                break
            outputs = reader.pop_batch()
            im_ids, quest_id, im_feat, ans_w2v, quest_ids, ans_ids = outputs
            mc_ans, mc_coding = \
                mc_manager.get_candidate_answer_and_word_coding(quest_id)
            inputs = reader_post_proc_fn(outputs, mc_coding)

            perplexity, state = sess.run(
                [model.likelihood, model.final_decoder_state],
                feed_dict=model.fill_feed_dict(inputs))
            perplexity = perplexity.reshape(inputs[-1].shape)
            # Mean over all but the last position of each row.
            loss = perplexity[:, :-1].mean(axis=1)

            question = to_sentence.index_to_question(quest_ids)
            answer = to_sentence.index_to_answer(ans_ids)
            top1_mc_ans = mc_ans[loss.argmin()]
            result.append({u'answer': top1_mc_ans, u'question_id': quest_id})

            # Keep the decoder state together with its label.
            label = mc_manager.get_binary_label(quest_id)
            state_sv = {'quest_id': quest_id, 'states': state, 'label': label}
            state_rescore_data.append(state_sv)

            # Show every 100th example.
            if itr % 100 == 0:
                print('============== %d ============' % itr)
                print('image id: %d, question id: %d' % (im_ids, quest_id))
                print('question\t: %s' % question)
                print('answer\t: %s' % answer)
                top_k_ids = loss.argsort()[:3].tolist()
                for i, idx in enumerate(top_k_ids):
                    t_mc_ans = mc_ans[idx]
                    print('VAQ answer <%d>\t: %s (%0.2f)'
                          % (i, t_mc_ans, loss[idx]))

            itr += 1
            # Information for training a classifier; collected here, but
            # the call that would save it is disabled below.
            mc_label = np.array([a == answer for a in mc_ans],
                                dtype=np.float32)
            quest_target = inputs[-2]
            datum = {
                'quest_seq': quest_target,
                'perplex': perplexity,
                'label': mc_label,
                'quest_id': quest_id
            }
            rescore_data.append(datum)

    quest_ids = [res[u'question_id'] for res in result]

    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_file % get_model_iteration(checkpoint_path)
    # Close (and flush) the file before handing the path to the caller.
    with open(res_file, 'w') as fs:
        json.dump(result, fs)

    tf.logging.info('Saving rescore data...')
    from util import pickle
    # pickle('data/rescore_dev.pkl', rescore_data)
    pickle('data/rescore_state_dev.pkl', state_rescore_data)
    tf.logging.info('Done!')
    return res_file, quest_ids
class VarIVQAModelWrapper(object):
    """Question generator backed by a pre-trained variational iVQA model.

    The constructor builds the model in a private graph, restores a fixed
    checkpoint and loads an image-feature cache, after which ``inference``
    can be called with an image id and an answer.
    """

    def __init__(self):
        """Build the model, restore the checkpoint and load image features."""
        self.name = ' ------- VarIVQA ------- '
        self._top_k = 10  # number of candidates printed per query
        self.ckpt_file = 'model/v1_var_kptrain_VAQ-VarDS/model.ckpt-3300000'
        self.to_sentence = SentenceGenerator(trainset='trainval')
        self.sent_encoder = SentenceEncoder()
        self.g = tf.Graph()

        from models.variational_ds_ivqa_model import VariationIVQAModel
        from config import ModelConfig

        model_config = ModelConfig()
        with self.g.as_default():
            self.sess = tf.Session()
            self.model = VariationIVQAModel(model_config,
                                            phase='sampling_beam')
            self.model.build()
            # Only the trainable variables are restored from the checkpoint.
            self.saver = tf.train.Saver(var_list=tf.trainable_variables())
            self.saver.restore(self.sess, self.ckpt_file)
        self._init_image_cache()

    def _init_image_cache(self):
        """Load the feature file and index its rows by image id."""
        from util import load_hdf5

        cache = load_hdf5('data/attribute_std_mscoco_kpval.data')
        ids = cache['image_ids']
        self.im_feats = cache['att_arr']
        self.image_id2index = dict(
            (image_id, row) for row, image_id in enumerate(ids))

    def _load_image(self, image_id):
        """Return the cached feature of ``image_id`` with a leading axis."""
        row = self.image_id2index[image_id]
        feature = self.im_feats[row]
        return feature[np.newaxis, :]

    def _process_answer(self, answers):
        """Encode ``answers`` with the sentence encoder.

        :return: the ``(tokens, lengths)`` pair produced by the encoder
        """
        tokens, lengths = self.sent_encoder.encode_sentence(answers)
        return tokens, lengths

    def inference(self, image_id, answer):
        """Generate questions for an (image, answer) pair and print them.

        :param image_id: key into the image feature cache
        :param answer: answer passed to the sentence encoder
        :return: the scores returned by the model's ``greedy_inference``
        """
        model_inputs = [self._load_image(image_id)]
        model_inputs.extend(self._process_answer(answer))
        scores, pathes = self.model.greedy_inference(model_inputs, self.sess)
        self.show_prediction(scores, pathes)
        return scores

    def show_prediction(self, scores, pathes):
        """Print the ``_top_k`` best-scoring decoded questions.

        :param scores: one score per path
        :param pathes: 2-D array of token paths (one row per path)
        """
        batch_size = 1
        num_steps = pathes.shape[1]
        # The post-processing helper expects per-token scores, so the
        # path-level score is repeated along the time axis.
        tiled_scores = np.tile(scores[:, np.newaxis], [1, num_steps])
        grouped = post_process_variation_questions_with_count_v2(
            tiled_scores, pathes, batch_size)
        all_scores, all_pathes, all_counts = grouped
        for cand_pathes, cand_scores, _ in zip(all_pathes, all_scores,
                                               all_counts):
            order = np.argsort(-np.array(cand_scores))[:self._top_k]
            for pick in order:
                question = self.to_sentence.index_to_question(
                    cand_pathes[pick])
                print('%s (%0.2f)' % (question, cand_scores[pick]))
def test(checkpoint_path=None):
    """Generate questions on the 'dev' split with beam search and save them.

    For every sample the three best beam candidates are printed and the
    top-1 question is stored; all results are written to a JSON file.

    :param checkpoint_path: checkpoint to restore; when ``None`` the latest
        checkpoint under ``FLAGS.checkpoint_dir % FLAGS.model_type`` is used
    :return: path of the JSON result file
    """
    config = ModelConfig()
    config.phase = 'other'
    use_answer_sequence = ('lstm' in FLAGS.model_type or
                           FLAGS.model_type == 'VAQ-A')
    config.model_type = FLAGS.model_type

    # build data reader
    reader = Reader(batch_size=1, subset='dev', output_attr=True,
                    output_im=False, output_qa=True, output_capt=False,
                    output_ans_seq=use_answer_sequence)

    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir % FLAGS.model_type)
        checkpoint_path = ckpt.model_checkpoint_path

    res_file = 'result/quest_vaq_%s.json' % FLAGS.model_type.upper()

    # build and restore model
    model = InferenceWrapper()
    restore_fn = model.build_graph_from_config(config, checkpoint_path)
    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    restore_fn(sess)

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)
    generator = caption_generator.CaptionGenerator(
        model, to_sentence.question_vocab)

    results = []
    print('Running inference on split %s...' % TEST_SET)
    num_batches = reader.num_batches
    for i in range(num_batches):
        outputs = reader.get_test_batch()
        im_feed, quest, _, ans_feed, quest_id, image_id = outputs
        # Samples whose answer index is 2000 are skipped (presumably the
        # out-of-vocabulary answer id -- confirm against the reader).
        if ans_feed == 2000:
            continue
        image_id = int(image_id)
        quest_id = int(quest_id)
        im_feed = np.squeeze(im_feed)
        quest = np.squeeze(quest)

        captions = generator.beam_search(sess, [im_feed, ans_feed])
        question = to_sentence.index_to_question(quest.tolist())
        answer = to_sentence.index_to_top_answer(ans_feed)

        print('============== %d ============' % i)
        print('image id: %d, question id: %d' % (image_id, quest_id))
        print('question\t: %s' % question)
        for rank, cand in enumerate(captions[0:3]):
            decoded = to_sentence.index_to_question(cand.sentence)
            print('<question %d>\t: %s' % (rank, decoded))
        print('answer\t: %s\n' % answer)

        # Only the best beam is kept in the result file.
        sentence = to_sentence.index_to_question(captions[0].sentence)
        results.append({
            'image_id': image_id,
            'question_id': quest_id,
            'question': sentence,
        })

    save_json(res_file, results)
    return res_file
def test(checkpoint_path=None):
    """Run beam search (beam size 3) on 'kptest' and save top-1 questions.

    :param checkpoint_path: checkpoint to restore; when ``None`` the latest
        checkpoint under ``FLAGS.checkpoint_dir % FLAGS.model_type`` is used
    :return: path of the JSON result file
    """
    beam_size = 3
    subset = 'kptest'

    config = ModelConfig()
    config.phase = 'other'
    config.model_type = FLAGS.model_type
    config.cell_option = 5

    # build data reader
    reader = Reader(batch_size=1, subset=subset, output_attr=True,
                    output_im=True, output_qa=True, output_capt=False,
                    output_ans_seq=True)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % FLAGS.model_type
        checkpoint_path = tf.train.get_checkpoint_state(
            ckpt_dir).model_checkpoint_path

    res_file = 'result/beamsearch_vaq_%s_%s.json' % (
        FLAGS.model_type.upper(), subset)

    # build and restore model
    model = InferenceWrapper()
    restore_fn = model.build_graph_from_config(config, checkpoint_path)
    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    restore_fn(sess)

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)
    generator = caption_generator.CaptionGenerator(
        model, to_sentence.question_vocab, beam_size=beam_size)

    results = []
    re_rank_cands = []  # collected per sample, not written to disk
    print('Running inference on split %s...' % TEST_SET)
    num_batches = reader.num_batches
    for i in range(num_batches):
        batch = reader.get_test_batch()
        (im_feed, attr, quest, quest_len, _, ans_seq, ans_seq_len,
         quest_id, image_id) = batch
        image_id = int(image_id)
        quest_id = int(quest_id)
        im_feed = np.squeeze(im_feed)
        gt_tokens = np.squeeze(quest)

        captions = generator.beam_search(
            sess, [im_feed, attr, ans_seq, ans_seq_len])
        question = to_sentence.index_to_question(gt_tokens.tolist())

        print('============== %d ============' % i)
        print('image id: %d, question id: %d' % (image_id, quest_id))
        print('question\t: %s' % question)

        cand_questions = []
        cand_scores = []
        for rank, cand in enumerate(captions):
            decoded = to_sentence.index_to_question(cand.sentence)
            cand_questions.append(decoded)
            cand_scores.append(cand.logprob)
            print('<question %d>\t: %s' % (rank, decoded))
        re_rank_cands.append({'question_id': quest_id,
                              'questions': cand_questions,
                              'confidence': cand_scores})

        # The first beam is stored as the prediction.
        best = captions[0]
        results.append({
            'image_id': image_id,
            'question_id': quest_id,
            'question': to_sentence.index_to_question(best.sentence),
        })

    save_json(res_file, results)
    return res_file
def ivqa_decoding_beam_search(ckpt_dir, method):
    """Sample questions on 'bs_test', filter them with the language model
    and save the surviving candidates per question id.

    Nothing is done when the result file already exists.

    :param ckpt_dir: directory holding the generator checkpoint
    :param method: tag inserted into the result file name
    :return: ``None``
    """
    model_config = ModelConfig()
    inf_type = 'beam'
    assert (inf_type in ['beam', 'rand'])
    use_random = inf_type == 'rand'

    res_file = ('result/bs_RL2_cands_LM_%s.json' % method if use_random
                else 'result/bs_RL2_cands_LM_%s_BEAM.json' % method)
    if os.path.exists(res_file):
        print('File %s already exist, skipped' % res_file)
        return

    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    subset = 'bs_test'
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)
    exemplar = ExemplarLanguageModel()

    checkpoint_path = tf.train.get_checkpoint_state(
        ckpt_dir).model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        phase = 'sampling' if use_random else 'sampling_beam'
        model = model_fn(model_config, phase)
        model.set_num_sampling_points(1000)
        model.build()

        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

        # build language model; it runs in the generator's session
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        language_model.set_session(sess)
        language_model.setup_model()

    num_batches = reader.num_batches
    print('Running beam search inference...')
    results = {}
    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    for i in range(num):
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        im, _, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        if top_ans == 2000:
            continue

        print('\n%d/%d' % (i, num))
        question_id = int(quest_ids[0])
        image_id = int(image_ids[0])

        tic = time()
        pathes, scores = model.greedy_inference(
            [im, ans_tokens, ans_len], sess)
        # find unique
        _, ivqa_pathes = process_one(scores, pathes)
        toc = time()
        print('Time for sample generation: %0.2fs' % (toc - tic))

        # apply language model
        lm_inputs = wrap_samples_for_language_model(
            [ivqa_pathes], pad_token=model.pad_token - 1, max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(lm_inputs)
        # samples found by the exemplar query get full legality
        legality_scores[match_gt] = 1.0
        num_keep = max(100, (legality_scores > 0.3).sum())  # at least 100
        valid_inds = (-legality_scores).argsort()[:num_keep]
        print('Time for language model filtration: %0.2fs' %
              (time() - toc))

        kept_pathes = [ivqa_pathes[pos] for pos in valid_inds]
        kept_scores = legality_scores[valid_inds]
        candidates = []
        for aug_id, cand_path in enumerate(kept_pathes):
            candidates.append({
                'image_id': int(image_id),
                'aug_id': aug_id,
                'question_id': question_id,
                'question': to_sentence.index_to_question(cand_path),
                'score': float(kept_scores[aug_id]),
            })
        print('Number of unique questions: %d' % len(candidates))
        results[int(question_id)] = candidates

    save_json(res_file, results)
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    """Sample questions for the first 41 'kpval' batches and render a report.

    For each batch the unique sampled questions are decoded, printed next to
    the ground-truth answer and handed to an ``ExperimentWriter``, which is
    rendered once the loop finishes.

    The original body had a JSON dump and ``return res_file`` after a bare
    ``return``; that code never ran, so it (and the result list that only
    fed it) has been removed. The function returns ``None``, as before.

    :param checkpoint_path: checkpoint to restore; when ``None`` the latest
        checkpoint in 'model/v1_var_att_noimage_cache_restval_VAQ-VarRL'
        is used
    :param subset: accepted for interface compatibility but ignored, the
        reader is always built on 'kpval'
    :return: ``None``
    """
    model_config = ModelConfig()

    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-Var', phase='test')
    writer = ExperimentWriter('latex/examples_noimage_tmp')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader (the ``subset`` argument is overridden on purpose to
    # keep the behavior of the original implementation)
    subset = 'kpval'
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)

    if checkpoint_path is None:
        ckpt_dir = 'model/v1_var_att_noimage_cache_restval_VAQ-VarRL'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        model = model_fn(model_config, 'sampling')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches
    print('Running beam search inference...')
    for i in range(num_batches):
        outputs = reader.get_test_batch()

        # inference: the last two outputs are ids, the rest feed the model
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)
        scores, pathes = post_process_prediction(scores, pathes)
        pathes, _ = put_to_array(pathes)
        scores, pathes = find_unique_rows(scores, pathes)
        # the first column is dropped before the second post-processing pass
        scores, pathes = post_process_prediction(scores, pathes[:, 1:])

        os.system('clear')
        im_file = '%s2014/COCO_%s2014_%012d.jpg' % ('val', 'val',
                                                    image_ids[0])
        im_path = os.path.join(IM_ROOT, im_file)

        ans, ans_len = outputs[1:1 + 2]
        answers = extract_gt(ans, ans_len)
        answer = to_sentence.index_to_answer(answers[0])
        print('Answer: %s' % answer)

        questions = []
        for path in pathes:
            sentence = to_sentence.index_to_question(path)
            questions.append(sentence)
            print(sentence)

        writer.add_result(image_ids[0], quest_ids[0], im_path, answer,
                          questions)

        # Only the first 41 batches are included in the report.
        if i == 40:
            break

    writer.render()
class IVQARewards(object):
    """Sentence-level reward for sampled questions against ground truth.

    Sampled and ground-truth token sequences are normalised with respect to
    start/end tokens, serialised and handed to the configured scorer.
    """

    def __init__(self, metric='cider', gt_has_start_end_token=False,
                 pred_has_start_end_token=True, use_end_token=True,
                 subset='kptrain'):
        """
        :param metric: scorer name; only 'cider' passes the assertion below
        :param gt_has_start_end_token: whether ground-truth sequences
            already carry start/end tokens
        :param pred_has_start_end_token: whether sampled sequences already
            carry start/end tokens
        :param use_end_token: keep the trailing end token when scoring
        :param subset: tag used to build the CIDEr scorer name
        """
        self.gt_has_start_end_token = gt_has_start_end_token
        self.pred_has_start_end_token = pred_has_start_end_token
        self.use_end_token = use_end_token
        if metric == 'cider':
            self.scorer = ciderEval('vqa_%s_idxs_end' % subset)
        elif metric == 'bleu':
            self.scorer = Bleu(n=4)
        assert (metric == 'cider')
        self.to_sentence = SentenceGenerator(trainset='trainval')
        # Plain int instead of ``long(0)``: ``long`` does not exist on
        # Python 3, and Python 2 ints promote automatically.
        self._num_call = 0
        self.print_iterval = 100

    def get_reward(self, sampled, gts):
        """Compute one reward per sampled sentence.

        :param sampled: a list (one entry per ground truth) of lists of
            sampled paths
        :param gts: ground-truth samples as ``[seq, seq_len]``
        :return: the scorer's per-sample scores divided by 10
        """
        gts = self.process_gt(gts)  # convert to list
        sampled = self.process_sampled(sampled)  # convert to list
        wrapped_gt, wrapped_sample = self.wrap_samples(sampled, gts)
        _, rewards = self.scorer.evaluate(wrapped_gt, wrapped_sample)
        return rewards / 10.  # normalise to [0-1]

    def print_questions(self, gts, sampled, rewards):
        """Print up to two random ground truths with their scored samples.

        :param gts: processed ground-truth token lists
        :param sampled: processed samples, grouped per ground truth
        :param rewards: flat reward array aligned with the flattened samples
        """
        num_tot = len(gts)
        # Sampling without replacement fails when fewer than two ground
        # truths are available, so the request is capped.
        n_vis = min(2, num_tot)
        vis_ids = np.random.choice(num_tot, size=(n_vis, ), replace=False)
        # offsets[k] is the position of group k's first sample in ``rewards``
        offsets = [0] + np.cumsum([len(sms) for sms in sampled]).tolist()
        for _vis_id in vis_ids:
            _gt = gts[_vis_id]
            sent = self.to_sentence.index_to_question(_gt[:-1])
            print('\nGT: %s' % sent)
            _offset = offsets[_vis_id]
            for _sid, sm in enumerate(sampled[_vis_id]):
                _r = rewards[_offset + _sid]
                sent = self.to_sentence.index_to_question(sm[:-1])
                print('%s (%0.3f)' % (sent, _r))
        print('\n')

    @staticmethod
    def wrap_samples(sampled, gts):
        """Pair every sample with its ground truth under a running key.

        :return: ``(wrapped_gt, wrapped_sample)`` where ``wrapped_gt`` maps
            the key to a one-element list with the serialised ground truth
            and ``wrapped_sample`` lists ``{'image_id', 'caption'}`` dicts
        """
        wrapped_gt = OrderedDict()
        wrapped_sample = []
        idx = 0
        for _var_s, _gt in zip(sampled, gts):
            _gt_pat = serialize_path(_gt)
            for _s in _var_s:
                _key = str(idx)
                wrapped_gt[_key] = [_gt_pat]
                wrapped_sample.append({'image_id': _key,
                                       'caption': [serialize_path(_s)]})
                idx += 1
        return wrapped_gt, wrapped_sample

    def process_gt(self, gts):
        """Convert ``[seq, seq_len]`` ground truths into token lists.

        Each sequence is cut to its length; the leading token is dropped
        when start/end tokens are present, otherwise ``END_TOKEN`` is
        appended. The last token is removed when end tokens are not used.
        """
        capt, capt_len = gts
        seqs = []
        for c, clen in zip(capt, capt_len):
            _gt = c[:clen].tolist()
            if self.gt_has_start_end_token:
                _gt = _gt[1:]
            else:
                _gt += [END_TOKEN]
            if not self.use_end_token:
                _gt = _gt[:-1]
            seqs.append(_gt)
        return seqs

    def process_sampled(self, sampled):
        """Apply the start/end token normalisation to every sampled path.

        :param sampled: list of groups, each a list of token lists
        :return: a new nested list with the same grouping
        """
        new_sampled = []
        for ps in sampled:
            tmp = []
            for p in ps:
                if self.pred_has_start_end_token:
                    _p = p[1:]
                else:
                    _p = p + [END_TOKEN]
                if not self.use_end_token:
                    _p = _p[:-1]
                tmp.append(_p)
            new_sampled.append(tmp)
        return new_sampled
# Manual check of AttentionDataReader with counter sampling: pops four
# batches and prints question/answer pairs taken two positions apart.
# NOTE(review): this chunk is collapsed onto one line in the file; the loop
# nesting below is reconstructed -- confirm against the original layout.
from inference_utils.question_generator_util import SentenceGenerator
to_sentence = SentenceGenerator(trainset='trainval')
reader = AttentionDataReader(batch_size=4, subset='trainval', counter_sampling=True)
reader.start()
from time import sleep
t = time()
for i in range(4):
    data = reader.pop_batch()
    data[0].mean()
    feats, q, q_len, a = data
    # entries c and c + 2 of the batch are shown side by side
    for c in range(2):
        q1 = to_sentence.index_to_question(q[c])
        a1 = to_sentence.index_to_top_answer(a[c])
        q2 = to_sentence.index_to_question(q[c + 2])
        a2 = to_sentence.index_to_top_answer(a[c + 2])
        # NOTE(review): a1/a2 are the values returned by
        # index_to_top_answer, while 2000 looks like an answer index --
        # confirm this comparison can ever be true.
        if a1 == 2000 or a2 == 2000:
            continue
        print('Index: %d' % i)
        print('Q1: %s\nA1: %s \n' % (q1, a1))
        print('Q2: %s\nA2: %s \n' % (q2, a2))
        print('\n')
        sleep(0.4)
    # print(data[1].mean())
    # print(data[2].max())
    # print(data[0].shape)
    # feed a random per-sample loss (batch of 4) back to the reader
    reader.update_loss(np.random.rand(4))
def ivqa_decoding_beam_search(checkpoint_path=None):
    """Collect language-model-filtered question samples for a few
    hand-picked 'kpval' questions and save them for visualisation.

    Only question ids listed in ``quest_ids_to_vis`` are processed. For
    each of them the sampled questions are de-duplicated, ranked by the
    language model's legality score and written, together with the
    ground-truth question and answer, to 'result/bs_cand_for_vis.json'.

    :param checkpoint_path: checkpoint to restore; when ``None`` the latest
        checkpoint in ``FLAGS.checkpoint_dir`` (or, if that is empty, in
        ``FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type)``) is
        used
    :return: ``None``
    """
    model_config = ModelConfig()
    res_file = 'result/bs_cand_for_vis.json'

    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(
        trainset='trainval',
        top_ans_file='../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt')

    # get data reader
    subset = 'kpval'
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)
    exemplar = ExemplarLanguageModel()

    if checkpoint_path is None:
        if FLAGS.checkpoint_dir:
            ckpt_dir = FLAGS.checkpoint_dir
        else:
            ckpt_dir = FLAGS.checkpoint_pat % (FLAGS.version,
                                               FLAGS.model_type)
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        model = model_fn(model_config, 'sampling')
        model.set_num_sampling_points(5000)
        model.build()

        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

        # build language model; it runs in the generator's session
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        language_model.set_session(sess)
        language_model.setup_model()

    num_batches = reader.num_batches
    quest_ids_to_vis = {5682052: 'bread', 965492: 'plane', 681282: 'station'}

    print('Running beam search inference...')
    results = []
    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    for i in range(num):
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        # The batch holds a single sample; index it explicitly instead of
        # converting the whole id array to a scalar.
        question_id = int(quest_ids[0])
        if question_id not in quest_ids_to_vis:
            continue

        im, gt_q, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        if top_ans == 2000:
            continue

        print('\n%d/%d' % (i, num))
        image_id = int(image_ids[0])

        t1 = time()
        pathes, scores = model.greedy_inference(
            [im, ans_tokens, ans_len], sess)
        # find unique
        _, ivqa_pathes = process_one(scores, pathes)
        t2 = time()
        print('Time for sample generation: %0.2fs' % (t2 - t1))

        # apply language model
        language_model_inputs = wrap_samples_for_language_model(
            [ivqa_pathes], pad_token=model.pad_token - 1, max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(language_model_inputs)
        # samples found by the exemplar query get full legality
        legality_scores[match_gt] = 1.0
        num_keep = max(100, (legality_scores > 0.1).sum())  # at least 100
        valid_inds = (-legality_scores).argsort()[:num_keep]
        print('keep: %d/%d' % (num_keep, len(ivqa_pathes)))
        print('Time for language model filtration: %0.2fs' % (time() - t2))

        # Ground truth is identical for every candidate: decode it once.
        gt_question = to_sentence.index_to_question(gt_q.flatten().tolist())
        gt_answer = to_sentence.index_to_answer(
            ans_tokens.flatten().tolist())

        for _pid, idx in enumerate(valid_inds):
            sentence = to_sentence.index_to_question(ivqa_pathes[idx])
            results.append({
                'image_id': int(image_id),
                'aug_id': question_id * 1000 + _pid,
                'question_id': question_id,
                'target': sentence,
                'top_ans_id': int(top_ans),
                'question': gt_question,
                'answer': gt_answer,
            })

    save_json(res_file, results)
    return None
class MixReward(object):
    """Combine VQA, CIDEr, language-model and diversity rewards.

    The overall reward is the VQA reward multiplied by the diversity reward
    and by binary masks obtained from thresholding the CIDEr and
    language-model scores.
    """

    def __init__(self, thresh=0.3, cider_w=0.6, dis_vqa_reward=False):
        """
        :param thresh: stored as ``self.thresh``
        :param cider_w: stored as ``self.cider_w``
        :param dis_vqa_reward: forwarded to ``VQARewards`` as
            ``use_dis_reward``
        """
        self.vqa_reward = VQARewards(use_dis_reward=dis_vqa_reward)
        self.cider_reward = IVQARewards()
        self.diversity_reward = DiversityReward()
        self.thresh = thresh
        self.cider_w = cider_w
        self.to_sentence = SentenceGenerator(trainset='trainval')
        # Plain int instead of ``long(0)``: ``long`` does not exist on
        # Python 3, and Python 2 ints promote automatically.
        self._num_call = 0
        self.print_iterval = 100
        self.language_thresh = 0.2
        self.cider_thresh = 0.05
        self.use_cider = True
        self.lm = None
        self.replay_buffer = None

    def set_language_model(self, model):
        """Attach the language model used by ``compute_lm_reward``."""
        self.lm = model

    def set_vqa_model(self, vqa):
        """Forward the VQA model to the VQA reward object."""
        self.vqa_reward.set_vqa_model(vqa)

    def get_vqa_model(self):
        """Return the model held by the VQA reward object."""
        return self.vqa_reward.model

    def set_replay_buffer(self, insert_thresh=0.5, sv_dir='vqa_replay_buffer'):
        """Create the replay buffer that ``get_reward`` will feed."""
        from vqa_replay_buffer import VQAReplayBuffer
        self.replay_buffer = VQAReplayBuffer(insert_thresh=insert_thresh,
                                             sv_dir=sv_dir)

    def cache_questions(self, quest_ids, questions, rewards):
        """Insert questions into the replay buffer.

        The VQA reward is zeroed for samples that fail the language
        threshold before insertion.

        :param rewards: the five-element list built in ``get_reward``
        """
        vqa_reward, _, language_reward, _, _ = rewards
        mask = self.apply_language_mask(language_reward)  # is grammar correct
        self.replay_buffer.insert(quest_ids, questions, vqa_reward * mask)

    def set_cider_state(self, use_cider):
        """Store the ``use_cider`` flag."""
        self.use_cider = use_cider

    def set_language_thresh(self, t):
        """Set the language-model score threshold."""
        self.language_thresh = t

    def compute_lm_reward(self, _lm_inputs):
        """Return the language model's output for ``_lm_inputs``."""
        return self.lm.inference(_lm_inputs)

    def apply_cider_mask(self, cider_scores):
        """Return a float32 mask, 1 where the score reaches ``cider_thresh``."""
        return (cider_scores >= self.cider_thresh).astype(np.float32)

    def apply_language_mask(self, language_scores):
        """Return a float32 mask, 1 where the score reaches
        ``language_thresh``."""
        return (language_scores >= self.language_thresh).astype(np.float32)

    def apply_mask(self, rewards):
        """Compute the overall reward.

        :param rewards: ``[vqa, cider, language, diversity]`` arrays
        :return: ``vqa * cider_mask * language_mask * diversity``
        """
        [vqa_reward, cider_reward, language_reward,
         diversity_reward] = rewards
        mask = self.apply_cider_mask(cider_reward)
        mask *= self.apply_language_mask(language_reward)
        return vqa_reward * mask * diversity_reward

    def get_reward(self, sampled, gts, context):
        """Compute all rewards for a batch of sampled questions.

        :param sampled: sampled questions, grouped per ground truth
        :param gts: ground-truth questions as passed to ``IVQARewards``
        :param context: indexable; ``[0]`` feeds the VQA reward, ``[1]`` the
            language model, ``[2]`` the diversity reward and ``[3]`` holds
            the question ids used for the replay buffer
        :return: ``(overall_reward, rewards, is_gt, aug_data)`` where
            ``rewards`` stacks vqa, cider, language, diversity and overall
            rewards as columns
        """
        diversity_reward, is_gt = self.diversity_reward.get_reward(
            sampled, context[2])
        vqa_reward, aug_data = self.vqa_reward.get_reward(sampled, context[0])
        cider_reward = self.cider_reward.get_reward(sampled, gts)  # cider
        language_reward = self.compute_lm_reward(context[1])
        language_reward[is_gt] = 1.0  # correct language model prediction
        rewards = [vqa_reward, cider_reward, language_reward,
                   diversity_reward]
        overall_reward = self.apply_mask(rewards)
        rewards.append(overall_reward)
        # cache and print questions
        if self.replay_buffer:
            self.cache_questions(context[3], sampled, rewards)
        self.print_questions(_parse_gt_questions(*gts), sampled, rewards)
        rewards = self.concat_rewards(rewards)
        return overall_reward, rewards, is_gt, aug_data

    def concat_rewards(self, inputs):
        """Stack 1-D reward arrays as the columns of one 2-D array."""
        return np.concatenate([_in[:, np.newaxis] for _in in inputs], axis=1)

    def print_questions(self, gts, sampled, rewards):
        """Every ``print_iterval`` calls, print up to two random ground
        truths with their sampled questions and reward breakdown.

        :param gts: ground-truth token lists
        :param sampled: samples grouped per ground truth
        :param rewards: ``[vqa, cider, language, diversity, overall]``
        """
        self._num_call += 1
        if self._num_call % self.print_iterval:
            return
        num_tot = len(gts)
        # Sampling without replacement fails when fewer than two ground
        # truths are available, so the request is capped.
        n_vis = min(2, num_tot)
        r_vqa, r_cider, r_lm, r_div, r_all = rewards
        vis_ids = np.random.choice(num_tot, size=(n_vis, ), replace=False)
        # offsets[k] is the position of group k's first sample in the
        # flat reward arrays
        offsets = [0] + np.cumsum([len(sms) for sms in sampled]).tolist()
        for _vis_id in vis_ids:
            sent = self.to_sentence.index_to_question(gts[_vis_id])
            print('\nGT: %s' % sent)
            _offset = offsets[_vis_id]
            for _sid, sm in enumerate(sampled[_vis_id]):
                pos = _offset + _sid
                # first and last tokens are stripped before decoding
                sent = self.to_sentence.index_to_question(sm[1:-1])
                print(
                    '%s (vqa:%0.3f, cider:%0.3f, lm:%0.3f, diver: %0.3f, overall:%0.3f)'
                    % (sent, r_vqa[pos], r_cider[pos], r_lm[pos],
                       r_div[pos], r_all[pos]))
        print('\n')
def test(checkpoint_path=None):
    """Run beam search (beam size 10) on 'kptest'; save the top-1 question
    and the full candidate list of every sample.

    :param checkpoint_path: checkpoint to restore; when ``None`` the latest
        checkpoint under ``FLAGS.checkpoint_dir % FLAGS.model_type`` is used
    :return: path of the JSON file holding the top-1 questions
    """
    beam_size = 10
    subset = 'kptest'

    config = ModelConfig()
    config.phase = 'other'
    config.model_type = FLAGS.model_type

    # build data reader
    create_fn = create_reader(FLAGS.model_type, phase='test')
    reader = create_fn(batch_size=1, subset=subset, version='v1')

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % FLAGS.model_type
        checkpoint_path = tf.train.get_checkpoint_state(
            ckpt_dir).model_checkpoint_path

    res_file = 'result/beamsearch_%s_%s.json' % (FLAGS.model_type.upper(),
                                                 subset)
    cand_file = 'result/sampling_%s_%s.json' % (FLAGS.model_type.upper(),
                                                subset)

    # build and restore model
    model = InferenceWrapper()
    restore_fn = model.build_graph_from_config(config, checkpoint_path)
    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    restore_fn(sess)

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)
    generator = caption_generator.CaptionGenerator(
        model, to_sentence.question_vocab, beam_size=beam_size)

    results = []
    candidates = []
    print('Running inference on split %s...' % TEST_SET)
    num_batches = reader.num_batches
    for i in range(num_batches):
        batch = reader.get_test_batch()
        im_feed, attr, ans_seq, ans_seq_len, quest_id, image_id = batch
        image_id = int(image_id)
        quest_id = int(quest_id)
        im_feed = np.squeeze(im_feed)

        captions = generator.beam_search(
            sess, [im_feed, attr, ans_seq, ans_seq_len])

        print('============== %d ============' % i)
        print('image id: %d, question id: %d' % (image_id, quest_id))

        decoded = []
        for rank, cand in enumerate(captions):
            text = to_sentence.index_to_question(cand.sentence)
            decoded.append(text)
            print('[%02d]: %s' % (rank, text))
        candidates.append({'question_id': quest_id,
                           'image_id': image_id,
                           'candidates': decoded})

        # The first beam is stored as the prediction.
        best = captions[0]
        results.append({'image_id': image_id,
                        'question_id': quest_id,
                        'question': to_sentence.index_to_question(
                            best.sentence)})

    save_json(res_file, results)
    save_json(cand_file, candidates)
    return res_file
def ivqa_decoding_beam_search(checkpoint_path=None):
    """Sample questions on 'kptest', filter them with the language model,
    verify them with the VQA model and save the accepted ones.

    Writes the accepted questions to 'result/bs_gen_<method>.json' and the
    per-sample mean VQA scores to 'result/bs_vqa_scores_<method>.mat'.

    :param checkpoint_path: checkpoint to restore; when ``None`` the latest
        checkpoint in ``FLAGS.checkpoint_dir`` (or, if that is empty, in
        ``FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type)``) is
        used
    :return: ``(res_file, mean_vqa_score)``
    """
    model_config = ModelConfig()
    method = FLAGS.method
    res_file = 'result/bs_gen_%s.json' % method
    score_file = 'result/bs_vqa_scores_%s.mat' % method

    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    subset = 'kptest'
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)
    exemplar = ExemplarLanguageModel()

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir if FLAGS.checkpoint_dir else (
            FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type))
        checkpoint_path = tf.train.get_checkpoint_state(
            ckpt_dir).model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        model = model_fn(model_config, 'sampling')
        model.set_num_sampling_points(1000)
        model.build()

        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

        # build language model; it runs in the generator's session
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        language_model.set_session(sess)
        language_model.setup_model()

        # build VQA model
        vqa_model = VQAWrapper(g, sess)

    num_batches = reader.num_batches
    print('Running beam search inference...')
    results = []
    batch_vqa_scores = []
    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    for i in range(num):
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        im, _, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        if top_ans == 2000:
            continue

        print('\n%d/%d' % (i, num))
        question_id = int(quest_ids[0])
        image_id = int(image_ids[0])

        t_start = time()
        pathes, scores = model.greedy_inference(
            [im, ans_tokens, ans_len], sess)
        # find unique
        _, ivqa_pathes = process_one(scores, pathes)
        t_sampled = time()
        print('Time for sample generation: %0.2fs' % (t_sampled - t_start))

        # apply language model
        lm_inputs = wrap_samples_for_language_model(
            [ivqa_pathes], pad_token=model.pad_token - 1, max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(lm_inputs)
        # samples found by the exemplar query get full legality
        legality_scores[match_gt] = 1.0
        num_keep = max(100, (legality_scores > 0.1).sum())  # at least 100
        valid_inds = (-legality_scores).argsort()[:num_keep]
        t_filtered = time()
        print('Time for language model filtration: %0.2fs' %
              (t_filtered - t_sampled))

        # apply VQA model
        sampled = [ivqa_pathes[pos] for pos in valid_inds]
        vqa_scores, is_valid = vqa_model.get_scores(sampled, im, top_ans)
        conf_inds = np.where(is_valid)[0]
        t_verified = time()
        print('Time for VQA verification: %0.2fs' %
              (t_verified - t_filtered))

        this_mean_vqa_score = vqa_scores[conf_inds].mean()
        print('sampled: %d, unique: %d, legal: %d, gt: %d, mean score %0.2f' %
              (pathes.shape[0], len(ivqa_pathes), num_keep, match_gt.sum(),
               this_mean_vqa_score))
        batch_vqa_scores.append(this_mean_vqa_score)

        for rank, pos in enumerate(conf_inds):
            results.append({
                'image_id': int(image_id),
                # augmented id: source question id * 1000 + candidate rank
                'question_id': question_id * 1000 + rank,
                'question': to_sentence.index_to_question(sampled[pos]),
                'score': float(vqa_scores[pos]),
            })

    save_json(res_file, results)

    batch_vqa_scores = np.array(batch_vqa_scores, dtype=np.float32)
    mean_vqa_score = batch_vqa_scores.mean()
    from scipy.io import savemat
    savemat(score_file, {'scores': batch_vqa_scores,
                         'mean_score': mean_vqa_score})
    print('BS mean VQA score: %0.3f' % mean_vqa_score)
    return res_file, mean_vqa_score
def var_vqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    """Sample answers for the first 41 'val' batches and render a report.

    For each batch the unique sampled sequences are decoded as answers,
    printed below the ground-truth question and answer, and handed to an
    ``ExperimentWriter`` that is rendered after the loop.

    :param checkpoint_path: checkpoint to restore; when ``None`` the latest
        checkpoint under
        ``FLAGS.checkpoint_dir % (FLAGS.trainset, FLAGS.model_type)`` is used
    :param subset: overwritten with 'val' below, so the argument is ignored
    :return: ``None`` (the visible body ends on a bare ``return``)
    """
    model_config = ModelConfig()
    # NOTE(review): res_file is not used in the visible part of this function.
    res_file = 'result/quest_vaq_greedy_%s.json' % FLAGS.model_type.upper()
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader('V7W-VarDS', phase='test')
    writer = ExperimentWriter('latex/v7w_%s' % FLAGS.model_type.lower())
    # Create the vocabulary.
    to_sentence = SentenceGenerator(
        trainset='train',
        ans_vocab_file='data2/v7w_train_answer_word_counts.txt',
        quest_vocab_file='data2/v7w_train_question_word_counts.txt',
        top_ans_file='data2/v7w_train_top2000_answers.txt')
    # get data reader
    subset = 'val'
    reader = create_fn(batch_size=1, subset=subset, version=FLAGS.test_version)
    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.trainset, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path
    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)
    num_batches = reader.num_batches
    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()
        # pdb.set_trace()
        # inference
        images, quest, quest_len, ans, ans_len, quest_ids, image_ids = outputs
        scores, pathes = model.greedy_inference([images, quest, quest_len], sess)
        scores, pathes = post_process_prediction(scores, pathes)
        pathes, pathes_len = put_to_array(pathes)
        scores, pathes = find_unique_rows(scores, pathes)
        # the first column is dropped before the second post-processing pass
        scores, pathes = post_process_prediction(scores, pathes[:, 1:])
        # question = to_sentence.index_to_question(pathes[0])
        # print('%d/%d: %s' % (i, num_batches, question))
        # show image
        os.system('clear')
        image_id = image_ids[0]
        im_path = _get_vg_image_root(image_id)
        # im = imread(im_path)
        # plt.imshow(im)
        questions = extract_gt(quest, quest_len)
        question = to_sentence.index_to_question(questions[0])
        print('Question: %s' % question)
        answers = extract_gt(ans, ans_len)
        answer = to_sentence.index_to_answer(answers[0])
        # plt.title(answer)
        print('Answer: %s' % answer)
        # ``answers`` is reused from here on for the decoded samples
        answers = []
        for path in pathes:
            sentence = to_sentence.index_to_answer(path)
            answers.append(sentence)
            print(sentence)
        # plt.show()
        qa = '%s - %s' % (question, answer)
        writer.add_result(image_ids[0], quest_ids[0], im_path, qa, answers)
        # NOTE(review): the same paths are decoded with index_to_answer
        # above but with index_to_question here -- confirm which vocabulary
        # is intended. ``results`` is not consumed in the visible code.
        for quest_id, image_id, path in zip(quest_ids, image_ids, pathes):
            sentence = to_sentence.index_to_question(path)
            res_i = {
                'image_id': int(image_id),
                'question_id': int(quest_id),
                'question': sentence
            }
            results.append(res_i)
        # only the first 41 batches are processed
        if i == 40:
            break
    writer.render()
    return