def __init__(self, thresh=0.3, cider_w=0.6, dis_vqa_reward=False,
             attention_vqa=False):
    """Build the reward components and initialise bookkeeping state.

    Args:
        thresh: value stored on ``self.thresh``.
        cider_w: value stored on ``self.cider_w``.
        dis_vqa_reward: forwarded as ``use_dis_reward`` to the VQA reward.
        attention_vqa: selects ``AttentionVQARewards`` when true; otherwise
            ``VQARewards`` is built from the hard-coded BaseNorm checkpoint.
    """
    if not attention_vqa:
        vqa_reward = VQARewards(
            'model/kprestval_VQA-BaseNorm/model.ckpt-26000',
            use_dis_reward=dis_vqa_reward)
    else:
        vqa_reward = AttentionVQARewards(use_dis_reward=dis_vqa_reward)
    self.vqa_reward = vqa_reward

    # Remaining reward terms.
    self.cider_reward = VisualFactReward()
    self.diversity_reward = DiversityReward()

    # Caller-supplied weights.
    self.thresh, self.cider_w = thresh, cider_w

    # Token ids -> text helper.
    self.to_sentence = SentenceGenerator(trainset='trainval')

    # Call counter and print period.
    self._num_call = long(0)
    self.print_iterval = 100

    # Fixed cut-offs and switches.
    self.language_thresh = 0.2
    self.cider_thresh = 0.05
    self.use_cider = True

    # Not populated by this constructor.
    self.lm = None
    self.replay_buffer = None
def test():
    """Blend two stored score matrices and write the top answers as JSON.

    The two HDF5 score files are combined as ``w * preds1 + (1 - w) * preds2``
    with ``w = 0.8``; the arg-max answer per question is converted to text
    and dumped to ``FLAGS.result_format % (FLAGS.version, TEST_SET)``.

    Returns:
        tuple: ``(res_file, quest_ids)`` - the path written and the question
        ids read from the first score file.
    """
    def load_data(fpath):
        # Each file holds the question ids and the per-answer scores.
        d = load_hdf5(fpath)
        return d['quest_ids'], d['ans_preds']

    w = 0.8
    quest_ids, preds1 = load_data('data5/kpval_VQA-BaseNorm_scores.data')
    # NOTE(review): the second file's question ids are not compared with the
    # first; the blend assumes both files share the same row order.
    _, preds2 = load_data('data5/kpval_VQA-BaseNorm_scores_flt.data')

    scores = w * preds1 + (1.0 - w) * preds2
    # Force the last answer column below the others before the arg-max
    # (presumably the out-of-vocabulary slot - confirm).
    scores[:, -1] = -1.0
    ans_ids = scores.argmax(axis=1)

    # Create the vocabulary.
    top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)

    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': qid}
              for aid, qid in zip(ans_ids, quest_ids)]

    # Save results; the context manager guarantees the file is flushed and
    # closed even if serialisation fails.
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % (FLAGS.version, TEST_SET)
    with open(res_file, 'w') as fs:
        json.dump(result, fs)
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
def test():
    """Print ground-truth and decoded answers for 20 reader batches.

    For every sample the ground-truth answer from the question manager, the
    encoder's top answer, and the two ``SentenceGenerator`` decodings (top
    answer index and token sequence) are printed for manual comparison.
    """
    top_ans_file = '/import/vision-ephemeral/fl302/code/' \
                   'VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    mc_ctx = MultiChoiceQuestionManger(subset='val', load_ans=True,
                                       top_ans_file=top_ans_file)
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)
    answer_enc = mc_ctx.encoder

    create_fn = create_reader('VAQ-CA', 'train')
    reader = create_fn(batch_size=4, subset='kprestval')
    reader.start()
    try:
        for _ in range(20):
            inputs = reader.pop_batch()
            _, _, _, _, labels, ans_seq, ans_len, quest_ids, image_ids = inputs
            b_top_ans = answer_enc.get_top_answers(labels)
            for i, (quest_id, i_a) in enumerate(zip(quest_ids, b_top_ans)):
                print('question id: %d' % quest_id)
                gt = mc_ctx.get_gt_answer(quest_id)
                print('GT: %s' % gt)
                print('Top: %s' % i_a)
                print('SG: top: %s' % to_sentence.index_to_top_answer(labels[i]))
                # Only the first ans_len[i] tokens of the row are decoded.
                seq = ans_seq[i][:ans_len[i]].tolist()
                print('SG: seq: %s\n' % to_sentence.index_to_answer(seq))
    finally:
        # Stop the reader even when a batch fails, so it is not left running.
        reader.stop()
def visualise():
    """Render up to 100 randomly chosen replay-buffer entries.

    Loads the replay memory JSON, shuffles its keys with a fixed seed, and
    for each of the first 100 keys that has at least one stored path adds an
    entry (image, ground-truth Q/A header, decoded questions with their two
    confidences) to an ``ExperimentWriter``, which is rendered at the end.
    """
    mc_ctx = MultiChoiceQuestionManger()
    to_sentence = SentenceGenerator(trainset='trainval')
    writer = ExperimentWriter('latex/examples_replay_buffer_rescore_prior')
    d = load_json('vqa_replay_buffer/vqa_replay_low_rescore_prior_05_04.json')
    memory = d['memory']

    # Materialise the keys as a real list: shuffle() needs a mutable
    # sequence, and dict.keys() is only a list on Python 2.
    keys = list(memory)
    np.random.seed(123)  # fixed seed -> same 100 examples on every run
    np.random.shuffle(keys)
    vis_keys = keys[:100]

    for quest_key in vis_keys:
        pathes = memory[quest_key]
        if not pathes:
            # Nothing stored for this question.
            continue

        quest_id = int(quest_key)
        image_id = mc_ctx.get_image_id(quest_id)
        gt_question = mc_ctx.get_question(quest_id)
        answer = mc_ctx.get_gt_answer(quest_id)
        head = 'Q: %s A: %s' % (gt_question, answer)
        im_file = '%s2014/COCO_%s2014_%012d.jpg' % ('val', 'val', image_id)
        im_path = os.path.join(IM_ROOT, im_file)

        questions = []
        # Keys are space-separated token ids; values are a pair of scores.
        for token_str, (conf1, conf2) in pathes.items():
            _tokens = [int(t) for t in token_str.split(' ')]
            sentence = to_sentence.index_to_question(_tokens)
            questions.append('%s (%0.2f-%0.2f)' % (sentence, conf1, conf2))
        writer.add_result(image_id, quest_id, im_path, head, questions)
    writer.render()
def test(checkpoint_path=None):
    """Run top-1 answer inference over ``TEST_SET`` and dump results as JSON.

    Args:
        checkpoint_path: checkpoint to restore. When ``None`` the latest
            checkpoint under ``FLAGS.checkpoint_dir % (FLAGS.version,
            FLAGS.model_type)`` is used.

    Returns:
        tuple: ``(res_file, quest_ids)`` - path of the JSON file written and
        the concatenated question ids of all batches.
    """
    batch_size = 100
    config = ModelConfig()
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # build data reader
    reader = AttentionFetcher(batch_size=batch_size, subset=TEST_SET,
                              feat_type=config.feat_type,
                              version=FLAGS.version)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = model_fn(config, phase='test')
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)

    ans_ids = []
    quest_ids = []

    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        # The last two batch entries are not model inputs; outputs[-2]
        # carries the question ids.
        generated_ans = sess.run(
            prob, feed_dict=model.fill_feed_dict(outputs[:-2]))
        # Zero the last answer column so the arg-max does not select it
        # unless every other probability is zero as well.
        generated_ans[:, -1] = 0
        ans_ids.append(np.argmax(generated_ans, axis=1))
        quest_ids.append(outputs[-2])

    quest_ids = np.concatenate(quest_ids)
    ans_ids = np.concatenate(ans_ids)
    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': qid}
              for aid, qid in zip(ans_ids, quest_ids)]

    # Save results; close the file deterministically.
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % (FLAGS.version, TEST_SET)
    with open(res_file, 'w') as fs:
        json.dump(result, fs)
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
def test(checkpoint_path=None):
    """Collect the top ``_K`` answers per question and evaluate recall.

    Args:
        checkpoint_path: checkpoint to restore; when ``None`` the latest one
            under ``FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)``
            is looked up.
    """
    batch_size = 100
    config = ModelConfig()

    # Data source for the evaluation split.
    reader = AttentionFetcher(batch_size=batch_size,
                              subset=TEST_SET,
                              feat_type=config.feat_type,
                              version=FLAGS.version)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        checkpoint_path = tf.train.get_checkpoint_state(
            ckpt_dir).model_checkpoint_path
    print(checkpoint_path)

    # NOTE(review): model_fn is not assigned in this function; it is expected
    # to be available at module level.
    model = model_fn(config, phase='test')
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    tf.train.Saver().restore(sess, checkpoint_path)

    # Answer index -> text.
    to_sentence = SentenceGenerator(
        trainset='trainval',
        top_ans_file='../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt')

    results = []
    num_batches = reader.num_batches
    print('Running inference on split %s...' % TEST_SET)
    for step in range(num_batches):
        if step % 10 == 0:
            update_progress(step / float(num_batches))
        outputs = reader.get_test_batch()
        feed = model.fill_feed_dict(outputs[:-2])
        generated_ans = sess.run(prob, feed_dict=feed)
        # Zero the last column before ranking.
        generated_ans[:, -1] = 0
        # Descending order of probability per row.
        ranked = np.argsort(-generated_ans, axis=1)
        for quest_id, ids in zip(outputs[-2], ranked):
            answers = [to_sentence.index_to_top_answer(ids[k])
                       for k in range(_K)]
            results.append({'question_id': int(quest_id),
                            'answers': answers})

    eval_recall(results)
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    """Generate one question per sample of ``subset`` and save them as JSON.

    Args:
        checkpoint_path: checkpoint to restore; when ``None`` the latest one
            in ``FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)``.
        subset: data split handed to the reader and used in the file name.

    Returns:
        str: path of the JSON result file.
    """
    res_file = 'result/quest_vaq_greedy_%s_%s.json' % (
        FLAGS.model_type.upper(), subset)
    model_config = ModelConfig()

    # Factories for the model and its matching reader.
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader(FLAGS.model_type, phase='test')

    # Token ids -> text.
    to_sentence = SentenceGenerator(trainset='trainval')

    reader = create_fn(batch_size=100, subset=subset,
                       version=FLAGS.test_version)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        checkpoint_path = tf.train.get_checkpoint_state(
            ckpt_dir).model_checkpoint_path

    # Build the model in its own graph and restore the weights there.
    g = tf.Graph()
    with g.as_default():
        model = model_fn(model_config, 'beam')
        model.build()
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches
    print('Running beam search inference...')
    results = []
    for step in range(num_batches):
        batch = reader.get_test_batch()
        # The last two entries identify the samples; the rest feed the model.
        quest_ids, image_ids = batch[-2:]
        scores, pathes = model.greedy_inference(batch[:-2], sess)
        scores, pathes = post_process_prediction(scores, pathes)

        # Show the first question of the batch as a progress indicator.
        print('%d/%d: %s' % (step, num_batches,
                             to_sentence.index_to_question(pathes[0])))

        results.extend(
            {'image_id': int(image_id),
             'question_id': int(quest_id),
             'question': to_sentence.index_to_question(path)}
            for quest_id, image_id, path in zip(quest_ids, image_ids, pathes))

    save_json(res_file, results)
    return res_file
def __init__(self):
    """Create empty accumulators and open the analysis output file."""
    # Accumulators; nothing is added to them in this constructor.
    self.labels, self.rerank_preds = [], []
    self.vqa_top_scores, self.vqa_top_preds = [], []
    self.vqa_cands = []

    # Index -> text helper.
    self.to_sentence = SentenceGenerator(trainset='trainval')

    # Opened for writing here and kept on the instance; this constructor
    # does not close it.
    self.file_stream = open('result/rerank_analysis.txt', 'w')
def convert():
    """Export the answer embeddings of the top-2000 answers to HDF5.

    Restores the ``VAQ-Var`` model from a fixed checkpoint, checks that every
    top answer survives an encode/decode round trip through the answer
    vocabulary, runs the model's answer-embedding tensor on the encoded
    answers and saves the result under the key ``answer_embedding``.

    Raises:
        AssertionError: if a decoded answer differs from its tokenised
            original.
    """
    model_name = 'ivaq_var_restval'
    checkpoint_path = 'model/var_ivqa_pretrain_restval/model.ckpt-505000'

    # build model
    from config import ModelConfig
    model_config = ModelConfig()
    model_fn = get_model_creation_fn('VAQ-Var')

    # create graph
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'beam')
        model.build()
        tf_embedding = model._answer_embed
        tf_answer_feed = model._ans
        tf_answer_len_feed = model._ans_len

        # Restore from checkpoint
        print('Restore from %s' % checkpoint_path)
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    # build reader
    top_ans_file = '/import/vision-ephemeral/fl302/code/' \
                   'VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    mc_ctx = MultiChoiceQuestionManger(subset='val', load_ans=True,
                                       top_ans_file=top_ans_file)
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)
    answer_encoder = mc_ctx.encoder

    top_answer_inds = range(2000)
    top_answers = answer_encoder.get_top_answers(top_answer_inds)
    answer_seqs = answer_encoder.encode_to_sequence(top_answers)

    # Round-trip check: decoding the token sequence must reproduce the
    # tokenised answer text.
    for i, (ans, seq) in enumerate(zip(top_answers, answer_seqs)):
        rec_ans = to_sentence.index_to_answer(seq)
        ans = ' '.join(_tokenize_sentence(ans))
        print('%d: Raw: %s, Rec: %s' % (i + 1, ans, rec_ans))
        assert (ans == rec_ans)
    print('Checking passed')

    # extract
    print('Converting...')
    ans_arr, ans_arr_len = put_to_array(answer_seqs)
    # The interactive pdb breakpoint that used to sit here was removed so the
    # conversion can run unattended.
    embedding = sess.run(tf_embedding, feed_dict={
        tf_answer_feed: ans_arr.astype(np.int32),
        tf_answer_len_feed: ans_arr_len.astype(np.int32)
    })

    # save
    sv_file = 'data/v1_%s_top2000_lstm_embedding.h5' % model_name
    from util import save_hdf5
    save_hdf5(sv_file, {'answer_embedding': embedding})
    print('Done')
def __init__(self, model_name, K=3, do_plot=True):
    """Store the settings and make sure the per-model cache folder exists.

    Args:
        model_name: stored on the instance and used as the cache sub-folder.
        K: stored as ``self._top_k``.
        do_plot: stored as ``self._do_plot``.
    """
    # Ground-truth lookup and index -> text helper.
    self._gt_mgr = MultiChoiceQuestionManger(subset='trainval',
                                             load_ans=True)
    self._rev_map = SentenceGenerator(trainset='trainval')

    self._top_k, self._do_plot = K, do_plot
    self._model_name = model_name

    # One cache folder per model under att_maps/.
    cache_dir = 'att_maps/%s' % self._model_name
    self._cache_dir = cache_dir
    mkdir_if_missing(self._cache_dir)
def test():
    """Rescore multiple-choice candidates with the MLP and report accuracy.

    Restores the latest checkpoint from ``FLAGS.train_dir``, scores every
    datum in ``data/rescore_dev.pkl``, picks the arg-max candidate answer,
    writes all picks to ``result/rescore_dev_dev.json`` and prints the
    accuracy returned by ``evaluate_model``. Every 100th sample is printed.
    """
    from util import unpickle
    import json
    from inference_utils.question_generator_util import SentenceGenerator
    from w2v_answer_encoder import MultiChoiceQuestionManger

    config = MLPConfig()
    model = SequenceMLP(config, phase='test')
    model.build()
    prob = model.prob

    # Load vocabulary
    to_sentence = SentenceGenerator(trainset='trainval')
    # create multiple choice question manger
    mc_manager = MultiChoiceQuestionManger(subset='trainval',
                                           answer_coding='sequence')

    sess = tf.Session()
    # Load model
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    checkpoint_path = ckpt.model_checkpoint_path
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # get data
    result = []
    dataset = unpickle('data/rescore_dev.pkl')
    for itr, datum in enumerate(dataset):
        seq_index, att_mask, label = _process_datum(datum)
        quest_id = datum['quest_id']
        quest = seq_index[0].tolist()
        feed_dict = model.fill_feed_dict([seq_index, att_mask])
        scores = sess.run(prob, feed_dict=feed_dict)
        idx = scores.argmax()

        # parse question and answer; a 0 token is prepended before decoding
        question = to_sentence.index_to_question([0] + quest)
        mc_ans = mc_manager.get_candidate_answers(quest_id)
        vaq_answer = mc_ans[idx]
        real_answer = mc_ans[label.argmax()]

        # add result
        result.append({u'answer': vaq_answer, u'question_id': quest_id})

        # show results
        if itr % 100 == 0:
            print('============== %d ============' % itr)
            print('question id: %d' % quest_id)
            print('question\t: %s' % question)
            print('answer\t: %s' % real_answer)
            print('VAQ answer\t: %s (%0.2f)' % (vaq_answer, scores[idx]))

    quest_ids = [res[u'question_id'] for res in result]

    # save results; the file must be closed (and therefore flushed) before
    # evaluate_model reads it back
    tf.logging.info('Saving results')
    res_file = 'result/rescore_dev_dev.json'
    with open(res_file, 'w') as fs:
        json.dump(result, fs)

    from vqa_eval import evaluate_model
    acc = evaluate_model(res_file, quest_ids)
    print('Over all accuarcy: %0.2f' % acc)
def test(checkpoint_path=None):
    """Run the rerank model over ``TEST_SET`` and dump the answers as JSON.

    Args:
        checkpoint_path: checkpoint to restore; when ``None`` the latest one
            under ``FLAGS.checkpoint_dir % ('v1', 'Fusion')`` is used.

    Returns:
        tuple: ``(res_file, quest_ids)`` - path of the JSON file written and
        the concatenated question ids.
    """
    batch_size = 128

    # build data reader
    reader = Reader(batch_size=batch_size, subset=TEST_SET, phase='test',
                    version='v1')
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir % ('v1', 'Fusion'))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # build and restore model
    model = RerankModel(phase='test', version='v1', num_cands=5)
    model.build()

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # Create the vocabulary.
    to_sentence = SentenceGenerator(
        trainset='trainval',
        top_ans_file='../iccv_vaq/data/vqa_trainval_top2000_answers.txt')

    ans_ids = []
    quest_ids = []

    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.pop_batch()
        model_preds = sess.run(model.preds,
                               feed_dict=model.fill_feed_dict(outputs))
        # The model picks a position inside each sample's candidate list;
        # outputs[3] maps that position back to an answer id.
        local_index = model_preds.argmax(axis=1)
        top_ans = np.array([cand[idx]
                            for idx, cand in zip(local_index, outputs[3])])
        ans_ids.append(top_ans)
        quest_ids.append(outputs[-1])

    ans_ids = np.concatenate(ans_ids)
    quest_ids = np.concatenate(quest_ids)
    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': qid}
              for aid, qid in zip(ans_ids, quest_ids)]

    # save results; close the file deterministically
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % ('v1', TEST_SET)
    with open(res_file, 'w') as fs:
        json.dump(result, fs)
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
def vaq_decoding_greedy(checkpoint_path=None, subset='kpval'):
    """Greedily decode one question per sample and save them as JSON.

    Args:
        checkpoint_path: checkpoint to restore; when ``None`` the latest one
            in ``FLAGS.checkpoint_dir % FLAGS.model_type`` is used.
        subset: data split handed to the reader.

    Returns:
        str: path of the JSON result file.
    """
    model_type = FLAGS.model_type
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s.json' % model_type.upper()

    # Factories for the model and its matching reader.
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader(FLAGS.model_type, phase='test')

    # Token ids -> text.
    to_sentence = SentenceGenerator(trainset='trainval')

    reader = create_fn(batch_size=32, subset=subset)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % FLAGS.model_type
        checkpoint_path = tf.train.get_checkpoint_state(
            ckpt_dir).model_checkpoint_path

    # Model, saver and session all live in a dedicated graph.
    g = tf.Graph()
    with g.as_default():
        model = model_fn(model_config, 'greedy')
        model.build()
        saver = tf.train.Saver()

        sess = tf.Session()
        tf.logging.info('Restore from model %s' %
                        os.path.basename(checkpoint_path))
        saver.restore(sess, checkpoint_path)

    num_batches = reader.num_batches
    print('Running greedy inference...')
    results = []
    for step in range(num_batches):
        batch = reader.get_test_batch()
        # Trailing two entries are ids, the rest are model inputs.
        quest_ids, image_ids = batch[-2:]
        scores, pathes = model.greedy_inference(batch[:-2], sess)
        scores, pathes = post_process_prediction(scores, pathes)

        first_question = to_sentence.index_to_question(pathes[0])
        print('%d/%d: %s' % (step, num_batches, first_question))

        results.extend(
            {'image_id': int(image_id),
             'question_id': int(quest_id),
             'question': to_sentence.index_to_question(path)}
            for quest_id, image_id, path in zip(quest_ids, image_ids, pathes))

    save_json(res_file, results)
    return res_file
def main(_):
    """Print per-word and mean likelihood values for every record.

    Restores the latest checkpoint from ``FLAGS.checkpoint_dir``, then for
    each batch popped from the TFRecord reader evaluates ``model.likelihood``
    and prints the decoded question, the per-word values and the answer.
    """
    # Build the inference graph.
    config = QuestionGeneratorConfig()
    reader = TFRecordDataFetcher(FLAGS.input_files,
                                 config.image_feature_key)

    g = tf.Graph()
    checkpoint_path = tf.train.get_checkpoint_state(
        FLAGS.checkpoint_dir).model_checkpoint_path
    print(checkpoint_path)
    with g.as_default():
        model = QuestionGenerator(config, phase='evaluate')
        model.build()

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)

    # Expand the comma-separated patterns only to report how many files match.
    filenames = []
    for pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(pattern))

    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        saver = tf.train.Saver(var_list=tf.all_variables())
        saver.restore(sess, checkpoint_path)

        itr = 0
        while not reader.eof():
            outputs = reader.pop_batch()
            im_ids, quest_id, _im_feat, _ans_w2v, quest_ids, ans_ids = outputs
            inputs = post_processing_data(outputs)
            feed = model.fill_feed_dict(inputs)
            perplexity = sess.run(model.likelihood, feed_dict=feed)

            question = to_sentence.index_to_question(quest_ids)
            answer = to_sentence.index_to_answer(ans_ids)

            print('============== %d ============' % itr)
            print('image id: %d, question id: %d' % (im_ids, quest_id))
            print('question\t: %s' % question)

            # Pair each word with its value and drop the last pair.
            words = question.split(' ')
            annotated = ['%s (%0.2f)' % (w, p)
                         for w, p in zip(words, perplexity.flatten())]
            print('question\t' + ' '.join(annotated[:-1]))
            print('answer\t: %s' % answer)
            print('perplexity\t: %0.2f\n' % perplexity.mean())
            itr += 1
def __init__(self, ckpt_file=None):
    """Create the text helpers and leave model and session unset.

    Args:
        ckpt_file: accepted but not used by this constructor.
    """
    # Index -> text helper backed by the top-2000 answer list.
    self.to_sentence = SentenceGenerator(
        trainset='trainval',
        top_ans_file='../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt')
    self.sent_encoder = SentenceEncoder()

    # Placeholders; nothing in this constructor fills them in.
    self.model = None
    self.sess = None
    self.answer_to_top_ans_id = None

    self.name = ''
    self.top_k = 2
def __init__(self, thresh=0.3, cider_w=0.6, dis_vqa_reward=False):
    """Build the reward components and initialise bookkeeping state.

    Args:
        thresh: value stored on ``self.thresh``.
        cider_w: value stored on ``self.cider_w``.
        dis_vqa_reward: forwarded as ``use_dis_reward`` to ``VQARewards``.
    """
    # The three reward terms.
    self.vqa_reward = VQARewards(use_dis_reward=dis_vqa_reward)
    self.cider_reward = IVQARewards()
    self.diversity_reward = DiversityReward()

    # Caller-supplied weights.
    self.thresh, self.cider_w = thresh, cider_w

    # Token ids -> text helper.
    self.to_sentence = SentenceGenerator(trainset='trainval')

    # Call counter and print period.
    self._num_call = long(0)
    self.print_iterval = 100

    # Fixed cut-offs and switches.
    self.language_thresh = 0.2
    self.cider_thresh = 0.05
    self.use_cider = True

    # Not populated by this constructor.
    self.lm = None
    self.replay_buffer = None
def __init__(self, metric='cider', gt_has_start_end_token=False,
             pred_has_start_end_token=True, use_end_token=True,
             subset='kptrain'):
    """Store the token-handling flags and create the scorer.

    Args:
        metric: ``'cider'`` or ``'bleu'``; the trailing assertion only lets
            ``'cider'`` through.
        gt_has_start_end_token: stored on the instance.
        pred_has_start_end_token: stored on the instance.
        use_end_token: stored on the instance.
        subset: interpolated into the name handed to ``ciderEval``.
    """
    self.gt_has_start_end_token = gt_has_start_end_token
    self.pred_has_start_end_token = pred_has_start_end_token
    self.use_end_token = use_end_token

    if metric == 'bleu':
        self.scorer = Bleu(n=4)
    elif metric == 'cider':
        self.scorer = ciderEval('vqa_%s_idxs_end' % subset)
    # Any metric other than 'cider' is rejected here.
    assert (metric == 'cider')

    self.to_sentence = SentenceGenerator(trainset='trainval')

    # Call counter and print period.
    self._num_call = long(0)
    self.print_iterval = 100
def test_top_answer_layer():
    """Check that ``TopAnswerDataLayer`` sequences decode to the answer text.

    Draws a random 5x4 grid of answer indices, fetches their token sequences
    from the data layer, decodes each one and counts how many match the
    corresponding entry of the top-answer list.
    """
    from inference_utils.question_generator_util import SentenceGenerator
    import numpy as np

    to_sentence = SentenceGenerator(trainset='trainval')

    def visualise_sequence(seqs, seqs_len, idx):
        # Decode only the valid prefix of row idx.
        answer = to_sentence.index_to_answer(seqs[idx][:seqs_len[idx]])
        print('%s' % answer)
        return answer

    answer_pool = TopAnswerDataLayer('data/top_answer2000_sequences.h5', k=4)
    top_answer_list = load_top_answer_list()

    ind = np.random.randint(low=0, high=len(top_answer_list),
                            size=[5, 4], dtype=np.int32)
    top_k_ind = tf.constant(ind, dtype=tf.int32)
    t_ans_arr, t_ans_len = answer_pool.get_top_answer_sequences(top_k_ind)

    # Flatten everything to one row per sampled answer.
    ans_arr = t_ans_arr.eval().reshape([-1, answer_pool.data_len])
    ans_len = t_ans_len.eval().reshape([-1])
    answer_ind = top_k_ind.eval().reshape([-1])

    num_test = ans_len.size
    num_passed = 0
    for row in range(num_test):
        expected = top_answer_list[answer_ind[row]]
        print(expected)
        decoded = visualise_sequence(ans_arr, ans_len, row)
        print('========================')
        num_passed += (decoded == expected)
    print('\nFinish test top answer layer\nPassed: %d/%d' %
          (num_passed, num_test))
def main(subset):
    """Split the nearest-neighbour work for ``subset`` over worker processes.

    The question ids returned by ``load_image_nn`` are cut into ``num_proc``
    contiguous index ranges and each range is handed to ``process_worker``
    in its own process. The call returns once every worker has finished.

    Args:
        subset: data split name, forwarded to ``load_image_nn`` and to every
            worker.
    """
    from multiprocessing import Process

    # params
    num_proc = 10

    print('Creating Models')
    # sentence generator
    to_sentence = SentenceGenerator(trainset='trainval')

    # assign tasks
    val_qids, nn_ids = load_image_nn(subset)
    num = len(val_qids)
    # True division before ceil: integer division would floor first and the
    # trailing ``num % num_proc`` items would never be assigned to a worker.
    batch_size = int(ceil(num / float(num_proc)))

    print('Launching process')
    jobs = []
    for i in range(num_proc):
        # Clip the last range to the data size; trailing ranges may be empty.
        proc_range = np.arange(start=batch_size * i,
                               stop=min(batch_size * (i + 1), num),
                               dtype=np.int32)
        p = Process(target=process_worker,
                    args=(subset, i, proc_range, to_sentence))
        jobs.append(p)
        p.start()

    # Wait for the workers so the caller sees completed work.
    for p in jobs:
        p.join()
def test_cst_reader():
    """Print real/fake question pairs from four contrastive batches.

    Each batch is treated as two halves of equal size: entry ``idx`` of the
    first half is printed as "Real" and entry ``idx`` of the second half as
    "Fake". The batch mask is printed afterwards.
    """
    reader = ContrastiveDataReader(batch_size=4)
    to_sentence = SentenceGenerator(trainset='trainval')

    reader.start()
    try:
        for i in range(4):
            images, quest, quest_len, top_ans, mask = reader.pop_batch()
            questions = _parse_gt_questions(quest, quest_len)
            print('\nBatch %d' % i)
            # Floor division keeps this an int, which range() and the index
            # offset below require.
            this_batch_size = images.shape[0] // 2
            for idx in range(this_batch_size):
                print('Real: %s' %
                      to_sentence.index_to_question(questions[idx]))
                print('Fake: %s\n' % to_sentence.index_to_question(
                    questions[idx + this_batch_size]))
            print('Mask:')
            print(mask.astype(np.int32))
    finally:
        # Stop the reader even if a batch fails.
        reader.stop()
def __init__(self, thresh=0.3, cider_w=0.6):
    """Build the MCB-based reward components and bookkeeping state.

    Args:
        thresh: value stored on ``self.thresh``.
        cider_w: value stored on ``self.cider_w``.
    """
    from mcb_reward import MCBReward

    # The sentence generator is created first because the MCB reward
    # receives it as its constructor argument.
    sentence_gen = SentenceGenerator(trainset='trainval')
    self.to_sentence = sentence_gen
    self.vqa_reward = MCBReward(sentence_gen)

    # Remaining reward terms.
    self.cider_reward = VisualFactReward()
    self.diversity_reward = DiversityReward()

    # Caller-supplied weights.
    self.thresh, self.cider_w = thresh, cider_w

    # Call counter and print period.
    self._num_call = long(0)
    self.print_iterval = 100

    # Fixed cut-offs and switches.
    self.language_thresh = 0.2
    self.cider_thresh = 0.05
    self.use_cider = True

    # Not populated by this constructor.
    self.lm = None
    self.replay_buffer = None
def main(_):
    """Print a few question/answer pairs from the v1 and v2 training readers.

    For each dataset version a reader is created, five batches are popped,
    and the second sample (index 1) of every batch is decoded and printed.
    """
    batch_size = 4
    create_fn = create_reader('VAQ-2Att', phase='train')
    to_sentence = SentenceGenerator(trainset='trainval')

    def trim_sequence(seqs, seqs_len, idx):
        """Return row ``idx`` of ``seqs`` cut to its recorded length."""
        seq = seqs[idx]
        seq_len = seqs_len[idx]
        return seq[:seq_len]

    def test_reader(reader):
        """Pop five batches from ``reader`` and print sample 1 of each."""
        reader.start()
        try:
            for i in range(5):
                inputs = reader.pop_batch()
                im, attr, capt, capt_len, ans_seq, ans_seq_len = inputs
                question = to_sentence.index_to_question(
                    trim_sequence(capt, capt_len, 1))
                answer = to_sentence.index_to_answer(
                    trim_sequence(ans_seq, ans_seq_len, 1))
                print('Q: %s\nA: %s\n' % (question, answer))
        finally:
            # Stop the reader even if decoding a batch fails.
            reader.stop()

    print('v1:')
    reader = create_fn(batch_size, subset='kptrain', version='v1')
    test_reader(reader)
    del reader

    print('v2:')
    reader = create_fn(batch_size, subset='kptrain', version='v2')
    test_reader(reader)
    del reader
def test(checkpoint_path=None):
    """Run top-1 inference over ``TEST_SET`` and return the accuracy.

    Args:
        checkpoint_path: checkpoint to restore; when ``None`` the latest one
            under ``FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)``
            is used.

    Returns:
        float: fraction of samples whose predicted answer id equals the
        reader's ``_answer`` entry.
    """
    batch_size = 4
    config = ModelConfig()
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # Data source for the evaluation split.
    reader = AttentionFetcher(batch_size=batch_size,
                              subset=TEST_SET,
                              feat_type=config.feat_type,
                              version=FLAGS.version)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        checkpoint_path = tf.train.get_checkpoint_state(
            ckpt_dir).model_checkpoint_path
    print(checkpoint_path)

    # Build the model in the default graph and restore its weights.
    model = model_fn(config, phase='test')
    model.build()
    prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    tf.train.Saver().restore(sess, checkpoint_path)

    # Created as in the sibling test routines; not used below.
    to_sentence = SentenceGenerator(
        trainset='trainval',
        top_ans_file='../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt')

    ans_ids, quest_ids = [], []
    num_batches = reader.num_batches
    print('Running inference on split %s...' % TEST_SET)
    for step in range(num_batches):
        if step % 10 == 0:
            update_progress(step / float(num_batches))
        outputs = reader.get_test_batch()
        feed = model.fill_feed_dict(outputs[:-2])
        generated_ans = sess.run(prob, feed_dict=feed)
        # Zero the last column before the arg-max.
        generated_ans[:, -1] = 0
        ans_ids.append(np.argmax(generated_ans, axis=1))
        quest_ids.append(outputs[-2])

    quest_ids = np.concatenate(quest_ids)
    ans_ids = np.concatenate(ans_ids)

    # Compare against the labels held by the reader.
    gt = reader._answer
    n1 = (gt == ans_ids).sum()
    n2 = gt.size
    acc = n1 / float(n2)
    print('\nAcc: %0.2f, %d/%d' % (acc * 100., n1, n2))
    return acc
def test_rerank_reader():
    """Print the question/answer pairs of one retrieval-reader batch.

    Every sequence is cut to its recorded length and wrapped in 0 tokens on
    both sides before being decoded with a ``SentenceGenerator`` that uses
    the joint question/answer vocabulary file for both vocabularies.
    """
    reader = RetrievalDataReader(batch_size=1, n_contrast=10, subset='train')
    reader.start()
    try:
        outputs = reader.pop_batch()
        im_feat, quest_arr, quest_len, ans_arr, ans_len = outputs

        from inference_utils.question_generator_util import SentenceGenerator
        to_sentence = SentenceGenerator(
            trainset='trainval',
            ans_vocab_file='data/vqa_trainval_question_answer_word_counts.txt',
            quest_vocab_file='data/vqa_trainval_question_answer_word_counts.txt')

        for q_seq, q_len, a_seq, a_len in zip(quest_arr, quest_len,
                                              ans_arr, ans_len):
            # Valid prefix of each row, padded with a 0 token on both ends.
            q_ = np.array([0] + q_seq[:q_len].tolist() + [0])
            a_ = np.array([0] + a_seq[:a_len].tolist() + [0])
            q = to_sentence.index_to_question(q_)
            a = to_sentence.index_to_answer(a_)
            print('Q: %s' % q)
            print('A: %s\n' % a)
    finally:
        # Stop the reader even if decoding fails.
        reader.stop()
def __init__(self):
    """Build the variational IVQA model in its own graph and restore it.

    The model is created in ``sampling_beam`` phase, its trainable variables
    are restored from the hard-coded checkpoint, and the image cache is
    initialised.
    """
    # Text helpers.
    self.to_sentence = SentenceGenerator(trainset='trainval')
    self.sent_encoder = SentenceEncoder()

    # Dedicated graph and the checkpoint that will be loaded into it.
    self.g = tf.Graph()
    self.ckpt_file = 'model/v1_var_kptrain_VAQ-VarDS/model.ckpt-3300000'

    from models.variational_ds_ivqa_model import VariationIVQAModel
    from config import ModelConfig
    config = ModelConfig()

    self._top_k = 10
    self.name = ' ------- VarIVQA ------- '

    with self.g.as_default():
        self.sess = tf.Session()
        self.model = VariationIVQAModel(config, phase='sampling_beam')
        self.model.build()
        # Only trainable variables are restored.
        trainable = tf.trainable_variables()
        self.saver = tf.train.Saver(var_list=trainable)
        self.saver.restore(self.sess, self.ckpt_file)
        # NOTE(review): kept inside the graph context so any ops it creates
        # would land in self.g - confirm against the original layout.
        self._init_image_cache()
def test():
    """Dump the first rerank candidate of every question as a JSON result.

    Reads ``data/rerank_kpval.h5``, takes column 0 of ``cands`` as the answer
    id for each question, converts it to text and writes the list to
    ``FLAGS.result_format % ('v2', 'kpval')``.

    Returns:
        tuple: ``(res_file, quest_ids)`` - path of the JSON file written and
        the question ids read from the HDF5 file.
    """
    from util import load_hdf5
    d = load_hdf5('data/rerank_kpval.h5')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval', top_ans_file=None)

    quest_ids = d['quest_ids']
    # First candidate of every row.
    ans_ids = d['cands'][:, 0]

    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': qid}
              for aid, qid in zip(ans_ids, quest_ids)]

    # save results; close the file deterministically
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % ('v2', 'kpval')
    with open(res_file, 'w') as fs:
        json.dump(result, fs)
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
def score_fusion():
    """Pick one answer per question from stored VQA proposals and dump JSON.

    Loads the per-question VQA scores, VAQ scores and proposal labels from
    ``data/<model_type>_vqa_vaq_rerank_kpval.hdf5``, selects the proposal
    with the highest score and writes the decoded answers to
    ``vaq_on_vqa_proposal_tmp.json``.

    Returns:
        tuple: ``(res_file, quest_ids)`` - path of the JSON file written and
        the question ids read from the HDF5 file.
    """
    subset = 'kpval'
    EPS = 1e-12
    T = 3.0
    save_file = 'data/%s_vqa_vaq_rerank_%s.hdf5' % (
        (FLAGS.model_type).lower(), subset)
    d = load_hdf5(save_file)
    quest_ids = d['quest_ids']
    vqa_scores = d['vqa_scores']
    vaq_scores = d['vaq_scores']
    vqa_pred_labels = d['vqa_pred_labels']

    # context
    to_sentence = SentenceGenerator(trainset='trainval')

    # fusion
    ans_ids = []
    for vqa_score, vaq_score, pred_label in zip(vqa_scores, vaq_scores,
                                                vqa_pred_labels):
        # Turn the VAQ scores into weights that sum to (almost) one.
        vaq_score = np.exp(-T * vaq_score)
        vaq_score /= (vaq_score.sum() + EPS)
        score = vaq_score * vqa_score
        # NOTE(review): the fused score above is immediately replaced by the
        # plain VQA score, so the VAQ term currently has no effect on the
        # result. Kept as found - confirm whether this override is intended.
        score = vqa_score
        ans_ids.append(pred_label[score.argmax()])

    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': int(qid)}
              for aid, qid in zip(ans_ids, quest_ids)]

    # save results; close the file deterministically
    tf.logging.info('Saving results')
    res_file = 'vaq_on_vqa_proposal_tmp.json'
    with open(res_file, 'w') as fs:
        json.dump(result, fs)
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
def main():
    """Nearest-neighbour question baseline: retrieve, save and evaluate.

    For every validation question the first ``k`` neighbour ids are handed to
    the question pool, the returned token path is decoded to text, and all
    questions are saved to JSON before ``evaluate_question`` is run on the
    file.
    """
    k = 80
    res_file = 'result/quest_vaq_nn.json'

    # Token ids -> text.
    to_sentence = SentenceGenerator(trainset='trainval')

    # Question ids and their neighbour lists.
    val_qids, nn_ids = load_image_nn()

    # Candidate pool queried with the neighbour ids.
    nn_model = QuestionPool()

    total = len(val_qids)
    results = []
    for pos, (v_qid, v_nn) in enumerate(zip(val_qids, nn_ids)):
        started = time()
        _tr_qid, tr_path = nn_model.get_candidates(v_nn[:k])
        sent = to_sentence.index_to_question(tr_path)
        print(sent)
        print('Processing %d/%d, time %0.2f sec.' %
              (pos, total, time() - started))
        results.append({'question_id': int(v_qid), 'question': sent})

    save_json(res_file, results)
    # The returned score is not used further.
    evaluate_question(res_file, subset='kpval', version='v1')
def sample_cst_questions(checkpoint_path=None, subset='kptrain'):
    """Interactively inspect sampled contrastive questions batch by batch.

    For each test batch the ground-truth question of the first sample and
    the first three generated question/answer pairs are printed, then the
    debugger is entered so the batch can be examined.

    Args:
        checkpoint_path: checkpoint passed to the restorer as given.
        subset: data split handed to the reader.
    """
    model_config = ModelConfig()
    model_config.convert = FLAGS.convert
    model_config.loss_type = 'pairwise'
    model_config.top_k = 3
    batch_size = 8

    create_fn = create_reader(FLAGS.model_type, phase='test')

    # Token ids -> text.
    to_sentence = SentenceGenerator(trainset='trainval')

    reader = create_fn(batch_size=batch_size, subset=subset,
                       version=FLAGS.test_version)

    # Sampler, restorer and session share one dedicated graph.
    g = tf.Graph()
    with g.as_default():
        model = ContrastQuestionSampler(model_config)
        model.build()
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches
    print('Running beam search inference...')
    for _ in range(num_batches):
        outputs = reader.get_test_batch()

        # Trailing two entries are ids; they are not needed for printing.
        quest_ids, image_ids = outputs[-2:]
        c_ans, c_ans_len, pathes, scores = model.greedy_inference(
            outputs[:-2], sess)
        scores, pathes = post_process_prediction(scores, pathes)

        k = 3
        capt, capt_len = outputs[2:4]
        gt_tokens = capt[0, :capt_len[0]]
        gt_answer = to_sentence.index_to_answer(c_ans[0, :c_ans_len[0]])
        print('gt: %s [%s]' %
              (to_sentence.index_to_question(gt_tokens), gt_answer))

        for ix in range(k):
            question = to_sentence.index_to_question(pathes[ix])
            answer = to_sentence.index_to_answer(c_ans[ix, :c_ans_len[ix]])
            # Row 0 is labelled 'pre', every other row 'cst'.
            tag = 'pre' if ix == 0 else 'cst'
            print('%s %d: %s [%s]' % (tag, ix, question, answer))

        # Pause after each batch for manual inspection.
        import pdb
        pdb.set_trace()
def ivqa_decoding_beam_search(checkpoint_path=None):
    """Sample questions, filter them with a language model and a VQA model.

    For every ``kptest`` sample (batch size 1) the IVQA model draws samples,
    ``process_one`` reduces them, the language model ranks them, and the VQA
    model marks which of the kept ones are valid. Valid questions are saved
    to JSON; the per-sample and overall mean VQA scores go to a ``.mat`` file.

    Args:
        checkpoint_path: checkpoint to restore. When ``None`` the latest one
            is taken from ``FLAGS.checkpoint_dir`` if set, else from
            ``FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type)``.

    Returns:
        tuple: ``(res_file, mean_vqa_score)``.
    """
    model_config = ModelConfig()
    method = FLAGS.method
    res_file = 'result/bs_gen_%s.json' % method
    score_file = 'result/bs_vqa_scores_%s.mat' % method

    # Factories for the model and the reader.
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')

    # Token ids -> text.
    to_sentence = SentenceGenerator(trainset='trainval')

    subset = 'kptest'
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)

    exemplar = ExemplarLanguageModel()

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir or (
            FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type))
        checkpoint_path = tf.train.get_checkpoint_state(
            ckpt_dir).model_checkpoint_path

    # Everything TensorFlow-related is created under one graph.
    g = tf.Graph()
    with g.as_default():
        model = model_fn(model_config, 'sampling')
        model.set_num_sampling_points(1000)
        model.build()

        # Restore the IVQA model.
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

        # Language model sharing the same session.
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        language_model.set_session(sess)
        language_model.setup_model()

        # VQA model used for verification.
        vqa_model = VQAWrapper(g, sess)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    batch_vqa_scores = []

    # A positive FLAGS.max_iters overrides the number of batches.
    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    for i in range(num):
        outputs = reader.get_test_batch()

        quest_ids, image_ids = outputs[-2:]
        im, _, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        # Samples whose answer id is 2000 are skipped.
        if top_ans == 2000:
            continue

        print('\n%d/%d' % (i, num))
        question_id = int(quest_ids[0])
        image_id = int(image_ids[0])

        # Stage 1: draw samples and reduce them with process_one.
        tic_sample = time()
        pathes, scores = model.greedy_inference([im, ans_tokens, ans_len],
                                                sess)
        ivqa_scores, ivqa_pathes = process_one(scores, pathes)
        tic_lm = time()
        print('Time for sample generation: %0.2fs' % (tic_lm - tic_sample))

        # Stage 2: language-model filtering. Entries flagged by the exemplar
        # query get score 1.0; at least 100 top-ranked indices are requested.
        language_model_inputs = wrap_samples_for_language_model(
            [ivqa_pathes], pad_token=model.pad_token - 1, max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(language_model_inputs)
        legality_scores[match_gt] = 1.0
        num_keep = max(100, (legality_scores > 0.1).sum())
        valid_inds = (-legality_scores).argsort()[:num_keep]
        tic_vqa = time()
        print('Time for language model filtration: %0.2fs' %
              (tic_vqa - tic_lm))

        # Stage 3: VQA verification of the kept samples.
        sampled = [ivqa_pathes[_idx] for _idx in valid_inds]
        vqa_scores, is_valid = vqa_model.get_scores(sampled, im, top_ans)
        conf_inds = np.where(is_valid)[0]
        toc_vqa = time()
        print('Time for VQA verification: %0.2fs' % (toc_vqa - tic_vqa))

        this_mean_vqa_score = vqa_scores[conf_inds].mean()
        print('sampled: %d, unique: %d, legal: %d, gt: %d, mean score %0.2f' %
              (pathes.shape[0], len(ivqa_pathes), num_keep,
               match_gt.sum(), this_mean_vqa_score))
        batch_vqa_scores.append(this_mean_vqa_score)

        for _pid, idx in enumerate(conf_inds):
            sentence = to_sentence.index_to_question(sampled[idx])
            # Synthetic id: original question id * 1000 + rank in this list.
            results.append({
                'image_id': int(image_id),
                'question_id': question_id * 1000 + _pid,
                'question': sentence,
                'score': float(vqa_scores[idx])
            })

    save_json(res_file, results)

    batch_vqa_scores = np.array(batch_vqa_scores, dtype=np.float32)
    mean_vqa_score = batch_vqa_scores.mean()
    from scipy.io import savemat
    savemat(score_file, {'scores': batch_vqa_scores,
                         'mean_score': mean_vqa_score})
    print('BS mean VQA score: %0.3f' % mean_vqa_score)
    return res_file, mean_vqa_score