def test(checkpoint_path=None):
    """Evaluate the model on the kpval/kptest/kprestval splits and dump answers.

    Restores the model from `checkpoint_path`, runs `test_worker` on each
    subset, aggregates all answers, and writes one JSON result file whose
    name is tagged 'val'.

    Args:
        checkpoint_path: checkpoint file to restore; must be provided
            (no default checkpoint lookup is performed here).

    Returns:
        Tuple (result_json_path, concatenated question-id array).
    """
    subsets = ['kpval', 'kptest', 'kprestval']
    quest_ids = []
    result = []
    config = ModelConfig()
    config.sample_negative = FLAGS.sample_negative
    config.use_fb_bn = FLAGS.use_fb_bn
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)
    # build and restore model
    model = model_fn(config, phase='test')
    model.build()
    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)
    # Accumulate per-subset predictions; one session is reused for all subsets.
    for subset in subsets:
        _quest_ids, _result = test_worker(model, sess, subset)
        quest_ids += _quest_ids
        result += _result
    quest_ids = np.concatenate(quest_ids)
    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % (FLAGS.version, 'val')
    # FIX: close the result file deterministically instead of leaking the
    # handle returned by open() inside the json.dump() call.
    with open(res_file, 'w') as fh:
        json.dump(result, fh)
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
def test(checkpoint_path=None):
    """Run VQA inference on TEST_SET and write answers to a JSON result file.

    Args:
        checkpoint_path: checkpoint to restore; when None, the latest
            checkpoint under FLAGS.checkpoint_dir is used.

    Returns:
        Tuple (result_json_path, concatenated question-id array).
    """
    batch_size = 100
    config = ModelConfig()
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)
    # build data reader
    reader = AttentionFetcher(batch_size=batch_size, subset=TEST_SET,
                              feat_type=config.feat_type,
                              version=FLAGS.version)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)
    # build and restore model
    model = model_fn(config, phase='test')
    model.build()
    prob = model.prob
    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)
    # Create the vocabulary.
    top_ans_file = '../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)
    ans_ids = []
    quest_ids = []
    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        generated_ans = sess.run(
            prob, feed_dict=model.fill_feed_dict(outputs[:-2]))
        # Zero out the last class so argmax never picks it; presumably the
        # trailing class is the "other / unknown" answer bucket — verify.
        generated_ans[:, -1] = 0
        top_ans = np.argmax(generated_ans, axis=1)
        ans_ids.append(top_ans)
        quest_id = outputs[-2]
        quest_ids.append(quest_id)
    quest_ids = np.concatenate(quest_ids)
    ans_ids = np.concatenate(ans_ids)
    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': qid}
              for aid, qid in zip(ans_ids, quest_ids)]
    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % (FLAGS.version, TEST_SET)
    # FIX: close the result file deterministically instead of leaking the
    # handle returned by open() inside the json.dump() call.
    with open(res_file, 'w') as fh:
        json.dump(result, fh)
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
def vaq_multiple_choices(checkpoint_path=None, subset='kpval'):
    """Score multiple-choice answers with a conditional VAQ model.

    For every task sample, runs the model's per-token losses on the
    candidate answer sequences and uses the summed loss as the score,
    then hands all scores to the evaluater.

    NOTE(review): the `subset` parameter is never used — the evaluater is
    hard-wired to subset='test'. Confirm whether that is intentional.

    Args:
        checkpoint_path: checkpoint to restore; when None, the latest
            checkpoint under FLAGS.checkpoint_dir (version 'v1') is used.
    """
    # Evaluater configuration: semantic features, attributes on, no raw
    # image features, no answer-type conditioning.
    need_attr = True
    need_im_feat = False
    use_answer_type = False
    feat_type = 'semantic'
    model_config = ModelConfig()
    # Get model
    print(FLAGS.model_type)
    model_fn = get_model_creation_fn(FLAGS.model_type)
    mc_ctx = MultipleChoiceEvaluater(subset='test',
                                     need_im_feat=need_im_feat,
                                     need_attr=need_attr,
                                     feat_type=feat_type,
                                     use_ans_type=use_answer_type)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir % ('v1', FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    g = tf.Graph()
    with g.as_default():
        # Build the model in 'condition' phase so per-sequence losses are
        # exposed for scoring.
        model = model_fn(model_config, phase='condition')
        model.build()
        losses = model.losses
        saver = tf.train.Saver()
        sess = tf.Session()
        tf.logging.info('Restore from model %s' %
                        os.path.basename(checkpoint_path))
        saver.restore(sess, checkpoint_path)
        # One sample is processed per iteration (batch dimension added below).
        num_batches = mc_ctx.num_samples
        print('Running multiple choices...')
        predictions, answer_ids = [], []
        for i in range(num_batches):
            if i % 1000 == 0 and i > 0:
                print('Running multiple choices: %d/%d' % (i, num_batches))
            outputs = mc_ctx.get_task_data()
            im, capt, capt_len, _, ans_seq, ans_seq_len, _, _ = outputs
            # Add a leading batch axis for the single sample.
            im = im[np.newaxis, :]
            ans_seq = ans_seq[np.newaxis, :]
            inputs = [im, capt, capt_len, ans_seq, ans_seq_len]
            feed_dict = model.fill_feed_dict(inputs)
            llh = sess.run(losses, feed_dict=feed_dict)
            llh = np.squeeze(llh, axis=0)  # per-candidate, per-token losses
            # Sum token losses per candidate; lower/higher is better depends
            # on the evaluater's convention — scored downstream.
            scores = llh.sum(axis=1)[np.newaxis, :]
            predictions.append(scores)
            answer_ids.append(outputs[-2])
        predictions = np.concatenate(predictions, axis=0)
        answer_ids = np.array(answer_ids)
        # evaluate
        mc_ctx.evaluate_results(answer_ids, predictions,
                                model_type=FLAGS.model_type)
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    """Generate questions for `subset` with beam search and save them as JSON.

    Args:
        checkpoint_path: checkpoint to restore; when None, the latest
            checkpoint under FLAGS.checkpoint_dir is used.
        subset: data split to decode (also embedded in the result filename).

    Returns:
        Path of the JSON file containing the generated questions.
    """
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s_%s.json' % (
        FLAGS.model_type.upper(), subset)
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader(FLAGS.model_type, phase='test')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')
    # get data reader
    reader = create_fn(batch_size=100, subset=subset,
                       version=FLAGS.test_version)
    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path
    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model in 'beam' mode for beam-search decoding.
        model = model_fn(model_config, 'beam')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)
    num_batches = reader.num_batches
    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()
        # inference: last two reader outputs are the ids, the rest are inputs
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)
        scores, pathes = post_process_prediction(scores, pathes)
        # Log the first decoded question of each batch for progress tracking.
        question = to_sentence.index_to_question(pathes[0])
        print('%d/%d: %s' % (i, num_batches, question))
        for quest_id, image_id, path in zip(quest_ids, image_ids, pathes):
            sentence = to_sentence.index_to_question(path)
            res_i = {'image_id': int(image_id),
                     'question_id': int(quest_id),
                     'question': sentence}
            results.append(res_i)
    save_json(res_file, results)
    return res_file
def convert():
    """Export LSTM answer embeddings for the top-2000 answers to HDF5.

    Restores a pretrained variational iVQA model, verifies that encoding and
    decoding the top answers round-trips exactly, then runs the answer
    embedding op over all top-2000 answer sequences and saves the result to
    'data/v1_<model_name>_top2000_lstm_embedding.h5'.
    """
    model_name = 'ivaq_var_restval'
    checkpoint_path = 'model/var_ivqa_pretrain_restval/model.ckpt-505000'
    # build model
    from config import ModelConfig
    model_config = ModelConfig()
    model_fn = get_model_creation_fn('VAQ-Var')
    # create graph
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'beam')
        model.build()
        # Tensors for the embedding op and its feeds (private model attrs).
        tf_embedding = model._answer_embed
        tf_answer_feed = model._ans
        tf_answer_len_feed = model._ans_len
        # Restore from checkpoint
        print('Restore from %s' % checkpoint_path)
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)
    # build reader
    top_ans_file = '/import/vision-ephemeral/fl302/code/' \
                   'VQA-tensorflow/data/vqa_trainval_top2000_answers.txt'
    mc_ctx = MultiChoiceQuestionManger(subset='val', load_ans=True,
                                       top_ans_file=top_ans_file)
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file=top_ans_file)
    answer_encoder = mc_ctx.encoder
    top_answer_inds = range(2000)
    top_answers = answer_encoder.get_top_answers(top_answer_inds)
    answer_seqs = answer_encoder.encode_to_sequence(top_answers)
    # Sanity check: every answer must survive an encode/decode round trip.
    for i, (ans, seq) in enumerate(zip(top_answers, answer_seqs)):
        rec_ans = to_sentence.index_to_answer(seq)
        ans = ' '.join(_tokenize_sentence(ans))
        print('%d: Raw: %s, Rec: %s' % (i + 1, ans, rec_ans))
        assert (ans == rec_ans)
    print('Checking passed')
    # extract
    print('Converting...')
    ans_arr, ans_arr_len = put_to_array(answer_seqs)
    # FIX: removed leftover `import pdb; pdb.set_trace()` which halted the
    # script before the embedding was ever computed.
    embedding = sess.run(tf_embedding,
                         feed_dict={
                             tf_answer_feed: ans_arr.astype(np.int32),
                             tf_answer_len_feed: ans_arr_len.astype(np.int32)
                         })
    # save
    sv_file = 'data/v1_%s_top2000_lstm_embedding.h5' % model_name
    from util import save_hdf5
    save_hdf5(sv_file, {'answer_embedding': embedding})
    print('Done')
def test(checkpoint_path=None):
    """Run multiple-choice inference on TEST_SET and dump picked answers.

    For each batch, scores the candidate answers with the model's logits,
    picks the argmax candidate per question, and records it against the
    question id.

    Args:
        checkpoint_path: checkpoint to restore; when None, the latest
            checkpoint under FLAGS.checkpoint_dir is used.

    Returns:
        Tuple (result_json_path, concatenated question-id array).
    """
    batch_size = 64
    config = ModelConfig()
    config.sample_negative = FLAGS.sample_negative
    config.use_fb_bn = FLAGS.use_fb_bn
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)
    # build data reader
    reader = TestReader(batch_size=batch_size, subset=TEST_SET,
                        use_fb_data=FLAGS.use_fb_data)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)
    # build and restore model
    model = model_fn(config, phase='test')
    model.build()
    # NOTE: removed unused local `prob = model.prob`; inference below reads
    # model._logits directly.
    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)
    quest_ids = []
    result = []
    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        mc_scores = sess.run(model._logits,
                             feed_dict=model.fill_feed_dict(outputs[:-3]))
        choice_idx = np.argmax(mc_scores, axis=1)
        cands, _qids, image_ids = outputs[-3:]
        for qid, cid, mcs in zip(_qids, choice_idx, cands):
            answer = mcs['cands'][cid]
            # Reader and batch ordering must agree on question ids.
            assert (mcs['quest_id'] == qid)
            result.append({u'answer': answer, u'question_id': qid})
        quest_ids.append(_qids)
    quest_ids = np.concatenate(quest_ids)
    # save results
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % (FLAGS.version, TEST_SET)
    # FIX: close the result file deterministically instead of leaking the
    # handle returned by open() inside the json.dump() call.
    with open(res_file, 'w') as fh:
        json.dump(result, fh)
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    return res_file, quest_ids
def test(checkpoint_path=None):
    """Run VQA inference on TEST_SET and report top-1 accuracy.

    Args:
        checkpoint_path: checkpoint to restore; when None, the latest
            checkpoint under FLAGS.checkpoint_dir is used.

    Returns:
        Top-1 accuracy (float in [0, 1]) against the reader's ground truth.
    """
    batch_size = 4
    config = ModelConfig()
    # Get model function
    model_fn = get_model_creation_fn(FLAGS.model_type)
    # build data reader
    reader = AttentionFetcher(batch_size=batch_size, subset=TEST_SET,
                              feat_type=config.feat_type,
                              version=FLAGS.version)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)
    # build and restore model
    model = model_fn(config, phase='test')
    model.build()
    prob = model.prob
    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)
    # Create the vocabulary.
    # NOTE(review): `to_sentence` is never used in this function; it may be
    # leftover from a result-dumping variant of this script.
    to_sentence = SentenceGenerator(
        trainset='trainval',
        top_ans_file='../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt')
    ans_ids = []
    quest_ids = []
    print('Running inference on split %s...' % TEST_SET)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        generated_ans = sess.run(prob,
                                 feed_dict=model.fill_feed_dict(outputs[:-2]))
        # Zero out the last class so argmax never selects it; presumably the
        # trailing class is the "other / unknown" answer bucket — verify.
        generated_ans[:, -1] = 0
        top_ans = np.argmax(generated_ans, axis=1)
        ans_ids.append(top_ans)
        quest_id = outputs[-2]
        quest_ids.append(quest_id)
    quest_ids = np.concatenate(quest_ids)
    ans_ids = np.concatenate(ans_ids)
    # Compare against ground-truth labels held by the reader (private attr).
    gt = reader._answer
    n1, n2 = (gt == ans_ids).sum(), gt.size
    acc = n1 / float(n2)
    print('\nAcc: %0.2f, %d/%d' % (acc * 100., n1, n2))
    return acc
def test():
    """Smoke-test: build the VAQ-CA graph in train phase without running it."""
    from config import ModelConfig

    # Point the config at the pre-computed answer sequences and the VQA
    # agent checkpoint the model conditions on.
    cfg = ModelConfig()
    cfg.top_answer_file = 'data/top_answer2000_sequences.h5'
    cfg.vqa_agent_ckpt = '/import/vision-ephemeral/fl302/code/vqa2.0/model/' \
                         'curr_VQA-Soft_Res5c/model.ckpt-325000'

    # Construction raises if the graph cannot be assembled.
    build_fn = get_model_creation_fn('VAQ-CA')
    net = build_fn(cfg, phase='train')
    net.build()
def test(checkpoint_path=None):
    """Score augmented questions on TEST_SET and convert them to questions.

    Restores the model (from `checkpoint_path` or the latest checkpoint),
    runs the ranking probability over every batch, and forwards the
    collected (question-id, score) pairs to `convert_to_questions`.
    """
    batch_size = 128
    config = ModelConfig()

    # Resolve model constructor and data reader.
    model_fn = get_model_creation_fn(FLAGS.model_type)
    reader = TestReader(batch_size=batch_size, subset=TEST_SET)

    # Fall back to the newest checkpoint in the configured directory.
    if checkpoint_path is None:
        ckpt_state = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type,
                                    FLAGS.delta))
        checkpoint_path = ckpt_state.model_checkpoint_path
    print(checkpoint_path)

    # Build the inference graph and restore weights into a fresh session.
    model = model_fn(config, phase='test')
    model.build()
    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    tf.train.Saver().restore(sess, checkpoint_path)

    print('Running inference on split %s...' % TEST_SET)
    aug_quest_ids, scores = [], []
    n_batches = reader.num_batches
    for batch_idx in range(n_batches):
        if batch_idx % 10 == 0:
            update_progress(batch_idx / float(n_batches))
        batch = reader.get_test_batch()
        # First three outputs feed the model; the tail carries the ids.
        rank_score = sess.run(model.prob,
                              feed_dict=model.fill_feed_dict(batch[:3]))
        _, quest_ids, image_ids = batch[3:]
        scores.append(rank_score)
        aug_quest_ids.append(quest_ids)

    aug_quest_ids = np.concatenate(aug_quest_ids)
    scores = np.concatenate(scores)
    return convert_to_questions(aug_quest_ids, scores)
def ivqa_decoding_beam_search(checkpoint_path=None):
    """Mine negative (low-legality) question paths via sampling decoding.

    Samples candidate questions on the 'kptrain' split, scores them with a
    language model, treats paths that match a ground-truth exemplar as
    legal, and collects samples whose legality score falls below 0.2 as
    negatives. Stops after ~100k negatives and saves them to
    'data/lm_init_neg_pathes.json'.

    Args:
        checkpoint_path: checkpoint to restore; when None, resolved from
            FLAGS.checkpoint_dir or FLAGS.checkpoint_pat.
    """
    model_config = ModelConfig()
    method = FLAGS.method
    res_file = 'result/bs_gen_%s.json' % method
    # NOTE(review): `res_file` and `score_file` are never used below;
    # possibly leftovers from a sibling script.
    score_file = 'result/bs_vqa_scores_%s.mat' % method
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')
    # get data reader
    subset = 'kptrain'
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)
    exemplar = ExemplarLanguageModel()
    if checkpoint_path is None:
        if FLAGS.checkpoint_dir:
            ckpt_dir = FLAGS.checkpoint_dir
        else:
            ckpt_dir = FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type)
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path
    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model in 'sampling' mode, drawing 5 samples per input.
        model = model_fn(model_config, 'sampling')
        model.set_num_sampling_points(5)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)
        # build language model (shares the session)
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        language_model.set_session(sess)
        language_model.setup_model()
    num_batches = reader.num_batches
    print('Running beam search inference...')
    # Cap the number of iterations when FLAGS.max_iters is set.
    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    neg_pathes = []
    need_stop = False
    for i in range(num):
        outputs = reader.get_test_batch()
        # inference
        im, _, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        # Skip samples whose answer falls outside the top-2000 classes.
        if top_ans == 2000:
            continue
        print('\n%d/%d' % (i, num))
        t1 = time()
        pathes, scores = model.greedy_inference([im, ans_tokens, ans_len],
                                                sess)
        # find unique
        ivqa_scores, ivqa_pathes = process_one(scores, pathes)
        t2 = time()
        print('Time for sample generation: %0.2fs' % (t2 - t1))
        # apply language model
        language_model_inputs = wrap_samples_for_language_model(
            [ivqa_pathes], pad_token=model.pad_token - 1, max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(language_model_inputs)
        # Paths matching a ground-truth question are always legal.
        legality_scores[match_gt] = 1.0
        neg_inds = np.where(legality_scores < 0.2)[0]
        for idx in neg_inds:
            # Drop the leading token (presumably the start token — verify)
            # before serializing.
            ser_neg = serialize_path(ivqa_pathes[idx][1:])
            neg_pathes.append(ser_neg)
            if len(neg_pathes) > 100000:
                need_stop = True
                break
        if need_stop:
            break
    sv_file = 'data/lm_init_neg_pathes.json'
    save_json(sv_file, neg_pathes)
def train():
    """Train the configured model with exponential LR decay.

    Builds the graph, sets up `tf.contrib.layers.optimize_loss` with the
    training-config optimizer and decay schedule, and drives the loop via
    `training_util.train` with a replay-buffer-backed reader.
    """
    model_config = ModelConfig()
    training_config = TrainConfig()
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    # Create training directory.
    train_dir = FLAGS.train_dir % (FLAGS.model_trainset, FLAGS.model_type)
    # NOTE(review): `do_counter_sampling` is computed but never used here.
    do_counter_sampling = FLAGS.version == 'v2'
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, phase='train')
        model.build()
        # Set up the learning rate
        learning_rate = tf.constant(training_config.initial_learning_rate)

        def _learning_rate_decay_fn(learn_rate, global_step):
            # Smooth (non-staircase) exponential decay.
            return tf.train.exponential_decay(
                learn_rate,
                global_step,
                decay_steps=training_config.decay_step,
                decay_rate=training_config.decay_factor,
                staircase=False)

        learning_rate_decay_fn = _learning_rate_decay_fn
        train_op = tf.contrib.layers.optimize_loss(
            loss=model.loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)
        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(
            max_to_keep=training_config.max_checkpoints_to_keep)
        # setup summaries
        # NOTE(review): `summary_op` is built but not passed to the training
        # loop below — confirm whether summaries are expected to be written.
        summary_op = tf.summary.merge_all()
        # create reader with a constraint file from the VQA replay buffer.
        reader = Reader(
            batch_size=32,
            subset=FLAGS.model_trainset,
            cst_file='vqa_replay_buffer/vqa_replay_low_rescore_prior_05_04.json')
        # Run training.
        training_util.train(train_op,
                            train_dir,
                            log_every_n_steps=FLAGS.log_every_n_steps,
                            graph=g,
                            global_step=model.global_step,
                            number_of_steps=FLAGS.number_of_steps,
                            init_fn=model.init_fn,
                            saver=saver,
                            reader=reader,
                            feed_fn=model.fill_feed_dict)
def ivqa_decoding_beam_search(checkpoint_path=None, subset=FLAGS.subset):
    """Generate augmented questions per image/question via sampling or beam.

    Decodes `num_sampled` candidate questions per input, de-duplicates them,
    and assigns each variant the augmented id `question_id * 1000 + variant`.

    NOTE(review): the default `subset=FLAGS.subset` is evaluated at import
    time; `res_file` always uses FLAGS.subset regardless of the argument.

    Args:
        checkpoint_path: checkpoint to restore; when None, resolved from
            FLAGS.checkpoint_dir (variational or standard layout).
        subset: data split to decode.
    """
    model_config = ModelConfig()
    _model_suffix = 'var_' if FLAGS.use_var else ''
    res_file = 'data_rl/%sivqa_%s_questions.json' % (_model_suffix,
                                                     FLAGS.subset)
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-Var', phase='test')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')
    # get data reader
    batch_size = 64
    reader = create_fn(batch_size=batch_size, subset=subset,
                       version=FLAGS.test_version)
    if checkpoint_path is None:
        if FLAGS.use_var:  # variational models
            ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        else:  # standard models
            ckpt_dir = FLAGS.checkpoint_dir % ('kprestval', FLAGS.model_type)
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path
    # Variational models sample; standard models beam-search.
    mode = 'sampling' if FLAGS.use_var else 'beam'
    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, mode)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)
    num_batches = reader.num_batches
    print('Running beam search inference...')
    results = []
    extend_questions = []
    extended_question_ids = []
    for i in range(num_batches):
        print('iter: %d/%d' % (i, num_batches))
        outputs = reader.get_test_batch()
        # inference: the last two reader outputs are the id arrays.
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)
        scores, pathes = post_process_prediction(scores, pathes,
                                                 add_start_end=False)
        # Decoded paths are laid out sample-major: entries for batch item s
        # sit at offsets s, s + B, s + 2B, ... where B is the batch size.
        _this_batch_size = quest_ids.shape[0]
        num_sampled = int(len(pathes) / _this_batch_size)
        _noise_offset = np.arange(0, num_sampled,
                                  dtype=np.int32) * _this_batch_size
        for _s_id in range(_this_batch_size):
            _index = _noise_offset + _s_id
            try:
                cur_scores = [scores[_idx] for _idx in _index]
                cur_pathes = [pathes[_idx] for _idx in _index]
            # FIX: replaced Python-2-only `except Exception, e:` with the
            # `as` form, valid on both Python 2.6+ and Python 3.
            except Exception as e:
                print(str(e))
                pdb.set_trace()
            cur_scores, cur_pathes = find_unique_pathes(cur_scores,
                                                        cur_pathes)
            question_id = int(quest_ids[_s_id])
            image_id = image_ids[_s_id]
            for _pid, path in enumerate(cur_pathes):
                sentence = to_sentence.index_to_question(path)
                extended_question_ids.append([question_id, _pid])
                # Encode the variant index into the augmented question id.
                aug_quest_id = question_id * 1000 + _pid
                res_i = {'image_id': int(image_id),
                         'question_id': aug_quest_id,
                         'question': sentence}
                results.append(res_i)
            extend_questions += cur_pathes
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kptest'):
    """Generate question variations (batch size 1) and save them with scores.

    Uses 'sampling_beam' decoding, post-processes the variations with their
    counts, and writes one record per variation including the raw token
    path and its probability.

    Args:
        checkpoint_path: checkpoint to restore; when None, the latest
            checkpoint under FLAGS.checkpoint_dir is used.
        subset: data split to decode.

    Returns:
        Path of the JSON result file.
    """
    model_config = ModelConfig()
    res_file = 'result/aug_var_vaq_kl0_greedy_%s.json' % FLAGS.model_type.upper(
    )
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader('VAQ-Var', phase='test')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')
    # get data reader (batch size 1: one image/question per iteration)
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)
    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path
    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling_beam')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)
    num_batches = reader.num_batches
    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        print('iter: %d/%d' % (i, num_batches))
        outputs = reader.get_test_batch()
        # inference
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)
        # Broadcast the per-path score across every token position.
        scores = np.tile(scores[:, np.newaxis], [1, pathes.shape[1]])
        _ntot = len(pathes)
        scores, pathes, ivqa_counts = post_process_variation_questions_with_count(
            scores, pathes, 1)
        question_id = int(quest_ids[0])
        image_id = image_ids[0]
        # Report unique-vs-total path count for this sample.
        print('%d/%d' % (len(pathes[0]), _ntot))
        for _p_idx, (path, sc) in enumerate(zip(pathes[0], scores[0])):
            sentence = to_sentence.index_to_question(path)
            # Encode the variant index into the augmented question id.
            aug_quest_id = question_id * 1000 + _p_idx
            res_i = {'image_id': int(image_id),
                     'question_id': aug_quest_id,
                     'question': sentence,
                     'question_inds': path,
                     'counts': len(pathes),
                     'probs': float(sc)}
            results.append(res_i)
    save_json(res_file, results)
    return res_file
def train():
    """Train with a mixed reward environment (diversity + language model).

    Sets up a MixReward environment (CIDEr disabled, language threshold
    0.2, winner-take-all diversity), builds the model and a language model
    in the same graph, and drives the RL training loop.
    """
    model_config = ModelConfig()
    training_config = TrainConfig()
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    reader_fn = create_reader('VAQ-Var', phase='train')
    # Reward environment configuration.
    env = MixReward()
    env.diversity_reward.mode = 'winner_take_all'
    env.set_cider_state(False)
    env.set_language_thresh(0.2)
    # Create training directory.
    train_dir = FLAGS.train_dir % (FLAGS.version, FLAGS.model_type)
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'train')
        model.build()
        # Set up the learning rate (scaled down 10x for fine-tuning).
        learning_rate = tf.constant(training_config.initial_learning_rate * 0.1)

        def _learning_rate_decay_fn(learn_rate, global_step):
            # Smooth (non-staircase) exponential decay.
            return tf.train.exponential_decay(
                learn_rate,
                global_step,
                decay_steps=training_config.decay_step,
                decay_rate=training_config.decay_factor,
                staircase=False)

        learning_rate_decay_fn = _learning_rate_decay_fn
        train_op = tf.contrib.layers.optimize_loss(
            loss=model.loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)
        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(
            max_to_keep=training_config.max_checkpoints_to_keep)
        # Setup summaries
        summary_op = tf.summary.merge_all()
        # Setup language model (shares the graph; scored by the env).
        lm = LanguageModel()
        lm.build()
        env.set_language_model(lm)
        # create reader
        reader = reader_fn(
            batch_size=16,
            subset='kprestval',  # 'kptrain'
            version=FLAGS.version)
        # Run training.
        training_util.train(train_op,
                            train_dir,
                            log_every_n_steps=FLAGS.log_every_n_steps,
                            graph=g,
                            global_step=model.global_step,
                            number_of_steps=FLAGS.number_of_steps,
                            init_fn=model.init_fn,
                            saver=saver,
                            reader=reader,
                            model=model,
                            summary_op=summary_op,
                            env=env)
def var_vqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    """Sample answers per question and evaluate recall of the unique set.

    Decodes answer candidates (batch size 1), de-duplicates them, converts
    them to text, and passes the per-question answer lists to `eval_recall`.

    NOTE(review): the `subset` parameter is immediately overwritten with
    'kpval' below, so callers cannot change the split — confirm intent.
    NOTE(review): `res_file` is computed but never written.

    Args:
        checkpoint_path: checkpoint to restore; when None, the latest
            checkpoint under FLAGS.checkpoint_dir is used.
        subset: ignored (see NOTE above).
    """
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s.json' % FLAGS.model_type.upper()
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader(FLAGS.model_type, phase='test')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')
    # get data reader
    subset = 'kpval'
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)
    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path
    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model in 'sampling' mode.
        model = model_fn(model_config, 'sampling')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)
    num_batches = reader.num_batches
    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()
        if i % 100 == 0:
            print('batch: %d/%d' % (i, num_batches))
        # inference
        images, quest, quest_len, ans, ans_len, quest_ids, image_ids = outputs
        scores, pathes = model.greedy_inference([images, quest, quest_len],
                                                sess)
        scores, pathes = post_process_prediction(scores, pathes)
        # De-duplicate: pad to a rectangular array, keep unique rows, then
        # strip the leading column (presumably the start token — verify).
        pathes, pathes_len = put_to_array(pathes)
        scores, pathes = find_unique_rows(scores, pathes)
        scores, pathes = post_process_prediction(scores, pathes[:, 1:])
        answers = []
        for path in pathes:
            sentence = to_sentence.index_to_answer(path)
            answers.append(sentence)
        res_i = {'question_id': int(quest_ids[0]), 'answers': answers}
        results.append(res_i)
    eval_recall(results)
    return
def train():
    """Train the ranking model with staircase LR decay and flag-based setup.

    Training-directory name is suffixed according to the sampling / facebook
    data / batch-norm flags; the reader's batch size depends on whether
    negative sampling is enabled.
    """
    model_config = ModelConfig()
    training_config = TrainConfig()
    # Override the default decay schedule for this experiment.
    training_config.decay_step = 100000
    training_config.decay_factor = 0.1
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    # Create training directory; encode active flags into the dir name.
    train_dir = FLAGS.train_dir % (FLAGS.version, FLAGS.model_type)
    if FLAGS.sample_negative:
        train_dir += '_sn'
    if FLAGS.use_fb_data:
        train_dir += '_fb'
    if FLAGS.use_fb_bn:
        train_dir += '_bn'
    # NOTE(review): `do_counter_sampling` is computed but never used here.
    do_counter_sampling = FLAGS.version == 'v2'
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model_config.sample_negative = FLAGS.sample_negative
        model_config.use_fb_bn = FLAGS.use_fb_bn
        model = model_fn(model_config, phase='train')
        model.build()
        # Set up the learning rate
        learning_rate = tf.constant(training_config.initial_learning_rate)

        def _learning_rate_decay_fn(learn_rate, global_step):
            # Staircase decay: LR drops in discrete steps.
            return tf.train.exponential_decay(
                learn_rate,
                global_step,
                decay_steps=training_config.decay_step,
                decay_rate=training_config.decay_factor,
                staircase=True)

        learning_rate_decay_fn = _learning_rate_decay_fn
        # Gradient clipping intentionally disabled for this experiment.
        train_op = tf.contrib.layers.optimize_loss(
            loss=model.loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=None,
            learning_rate_decay_fn=learning_rate_decay_fn)
        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(
            max_to_keep=training_config.max_checkpoints_to_keep)
        # setup summaries
        summary_op = tf.summary.merge_all()
        # create reader: larger batches when sampling negatives.
        batch_size = 256 if FLAGS.sample_negative else 64
        reader = Reader(batch_size=batch_size,
                        subset='kptrain',
                        sample_negative=FLAGS.sample_negative,
                        use_fb_data=FLAGS.use_fb_data)
        # Run training.
        training_util.train(train_op,
                            train_dir,
                            log_every_n_steps=FLAGS.log_every_n_steps,
                            graph=g,
                            global_step=model.global_step,
                            number_of_steps=FLAGS.number_of_steps,
                            init_fn=model.init_fn,
                            saver=saver,
                            reader=reader,
                            feed_fn=model.fill_feed_dict,
                            summary_op=summary_op)
# Interactive / experimental setup script: builds a VAQ-VarRL model,
# initializes it in a session, and constructs reward environments plus a
# data reader for manual experimentation.
from models.model_creater import get_model_creation_fn
import tensorflow as tf
import numpy as np
from inference_utils.question_generator_util import SentenceGenerator
from readers.ivqa_reader_creater import create_reader
from post_process_variation_questions import post_process_variation_questions_noise, prepare_reinforce_data
from var_ivqa_rewards import IVQARewards, _parse_gt_questions, VQARewards
from time import time
import pdb

# Build the RL question-generation model and initialize its weights.
model_fn = get_model_creation_fn('VAQ-VarRL')
model = model_fn()
model.build()
sess = tf.Session()
print('Init model')
model.init_fn(sess)

# Vocabulary decoder and reward environments (question-similarity reward
# and a VQA-model-based reward restored from a fixed checkpoint).
to_sentence = SentenceGenerator(trainset='trainval')
env = IVQARewards()
env1 = VQARewards(ckpt_file='model/kprestval_VQA-BaseNorm/model.ckpt-26000')

# Data reader for the kpval split; start() launches its worker threads
# (presumably — confirm against the reader implementation).
create_fn = create_reader('VAQ-Var', phase='train')
reader = create_fn(batch_size=100, subset='kpval', version='v1')
reader.start()

# NOTE(review): late import with side effects unknown from here.
import grammar_check
def train():
    """Train with a BLEU-based reward environment and fixed learning rate.

    Uses a constant 5e-5 learning rate (no decay), optimizes only
    `model.model_vars`, and passes the IVQARewards environment into the
    training loop.
    """
    model_config = ModelConfig()
    training_config = TrainConfig()
    model_config.convert = FLAGS.convert
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    reader_fn = create_reader(FLAGS.model_type, phase='train')
    # setup environment
    env = IVQARewards(metric='bleu')
    # Create training directory.
    train_dir = FLAGS.train_dir % (FLAGS.version, FLAGS.model_type)
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'train')
        model.build()
        # Set up a fixed learning rate (decay intentionally disabled).
        learning_rate = tf.constant(5e-5)
        # Restrict optimization to the model's own variables.
        train_op = tf.contrib.layers.optimize_loss(
            loss=model.loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=None,
            variables=model.model_vars)
        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(
            max_to_keep=training_config.max_checkpoints_to_keep)
        # create reader
        reader = reader_fn(batch_size=16,
                           subset='kptrain',
                           version=FLAGS.version)
        # Run training (this variant passes `model` positionally).
        training_util.train(train_op,
                            model,
                            train_dir,
                            log_every_n_steps=FLAGS.log_every_n_steps,
                            graph=g,
                            global_step=model.global_step,
                            number_of_steps=FLAGS.number_of_steps,
                            init_fn=model.init_fn,
                            saver=saver,
                            reader=reader,
                            feed_fn=model.fill_feed_dict,
                            env=env)
def train():
    """Train a language model on samples drawn from a VAQ-VarRL sampler.

    Two models share the graph: a sampler (epsilon 0.98) that produces
    sequences and a language model whose loss is optimized. Only the
    language model's 'LM' variables are checkpointed.
    """
    model_config = ModelConfig()
    training_config = TrainConfig()
    # Get model
    reader_fn = create_reader('VAQ-Var', phase='train')
    # Create training directory.
    train_dir = FLAGS.train_dir % (FLAGS.version, FLAGS.model_type)
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)
    g = tf.Graph()
    with g.as_default():
        # Build the sampler model (not optimized; provides global step and
        # initialization).
        sample_fn = get_model_creation_fn('VAQ-VarRL')
        sampler = sample_fn(model_config, 'train')
        sampler.set_epsion(0.98)
        sampler.build()
        # Build language model (the model actually being trained).
        lm_fn = get_model_creation_fn(FLAGS.model_type)
        language_model = lm_fn()
        language_model.build()
        # Set up the learning rate.
        learning_rate = tf.constant(training_config.initial_learning_rate)

        def _learning_rate_decay_fn(learn_rate, global_step):
            # Smooth (non-staircase) exponential decay.
            return tf.train.exponential_decay(
                learn_rate,
                global_step,
                decay_steps=training_config.decay_step,
                decay_rate=training_config.decay_factor,
                staircase=False)

        learning_rate_decay_fn = _learning_rate_decay_fn
        # Optimize the LM loss while stepping the sampler's global step.
        train_op = tf.contrib.layers.optimize_loss(
            loss=language_model.loss,
            learning_rate=learning_rate,
            global_step=sampler.global_step,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)
        # Checkpoint only the language-model ('LM' scope) variables.
        var_list = tf.get_collection(tf.GraphKeys.VARIABLES, 'LM')
        saver = tf.train.Saver(
            var_list=var_list,
            max_to_keep=training_config.max_checkpoints_to_keep)
        # Setup summaries
        summary_op = tf.summary.merge_all()
        # create reader
        reader = reader_fn(
            batch_size=16,
            subset='kprestval',  # 'kptrain'
            version=FLAGS.version)
        # Run training.
        training_util.train(train_op,
                            train_dir,
                            log_every_n_steps=FLAGS.log_every_n_steps,
                            graph=g,
                            global_step=sampler.global_step,
                            number_of_steps=FLAGS.number_of_steps,
                            init_fn=sampler.init_fn,
                            saver=saver,
                            reader=reader,
                            model=language_model,
                            summary_op=summary_op,
                            sampler=sampler)
def ivqa_decoding_beam_search(ckpt_dir, method):
    """Generate question candidates on 'bs_test', filter them with a language
    model, and dump the per-question candidate lists to a JSON file.

    Args:
        ckpt_dir: directory whose latest checkpoint restores the sampler.
        method: tag used only to name the result file.

    Skips work entirely if the result file already exists.
    """
    model_config = ModelConfig()
    inf_type = 'beam'
    assert (inf_type in ['beam', 'rand'])
    # method = FLAGS.method
    if inf_type == 'rand':
        res_file = 'result/bs_RL2_cands_LM_%s.json' % method
    else:
        res_file = 'result/bs_RL2_cands_LM_%s_BEAM.json' % method
    if os.path.exists(res_file):
        print('File %s already exist, skipped' % res_file)
        return
    # score_file = 'result/bs_vqa_scores_%s.mat' % method

    # Get model.
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # Get data reader.
    subset = 'bs_test'
    reader = create_fn(batch_size=1, subset=subset, version=FLAGS.test_version)

    exemplar = ExemplarLanguageModel()

    # if checkpoint_path is None:
    #     if FLAGS.checkpoint_dir:
    #         ckpt_dir = FLAGS.checkpoint_dir
    #     else:
    #         ckpt_dir = FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type)
    # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)
    checkpoint_path = ckpt.model_checkpoint_path

    # Build model.
    g = tf.Graph()
    with g.as_default():
        # Build the sampler in the requested inference mode; both branches
        # currently draw the same number of sampling points.
        if inf_type == 'rand':
            model = model_fn(model_config, 'sampling')
            model.set_num_sampling_points(1000)
        else:
            model = model_fn(model_config, 'sampling_beam')
            model.set_num_sampling_points(1000)
        model.build()
        # Restore from checkpoint.
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

        # Build language model (shares the same session).
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        # language_model.set_cache_dir('v1_var_att_lowthresh_cache_restval_VAQ-VarRL')
        language_model.set_session(sess)
        language_model.setup_model()

        # Build VQA model.
        # vqa_model = N2MNWrapper()
        # vqa_model = MLBWrapper()
    num_batches = reader.num_batches

    print('Running beam search inference...')

    results = {}
    # batch_vqa_scores = []

    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    for i in range(num):
        outputs = reader.get_test_batch()

        # Inference.
        quest_ids, image_ids = outputs[-2:]
        im, _, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        # pdb.set_trace()
        # Skip samples whose answer falls outside the top-2000 vocabulary.
        if top_ans == 2000:
            continue

        print('\n%d/%d' % (i, num))
        question_id = int(quest_ids[0])
        image_id = int(image_ids[0])

        t1 = time()
        pathes, scores = model.greedy_inference([im, ans_tokens, ans_len], sess)

        # Find unique candidate paths.
        ivqa_scores, ivqa_pathes = process_one(scores, pathes)
        t2 = time()
        print('Time for sample generation: %0.2fs' % (t2 - t1))

        # Apply language model: score candidates for legality, force ground
        # truth matches to 1.0, then keep the top-scoring candidates.
        language_model_inputs = wrap_samples_for_language_model(
            [ivqa_pathes],
            pad_token=model.pad_token - 1,
            max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(language_model_inputs)
        legality_scores[match_gt] = 1.0
        num_keep = max(100, (legality_scores > 0.3).sum())  # no less than 100
        valid_inds = (-legality_scores).argsort()[:num_keep]
        t3 = time()
        print('Time for language model filtration: %0.2fs' % (t3 - t2))

        # for idx in valid_inds:
        #     path = ivqa_pathes[idx]
        #     sc = legality_scores[idx]
        #     sentence = to_sentence.index_to_question(path)
        #     # questions.append(sentence)
        #     print('%s (%0.3f)' % (sentence, sc))

        # Collect the kept candidates for this question.
        sampled = [ivqa_pathes[_idx] for _idx in valid_inds]
        legality_scores = legality_scores[valid_inds]

        result_key = int(question_id)
        tmp = []
        for idx, path in enumerate(sampled):
            # path = sampled[idx]
            sc = legality_scores[idx]
            sentence = to_sentence.index_to_question(path)
            # aug_quest_id = question_id * 1000 + _pid
            res_i = {'image_id': int(image_id),
                     'aug_id': idx,
                     'question_id': question_id,
                     'question': sentence,
                     'score': float(sc)}
            tmp.append(res_i)
        print('Number of unique questions: %d' % len(tmp))
        results[result_key] = tmp

        # Write results incrementally so partial output survives interruption.
        save_json(res_file, results)
def train():
    """RL-style training with a mixed reward (attention VQA + CIDEr + language
    legality + diversity).

    Initialises the model from a fixed pre-trained checkpoint, attaches a
    LanguageModel to the reward environment, and runs the shared training loop.
    """
    model_config = ModelConfig()
    training_config = TrainConfig()

    # Get model.
    model_fn = get_model_creation_fn(FLAGS.model_type)
    reader_fn = create_reader('VAQ-EpochAtt', phase='train')

    # Reward environment: CIDEr on, winner-take-all diversity, language
    # threshold of 1/3.
    env = MixReward(attention_vqa=True)
    env.set_cider_state(use_cider=True)
    env.diversity_reward.mode = 'winner_take_all'
    env.set_language_thresh(1.0 / 3.0)
    # env.set_replay_buffer(insert_thresh=0.1,
    #                       sv_dir='vqa_replay_buffer/low_att')  # if 0.5, already fooled others

    # Create training directory.
    train_dir = FLAGS.train_dir % (FLAGS.version, FLAGS.model_type)
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)
    # The last path component tags the language model's cache directory.
    ckpt_suffix = train_dir.split('/')[-1]

    g = tf.Graph()
    with g.as_default():
        # Build the model, warm-started from a fixed pre-trained checkpoint.
        model = model_fn(model_config, 'train')
        model.set_init_ckpt(
            'model/v1_var_ivqa_restvalr2_VAQ-Var/model.ckpt-374000')
        model.build()

        # Set up the learning rate (1/10 of the configured initial rate)
        # with exponential decay.
        learning_rate = tf.constant(training_config.initial_learning_rate * 0.1)

        def _learning_rate_decay_fn(learn_rate, global_step):
            return tf.train.exponential_decay(
                learn_rate,
                global_step,
                decay_steps=training_config.decay_step,
                decay_rate=training_config.decay_factor,
                staircase=False)

        learning_rate_decay_fn = _learning_rate_decay_fn

        train_op = tf.contrib.layers.optimize_loss(
            loss=model.loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(
            max_to_keep=training_config.max_checkpoints_to_keep)

        # Setup summaries.
        summary_op = tf.summary.merge_all()

        # Setup language model and plug it into the reward environment.
        lm = LanguageModel()
        lm.build()
        lm.set_cache_dir(ckpt_suffix)
        env.set_language_model(lm)

        # Create reader.
        reader = reader_fn(
            batch_size=16,
            subset='kprestval',  # 'kptrain'
            version=FLAGS.version)

        # Run training.
        training_util.train(train_op,
                            train_dir,
                            log_every_n_steps=FLAGS.log_every_n_steps,
                            graph=g,
                            global_step=model.global_step,
                            number_of_steps=FLAGS.number_of_steps,
                            init_fn=model.init_fn,
                            saver=saver,
                            reader=reader,
                            model=model,
                            summary_op=summary_op,
                            env=env)
def train():
    """Curriculum-train the configured VQA model on the 'trainval' split.

    Uses the curriculum training framework with an epsilon-greedy reader;
    counter sampling is only enabled for VQA v2 data.
    """
    model_config = ModelConfig()
    training_config = TrainConfig()

    # Get model.
    model_fn = get_model_creation_fn(FLAGS.model_type)

    # Create training directory.
    train_dir = FLAGS.train_dir % (FLAGS.version, FLAGS.model_type)
    # Counter (complementary-pair) sampling only exists for the v2 dataset.
    do_counter_sampling = FLAGS.version == 'v2'
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)

    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, phase='train')
        model.build()

        # Set up the learning rate with exponential decay.
        learning_rate = tf.constant(training_config.initial_learning_rate)

        def _learning_rate_decay_fn(learn_rate, global_step):
            return tf.train.exponential_decay(
                learn_rate,
                global_step,
                decay_steps=training_config.decay_step,
                decay_rate=training_config.decay_factor,
                staircase=False)

        learning_rate_decay_fn = _learning_rate_decay_fn

        train_op = tf.contrib.layers.optimize_loss(
            loss=model.loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(
            max_to_keep=training_config.max_checkpoints_to_keep)

        # Setup summaries.
        summary_op = tf.summary.merge_all()

        # Create reader; the model name keys the curriculum state for this
        # experiment.
        model_name = os.path.split(train_dir)[1]
        reader = Reader(batch_size=model_config.batch_size,
                        subset='trainval',
                        model_name=model_name,
                        epsilon=0.5,
                        feat_type='res5c',
                        version=FLAGS.version,
                        counter_sampling=do_counter_sampling)

        # Run training (per-sample MLE losses drive the curriculum).
        train_framework_curriculum.train(
            train_op,
            train_dir,
            log_every_n_steps=FLAGS.log_every_n_steps,
            graph=g,
            global_step=model.global_step,
            number_of_steps=FLAGS.number_of_steps,
            init_fn=model.init_fn,
            saver=saver,
            reader=reader,
            feed_fn=model.fill_feed_dict,
            loss_op=model.mle_losses,
            summary_op=summary_op)
def train():
    """Train the configured model on Visual7W data ('V7W-VarDS' reader).

    Checkpoints go to FLAGS.train_dir % (FLAGS.trainset, FLAGS.model_type);
    the reader always uses dataset version 'v1'.
    """
    model_config = ModelConfig()
    training_config = TrainConfig()
    # model_config.batch_size = 8

    # Get model.
    model_fn = get_model_creation_fn(FLAGS.model_type)
    reader_fn = create_reader('V7W-VarDS', phase='train')

    # Create training directory.
    train_dir = FLAGS.train_dir % (FLAGS.trainset, FLAGS.model_type)
    if not tf.gfile.IsDirectory(train_dir):
        tf.logging.info("Creating training directory: %s", train_dir)
        tf.gfile.MakeDirs(train_dir)

    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'train')
        model.build()

        # Set up the learning rate with exponential decay.
        learning_rate = tf.constant(training_config.initial_learning_rate)

        def _learning_rate_decay_fn(learn_rate, global_step):
            return tf.train.exponential_decay(
                learn_rate,
                global_step,
                decay_steps=training_config.decay_step,
                decay_rate=training_config.decay_factor,
                staircase=False)

        learning_rate_decay_fn = _learning_rate_decay_fn

        train_op = tf.contrib.layers.optimize_loss(
            loss=model.loss,
            global_step=model.global_step,
            learning_rate=learning_rate,
            optimizer=training_config.optimizer,
            clip_gradients=training_config.clip_gradients,
            learning_rate_decay_fn=learning_rate_decay_fn)

        # Set up the Saver for saving and restoring model checkpoints.
        saver = tf.train.Saver(
            max_to_keep=training_config.max_checkpoints_to_keep)

        # Setup summaries.
        summary_op = tf.summary.merge_all()

        # Create reader.
        reader = reader_fn(
            batch_size=model_config.batch_size,
            subset=FLAGS.trainset,  # 'kptrain'
            version='v1')

        # Run training.
        training_util.train(train_op,
                            train_dir,
                            log_every_n_steps=FLAGS.log_every_n_steps,
                            graph=g,
                            global_step=model.global_step,
                            number_of_steps=FLAGS.number_of_steps,
                            init_fn=model.init_fn,
                            saver=saver,
                            reader=reader,
                            feed_fn=model.fill_feed_dict,
                            summary_op=summary_op)
def test(checkpoint_path=None):
    """Evaluate the re-rank VQA model on the 'kp<testset>' split.

    Runs `inference_rerank_vqa` over every batch, dumps the raw re-rank /
    VQA scores and candidate labels to an HDF5 file, and writes the final
    answers to the JSON result file.

    Args:
        checkpoint_path: explicit checkpoint to restore; when None, the
            latest checkpoint in FLAGS.checkpoint_dir is used.

    Returns:
        (res_file, quest_ids): path of the JSON result file and the
        concatenated array of evaluated question ids.
    """
    batch_size = 40
    config = ModelConfig()
    config.convert = True
    config.ivqa_rerank = True  # VQA baseline or re-rank
    config.loss_type = FLAGS.loss_type

    # Get model function.
    model_fn = get_model_creation_fn(FLAGS.model_type)
    # ana_ctx = RerankAnalysiser()

    # Build data reader.
    reader_fn = create_reader(FLAGS.model_type, phase='test')
    reader = reader_fn(batch_size=batch_size,
                       subset='kp%s' % FLAGS.testset,
                       version=FLAGS.version)

    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type))
        checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)

    # Build and restore model.
    model = model_fn(config, phase='evaluate')
    model.build()
    # prob = model.prob

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))
    if FLAGS.restore:
        saver = tf.train.Saver()
        saver.restore(sess, checkpoint_path)
    else:
        sess.run(tf.initialize_all_variables())
        model.init_fn(sess)

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    ans_ids = []
    quest_ids = []

    b_rerank_scores = []
    b_vqa_scores = []
    b_cand_labels = []
    print('Running inference on split %s...' % FLAGS.testset)
    for i in range(reader.num_batches):
        if i % 10 == 0:
            update_progress(i / float(reader.num_batches))
        outputs = reader.get_test_batch()
        model_preds = model.inference_rerank_vqa(outputs[:4], sess)
        # Single unpack (the original unpacked model_preds twice; the first
        # unpack's `score` was never used).
        ivqa_score, ivqa_top_ans, ivqa_scores, vqa_top_ans, vqa_scores = model_preds
        b_rerank_scores.append(ivqa_scores)
        b_vqa_scores.append(vqa_scores)
        b_cand_labels.append(vqa_top_ans)
        # if i > 100:
        #     break
        # ana_ctx.update(outputs, model_preds)
        # The re-ranked top answer is the one reported.
        ans_ids.append(ivqa_top_ans)
        quest_id = outputs[-2]
        quest_ids.append(quest_id)

    # Save intermediate predictions for offline analysis.
    b_rerank_scores = np.concatenate(b_rerank_scores, axis=0)
    b_vqa_scores = np.concatenate(b_vqa_scores, axis=0)
    b_cand_labels = np.concatenate(b_cand_labels, axis=0)
    quest_ids = np.concatenate(quest_ids)
    from util import save_hdf5
    save_hdf5('data/rerank_kptest.h5', {'ivqa': b_rerank_scores,
                                        'vqa': b_vqa_scores,
                                        'cands': b_cand_labels,
                                        'quest_ids': quest_ids})
    # ana_ctx.compute_accuracy()

    ans_ids = np.concatenate(ans_ids)
    result = [{u'answer': to_sentence.index_to_top_answer(aid),
               u'question_id': qid} for aid, qid in zip(ans_ids, quest_ids)]

    # Save results (with-block ensures the file handle is closed).
    tf.logging.info('Saving results')
    res_file = FLAGS.result_format % (FLAGS.version, FLAGS.testset)
    with open(res_file, 'w') as fd:
        json.dump(result, fd)
    tf.logging.info('Done!')
    tf.logging.info('#Num eval samples %d' % len(result))
    # ana_ctx.close()
    return res_file, quest_ids
def ivqa_decoding_beam_search(checkpoint_path=None):
    """Generate question candidates for three hand-picked visualisation
    examples and save them to 'result/bs_cand_for_vis.json'.

    Args:
        checkpoint_path: explicit checkpoint; when None it is resolved from
            FLAGS.checkpoint_dir / FLAGS.checkpoint_pat.

    Returns:
        None.
    """
    model_config = ModelConfig()
    method = FLAGS.method
    res_file = 'result/bs_cand_for_vis.json'

    # Get model.
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file='../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt')

    # Get data reader.
    subset = 'kpval'
    reader = create_fn(batch_size=1, subset=subset, version=FLAGS.test_version)

    exemplar = ExemplarLanguageModel()

    if checkpoint_path is None:
        if FLAGS.checkpoint_dir:
            ckpt_dir = FLAGS.checkpoint_dir
        else:
            ckpt_dir = FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model.
    g = tf.Graph()
    with g.as_default():
        # Build the sampler with 5000 sampling points.
        model = model_fn(model_config, 'sampling')
        model.set_num_sampling_points(5000)
        model.build()
        # Restore from checkpoint.
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

        # Build language model (shares the same session).
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        # language_model.set_cache_dir('v1_var_att_lowthresh_cache_restval_VAQ-VarRL')
        language_model.set_session(sess)
        language_model.setup_model()

        # Build VQA model.
        # vqa_model = N2MNWrapper()
        # vqa_model = MLBWrapper()
    num_batches = reader.num_batches

    # Only these question ids are visualised; everything else is skipped.
    quest_ids_to_vis = {5682052: 'bread',
                        965492: 'plane',
                        681282: 'station'}

    print('Running beam search inference...')

    results = []
    batch_vqa_scores = []

    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    for i in range(num):
        outputs = reader.get_test_batch()

        # Inference.
        quest_ids, image_ids = outputs[-2:]
        quest_id_key = int(quest_ids)

        if quest_id_key not in quest_ids_to_vis:
            continue
        # pdb.set_trace()

        im, gt_q, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        # pdb.set_trace()
        # Skip samples whose answer falls outside the top-2000 vocabulary.
        if top_ans == 2000:
            continue

        print('\n%d/%d' % (i, num))
        question_id = int(quest_ids[0])
        image_id = int(image_ids[0])

        t1 = time()
        pathes, scores = model.greedy_inference([im, ans_tokens, ans_len], sess)

        # Find unique candidate paths.
        ivqa_scores, ivqa_pathes = process_one(scores, pathes)
        t2 = time()
        print('Time for sample generation: %0.2fs' % (t2 - t1))

        # Apply language model: legality filtering, ground-truth matches
        # forced to 1.0, keep at least 100 candidates.
        language_model_inputs = wrap_samples_for_language_model(
            [ivqa_pathes],
            pad_token=model.pad_token - 1,
            max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(language_model_inputs)
        legality_scores[match_gt] = 1.0
        num_keep = max(100, (legality_scores > 0.1).sum())  # no less than 100
        valid_inds = (-legality_scores).argsort()[:num_keep]
        print('keep: %d/%d' % (num_keep, len(ivqa_pathes)))

        t3 = time()
        print('Time for language model filtration: %0.2fs' % (t3 - t2))

        def token_arr_to_list(arr):
            # Flatten a token array to a plain Python list for decoding.
            return arr.flatten().tolist()

        # Record every surviving candidate together with the ground-truth
        # question/answer for the visualisation.
        for _pid, idx in enumerate(valid_inds):
            path = ivqa_pathes[idx]
            # sc = vqa_scores[idx]
            sentence = to_sentence.index_to_question(path)
            aug_quest_id = question_id * 1000 + _pid
            res_i = {'image_id': int(image_id),
                     'aug_id': aug_quest_id,
                     'question_id': question_id,
                     'target': sentence,
                     'top_ans_id': int(top_ans),
                     'question': to_sentence.index_to_question(token_arr_to_list(gt_q)),
                     'answer': to_sentence.index_to_answer(token_arr_to_list(ans_tokens))}
            results.append(res_i)
    save_json(res_file, results)
    return None
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kptest'):
    """Run beam-search question generation over *subset* and save results.

    In FLAGS.mode == 'full' every generated variation is kept (with an
    augmented question id); otherwise a single question is picked per sample
    via `pick_question`.

    Returns:
        Path of the written JSON result file.
    """
    model_config = ModelConfig()
    res_file = 'result/var_vaq_beam_%s_%s.json' % (FLAGS.model_type.upper(),
                                                   FLAGS.mode)

    # Get model.
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader(FLAGS.model_type, phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # Get data reader.
    reader = create_fn(batch_size=50, subset=subset, version=FLAGS.test_version)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model.
    g = tf.Graph()
    with g.as_default():
        # Build the model in beam-sampling mode.
        model = model_fn(model_config, 'sampling_beam')
        model.build()
        # Restore from checkpoint.
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        print('iter: %d/%d' % (i, num_batches))
        # if i >= 10:
        #     break
        outputs = reader.get_test_batch()

        # Inference.
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)

        # Wrap inputs: broadcast the per-path score across the sequence so
        # the post-processing helper gets a per-token score matrix.
        _this_batch_size = quest_ids.size
        seq_len = pathes.shape[1]
        dummy_scores = np.tile(scores[:, np.newaxis], [1, seq_len])
        # dummy_scores = np.zeros_like(pathes, dtype=np.float32)
        ivqa_scores, ivqa_pathes, ivqa_counts = post_process_variation_questions_with_count(
            dummy_scores, pathes, _this_batch_size)
        # scores, pathes = convert_to_unique_questions(scores, pathes)

        for _q_idx, (ps, scs, cs) in enumerate(
                zip(ivqa_pathes, ivqa_scores, ivqa_counts)):
            image_id = image_ids[_q_idx]
            question_id = int(quest_ids[_q_idx])

            if FLAGS.mode == 'full':
                # Keep every variation, with a derived augmented id.
                for _p_idx, p in enumerate(ps):
                    sentence = to_sentence.index_to_question(p)
                    aug_quest_id = question_id * 1000 + _p_idx
                    res_i = {'image_id': int(image_id),
                             'question_id': aug_quest_id,
                             'question': sentence}
                    results.append(res_i)
            else:
                # Keep a single representative question per sample.
                p = pick_question(scs, ps, cs)
                sentence = to_sentence.index_to_question(p)
                # print(sentence)
                res_i = {'image_id': int(image_id),
                         'question_id': question_id,
                         'question': sentence}
                results.append(res_i)

    save_json(res_file, results)
    return res_file
def ivqa_decoding_beam_search(checkpoint_path=None):
    """Generate questions on 'kptest', filter with a language model, verify
    with a VQA model, and save both the questions and the VQA scores.

    Returns:
        (res_file, mean_vqa_score): result JSON path and the mean per-batch
        VQA verification score.
    """
    model_config = ModelConfig()
    method = FLAGS.method
    res_file = 'result/bs_gen_%s.json' % method
    score_file = 'result/bs_vqa_scores_%s.mat' % method

    # Get model.
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # Get data reader.
    subset = 'kptest'
    reader = create_fn(batch_size=1, subset=subset, version=FLAGS.test_version)

    exemplar = ExemplarLanguageModel()

    if checkpoint_path is None:
        if FLAGS.checkpoint_dir:
            ckpt_dir = FLAGS.checkpoint_dir
        else:
            ckpt_dir = FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model.
    g = tf.Graph()
    with g.as_default():
        # Build the sampler with 1000 sampling points.
        model = model_fn(model_config, 'sampling')
        model.set_num_sampling_points(1000)
        model.build()
        # Restore from checkpoint.
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

        # Build language model (shares the same session).
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        # language_model.set_cache_dir('v1_var_att_lowthresh_cache_restval_VAQ-VarRL')
        language_model.set_session(sess)
        language_model.setup_model()

        # Build VQA model used for answer verification.
        vqa_model = VQAWrapper(g, sess)
    # vqa_model = MLBWrapper()
    num_batches = reader.num_batches

    print('Running beam search inference...')

    results = []
    batch_vqa_scores = []

    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    for i in range(num):
        outputs = reader.get_test_batch()

        # Inference.
        quest_ids, image_ids = outputs[-2:]
        im, _, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        # pdb.set_trace()
        # Skip samples whose answer falls outside the top-2000 vocabulary.
        if top_ans == 2000:
            continue

        print('\n%d/%d' % (i, num))
        question_id = int(quest_ids[0])
        image_id = int(image_ids[0])

        t1 = time()
        pathes, scores = model.greedy_inference([im, ans_tokens, ans_len], sess)

        # Find unique candidate paths.
        ivqa_scores, ivqa_pathes = process_one(scores, pathes)
        t2 = time()
        print('Time for sample generation: %0.2fs' % (t2 - t1))

        # Apply language model: legality filtering, ground-truth matches
        # forced to 1.0, keep at least 100 candidates.
        language_model_inputs = wrap_samples_for_language_model(
            [ivqa_pathes],
            pad_token=model.pad_token - 1,
            max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(language_model_inputs)
        legality_scores[match_gt] = 1.0
        num_keep = max(100, (legality_scores > 0.1).sum())  # no less than 100
        valid_inds = (-legality_scores).argsort()[:num_keep]
        t3 = time()
        print('Time for language model filtration: %0.2fs' % (t3 - t2))

        # for idx in valid_inds:
        #     path = ivqa_pathes[idx]
        #     sc = legality_scores[idx]
        #     sentence = to_sentence.index_to_question(path)
        #     # questions.append(sentence)
        #     print('%s (%0.3f)' % (sentence, sc))

        # Apply VQA model to the surviving candidates.
        sampled = [ivqa_pathes[_idx] for _idx in valid_inds]
        # vqa_scores = vqa_model.get_scores(sampled, image_id, top_ans)
        vqa_scores, is_valid = vqa_model.get_scores(sampled, im, top_ans)
        # conf_inds = (-vqa_scores).argsort()[:20]
        conf_inds = np.where(is_valid)[0]
        # pdb.set_trace()
        # conf_inds = (-vqa_scores).argsort()[:40]
        t4 = time()
        print('Time for VQA verification: %0.2fs' % (t4 - t3))

        this_mean_vqa_score = vqa_scores[conf_inds].mean()
        print('sampled: %d, unique: %d, legal: %d, gt: %d, mean score %0.2f'
              % (pathes.shape[0], len(ivqa_pathes), num_keep,
                 match_gt.sum(), this_mean_vqa_score))
        batch_vqa_scores.append(this_mean_vqa_score)

        # Record the verified candidates.
        for _pid, idx in enumerate(conf_inds):
            path = sampled[idx]
            sc = vqa_scores[idx]
            sentence = to_sentence.index_to_question(path)
            aug_quest_id = question_id * 1000 + _pid
            res_i = {'image_id': int(image_id),
                     'question_id': aug_quest_id,
                     'question': sentence,
                     'score': float(sc)}
            results.append(res_i)

        # Write results incrementally so partial output survives interruption.
        save_json(res_file, results)

    batch_vqa_scores = np.array(batch_vqa_scores, dtype=np.float32)
    mean_vqa_score = batch_vqa_scores.mean()

    from scipy.io import savemat
    savemat(score_file, {'scores': batch_vqa_scores,
                         'mean_score': mean_vqa_score})
    print('BS mean VQA score: %0.3f' % mean_vqa_score)
    return res_file, mean_vqa_score
def build_model(self, model_config):
    """Create and build a model described by *model_config*.

    The constructor is looked up via ``get_model_creation_fn`` using
    ``model_config.model_type`` and instantiated for ``model_config.phase``.

    Returns:
        The built model instance.
    """
    create_model = get_model_creation_fn(model_config.model_type)
    built_model = create_model(model_config, phase=model_config.phase)
    built_model.build()
    return built_model
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    """Sample questions on 'kpval', log them to an ExperimentWriter and dump
    a JSON result file (first 41 batches only).

    Args:
        checkpoint_path: explicit checkpoint; when None a hard-coded
            no-image model directory is used.
        subset: ignored — overwritten to 'kpval' below (kept for signature
            compatibility with sibling functions).

    Returns:
        Path of the written JSON result file.
    """
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s.json' % FLAGS.model_type.upper()

    # Get model.
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-Var', phase='test')
    writer = ExperimentWriter('latex/examples_noimage_tmp')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # Get data reader.
    subset = 'kpval'
    reader = create_fn(batch_size=1, subset=subset, version=FLAGS.test_version)

    if checkpoint_path is None:
        # ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        ckpt_dir = 'model/v1_var_att_noimage_cache_restval_VAQ-VarRL'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model.
    g = tf.Graph()
    with g.as_default():
        # Build the model in sampling mode.
        model = model_fn(model_config, 'sampling')
        model.build()
        # Restore from checkpoint.
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()

        # Inference: sample, de-duplicate, strip the leading start token.
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)
        scores, pathes = post_process_prediction(scores, pathes)
        pathes, pathes_len = put_to_array(pathes)
        scores, pathes = find_unique_rows(scores, pathes)
        scores, pathes = post_process_prediction(scores, pathes[:, 1:])
        # question = to_sentence.index_to_question(pathes[0])
        # print('%d/%d: %s' % (i, num_batches, question))

        # Show image path and ground-truth answer on the console.
        os.system('clear')
        im_file = '%s2014/COCO_%s2014_%012d.jpg' % ('val', 'val', image_ids[0])
        im_path = os.path.join(IM_ROOT, im_file)
        # im = imread(im_path)
        # plt.imshow(im)
        ans, ans_len = outputs[1:1 + 2]
        answers = extract_gt(ans, ans_len)
        answer = to_sentence.index_to_answer(answers[0])
        # plt.title(answer)
        print('Answer: %s' % answer)

        questions = []
        for path in pathes:
            sentence = to_sentence.index_to_question(path)
            questions.append(sentence)
            print(sentence)
        # plt.show()
        writer.add_result(image_ids[0], quest_ids[0], im_path, answer,
                          questions)
        # NOTE(review): batch_size is 1, so zip truncates to the first
        # sampled path per batch — confirm this is intended.
        for quest_id, image_id, path in zip(quest_ids, image_ids, pathes):
            sentence = to_sentence.index_to_question(path)
            res_i = {'image_id': int(image_id),
                     'question_id': int(quest_id),
                     'question': sentence}
            results.append(res_i)
        if i == 40:
            break
    writer.render()
    # Fix: the original did `return save_json(res_file, results)`, which
    # returned save_json's value and left `return res_file` unreachable.
    save_json(res_file, results)
    return res_file
def var_vqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    """Sample VQA answers on Visual7W 'val', log them to an ExperimentWriter
    and dump a JSON result file (first 41 batches only).

    Args:
        checkpoint_path: explicit checkpoint; when None it is resolved from
            FLAGS.checkpoint_dir % (FLAGS.trainset, FLAGS.model_type).
        subset: ignored — overwritten to 'val' below (kept for signature
            compatibility with sibling functions).

    Returns:
        Path of the written JSON result file.
    """
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s.json' % FLAGS.model_type.upper()

    # Get model.
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader('V7W-VarDS', phase='test')
    writer = ExperimentWriter('latex/v7w_%s' % FLAGS.model_type.lower())

    # Create the vocabulary (Visual7W-specific vocab files).
    to_sentence = SentenceGenerator(
        trainset='train',
        ans_vocab_file='data2/v7w_train_answer_word_counts.txt',
        quest_vocab_file='data2/v7w_train_question_word_counts.txt',
        top_ans_file='data2/v7w_train_top2000_answers.txt')

    # Get data reader.
    subset = 'val'
    reader = create_fn(batch_size=1, subset=subset, version=FLAGS.test_version)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.trainset, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model.
    g = tf.Graph()
    with g.as_default():
        # Build the model in sampling mode.
        model = model_fn(model_config, 'sampling')
        model.build()
        # Restore from checkpoint.
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()
        # pdb.set_trace()

        # Inference: sample, de-duplicate, strip the leading start token.
        images, quest, quest_len, ans, ans_len, quest_ids, image_ids = outputs
        scores, pathes = model.greedy_inference([images, quest, quest_len],
                                                sess)
        scores, pathes = post_process_prediction(scores, pathes)
        pathes, pathes_len = put_to_array(pathes)
        scores, pathes = find_unique_rows(scores, pathes)
        scores, pathes = post_process_prediction(scores, pathes[:, 1:])
        # question = to_sentence.index_to_question(pathes[0])
        # print('%d/%d: %s' % (i, num_batches, question))

        # Show image path and ground-truth QA on the console.
        os.system('clear')
        image_id = image_ids[0]
        im_path = _get_vg_image_root(image_id)
        # im = imread(im_path)
        # plt.imshow(im)
        questions = extract_gt(quest, quest_len)
        question = to_sentence.index_to_question(questions[0])
        print('Question: %s' % question)
        answers = extract_gt(ans, ans_len)
        answer = to_sentence.index_to_answer(answers[0])
        # plt.title(answer)
        print('Answer: %s' % answer)

        answers = []
        for path in pathes:
            sentence = to_sentence.index_to_answer(path)
            answers.append(sentence)
            print(sentence)
        # plt.show()
        qa = '%s - %s' % (question, answer)
        writer.add_result(image_ids[0], quest_ids[0], im_path, qa, answers)
        # NOTE(review): the sampled paths are decoded with index_to_answer
        # above but with index_to_question here — looks like a copy-paste
        # from the question-generation sibling; confirm which vocabulary
        # these paths belong to.
        for quest_id, image_id, path in zip(quest_ids, image_ids, pathes):
            sentence = to_sentence.index_to_question(path)
            res_i = {'image_id': int(image_id),
                     'question_id': int(quest_id),
                     'question': sentence}
            results.append(res_i)
        if i == 40:
            break
    writer.render()
    # Fix: the original computed res_file but never saved `results` and
    # returned None; save and return the path like every sibling function.
    save_json(res_file, results)
    return res_file