def _show_examples(arr, arr_len, _rewards, name):
    """Decode a batch of question index arrays and print each question
    together with its integer reward.

    Relies on module-level `env` (for `env.to_sentence`) and
    `_parse_gt_questions`.
    """
    decoded = _parse_gt_questions(arr, arr_len)
    print('\n%s:' % (name))
    for tokens, reward in zip(decoded, _rewards):
        # Strip the trailing end-of-question token (index 2) before decoding.
        if tokens[-1] == 2:
            tokens = tokens[:-1]
        sent = env.to_sentence.index_to_question(tokens)
        print('%s (%d)' % (sent, reward))
def _show_examples(arr, arr_len, name):
    """Score a batch of questions with the module-level `model` and print
    each decoded question with its score (3 decimal places).

    Relies on module-level `model`, `_SENT`, and `_parse_gt_questions`.
    """
    _rewards = model.inference([arr, arr_len])
    decoded = _parse_gt_questions(arr, arr_len)
    print('\n%s:' % (name))
    for tokens, score in zip(decoded, _rewards):
        if tokens[-1] == 2:  # drop end-of-question token (index 2)
            tokens = tokens[:-1]
        sent = _SENT.index_to_question(tokens)
        print('%s (%0.3f)' % (sent, score))
def __init__(self, batch_size, pad_token):
    """Load the kp-restval VQA question split and pad it into fixed arrays.

    :param batch_size: number of questions served per batch.
    :param pad_token: token index used to pad questions to `max_length`.
    """
    self.batch_size = batch_size
    from util import load_hdf5
    dataset = load_hdf5('data/vqa_std_mscoco_kprestval.data')
    # Append the end-of-question token (index 2) to every ground-truth path.
    questions = [seq + [2] for seq in
                 _parse_gt_questions(dataset['quest_arr'], dataset['quest_len'])]
    self._quest, self._quest_len = put_to_array(questions, pad_token,
                                                max_length=20)
    # Total number of questions available.
    self.num = self._quest_len.size
def test_cst_reader():
    """Smoke-test ContrastiveDataReader: pop a few batches and print the
    real/fake question pairs plus the mask.

    Each popped batch stacks real questions in the first half of the batch
    and fake (contrastive) questions in the second half — presumably; the
    pairing below assumes that layout (TODO confirm against the reader).
    """
    reader = ContrastiveDataReader(batch_size=4)
    to_sentence = SentenceGenerator(trainset='trainval')
    reader.start()
    for i in range(4):
        images, quest, quest_len, top_ans, mask = reader.pop_batch()
        questions = _parse_gt_questions(quest, quest_len)
        print('\nBatch %d' % i)
        # FIX: use floor division — under Python 3, `/` yields a float and
        # range() below would raise TypeError. `//` behaves identically on
        # Python 2 for these non-negative ints.
        this_batch_size = images.shape[0] // 2
        for idx in range(this_batch_size):
            print('Real: %s' % to_sentence.index_to_question(questions[idx]))
            print('Fake: %s\n' % to_sentence.index_to_question(
                questions[idx + this_batch_size]))
        print('Mask:')
        print(mask.astype(np.int32))
    reader.stop()
def reinforce_trainstep(reader_outputs, model, env, sess, task_ops, _VQA_Belief):
    """Run one REINFORCE-style training step.

    Samples variational questions from `model`, scores them with `env` (VQA
    reward + language-model reward), inserts the scored paths into the
    `_VQA_Belief` buffer, and runs `task_ops` on the reward-weighted batch.

    :param reader_outputs: tuple of 9 arrays — (res5c, images, quest,
        quest_len, top_ans, ans, ans_len, quest_ids, image_ids).
    :param model: sampling model; provides `random_sampling`, `pad_token`,
        and `fill_feed_dict`.
    :param env: reward environment; provides `get_reward`, `language_thresh`,
        `to_sentence`, and `lm`.
    :param sess: TensorFlow session the ops are run in.
    :param task_ops: list of ops/tensors to evaluate on the training step.
    :param _VQA_Belief: belief buffer collecting (path, score) candidates.
    :return: `sess.run(task_ops)` outputs with [avg_reward, 'reward'] appended.
    """
    # reader_outputs = reader.pop_batch()
    # quest_ids, images, quest, quest_len, top_ans, ans, ans_len = reader_outputs
    # select the first image
    # idx = 0
    #
    # def _reshape_array(v):
    #     if type(v) == np.ndarray:
    #         return v[np.newaxis, :]
    #     else:
    #         return np.reshape(v, (1,))
    #
    # selected = [_reshape_array(v[idx]) for v in reader_outputs]
    res5c, images, quest, quest_len, top_ans, ans, ans_len, quest_ids, image_ids = reader_outputs
    # random sampling: draw noise vectors, question paths and their scores
    noise_vec, pathes, scores = model.random_sampling([images, ans, ans_len], sess)
    _this_batch_size = images.shape[0]
    scores, pathes, noise = post_process_variation_questions_noise(
        scores, pathes, noise_vec, _this_batch_size, find_unique=False)
    # Wrap sampled paths + ground truth for the language model; note the
    # language model uses a shifted pad token (model.pad_token - 1).
    lm_inputs = wrap_samples_for_language_model(sampled=pathes,
                                                pad_token=model.pad_token - 1,
                                                gts=[quest, quest_len],
                                                max_length=20)

    # Debug helper: decode and print each path with its (integer) reward.
    def _show_examples(arr, arr_len, _rewards, name):
        ps = _parse_gt_questions(arr, arr_len)
        print('\n%s:' % (name))
        for p, r in zip(ps, _rewards):
            if p[-1] == 2:  # strip end-of-question token
                p = p[:-1]
            sent = env.to_sentence.index_to_question(p)
            print('%s (%d)' % (sent, r))

    # compute reward
    vqa_inputs = [images, res5c, ans, ans_len, top_ans]
    # lm_inputs = lm_inputs[:2]
    wrapped_sampled = lm_inputs[:2]
    rewards, rewards_all, is_gt, aug_data = env.get_reward(
        pathes, [quest, quest_len],
        [vqa_inputs, wrapped_sampled, scores, quest_ids])
    # Keep, per question, the max-reward path plus its noise vector.
    max_path_arr, max_path_len, max_noise, max_rewards = \
        prepare_reinforce_data(pathes, noise, rewards, pad_token=model.pad_token)
    # rewards_all columns: 0 = VQA score, 2 = language-model score
    # (assumed from the indexing here — TODO confirm against env.get_reward).
    vqa_scores = rewards_all[:, 0]
    language_scores = rewards_all[:, 2]
    # scores = vqa_scores * (language_scores > 0.5)
    # Zero out VQA scores for paths the language model rejects.
    scores = vqa_scores * (language_scores > env.language_thresh)
    new_pathes = _parse_gt_questions(max_path_arr, max_path_len)
    _VQA_Belief.insert(new_pathes, scores)
    # _show_examples(max_path_arr, max_path_len, is_gt, 'Sampled')
    # pdb.set_trace()
    aug_images, aug_ans, aug_ans_len, is_in_vocab = aug_data
    sess_in = [
        aug_images, max_path_arr, max_path_len, aug_ans, aug_ans_len,
        max_noise, max_rewards, rewards_all
    ]
    sess_in = [_in[is_in_vocab] for _in in sess_in]  # remove oov
    avg_reward = max_rewards.mean()
    # train op
    sess_outputs = sess.run(task_ops, feed_dict=model.fill_feed_dict(sess_in))
    sess_outputs += [avg_reward, 'reward']
    # update language model
    # print('Number GT: %d' % is_gt.sum())
    # num_fake_in_batch = 80 - is_gt.sum()
    # NOTE(review): this branch is disabled (`if False`); the language-model
    # update below never runs as written.
    if False:  # at least half is generated
        wrapped_gt = _Q_CTX.get_gt_batch(*lm_inputs[2:])  # random sample new
        corrected_inputs = correct_language_model_inputs(
            wrapped_sampled + wrapped_gt, is_gt)
        # num_fake = corrected_inputs[0].shape[0]
        # num_real = corrected_inputs[2].shape[0]
        # print('Num positive: %d, num negative %d' % (num_real, num_fake))
        # _show_examples(corrected_inputs[0], corrected_inputs[1], np.zeros_like(corrected_inputs[1]), 'Fake')
        # _show_examples(corrected_inputs[2], corrected_inputs[3], np.zeros_like(corrected_inputs[3]), 'Real')
        # pdb.set_trace()
        if min(wrapped_sampled[1].size, wrapped_gt[1].size) > 0:
            env.lm.trainstep(corrected_inputs)
    # _VQA_Belief.vertify_vqa(env, vqa_inputs)
    return sess_outputs
def feed_train(train_op, train_dir, log_every_n_steps, graph, global_step,
               number_of_steps, init_fn, saver, reader=None, model=None,
               summary_op=None, env=None):
    """Per-question "hacking" training loop.

    For each test datum, repeatedly runs `reinforce_trainstep` until the
    belief buffer signals termination, records the resulting questions via
    the module-level `_WRITER`, then resets the model with `init_fn` before
    moving to the next question.

    :param train_op: training op to run each step.
    :param train_dir: checkpoint directory (also holds the summary subdir).
    :param log_every_n_steps: unused here (kept for caller compatibility).
    :param graph: tf.Graph to attach the session to.
    :param global_step: global-step tensor, run alongside `train_op`.
    :param number_of_steps: number of test questions to process.
    :param init_fn: callable(sess) restoring/initialising model weights.
    :param saver: tf.train.Saver used to restore from a checkpoint.
    :param reader: data source providing `get_test_batch()`.
    :param model: model passed through to `reinforce_trainstep`.
    :param summary_op: if not None, a summary writer directory is created.
    :param env: reward environment (provides `lm` and `to_sentence`).
    """
    summary_writer = None
    sess = tf.Session(graph=graph)
    summary_interval = 100  # NOTE(review): set but never used below
    # prepare summary writer
    _write_summary = summary_op is not None
    if _write_summary:
        summary_dir = os.path.join(train_dir, 'summary')
        if not tf.gfile.IsDirectory(summary_dir):
            tf.logging.info("Creating summary directory: %s", summary_dir)
            tf.gfile.MakeDirs(summary_dir)
        summary_writer = tf.summary.FileWriter(summary_dir)
    # setup language model
    lm = env.lm
    lm.set_session(sess)
    # initialise training
    ckpt = tf.train.get_checkpoint_state(train_dir)
    sv_path = os.path.join(train_dir, 'model.ckpt')
    with graph.as_default():
        init_op = tf.initialize_all_variables()
        sess.run(init_op)
    if ckpt is None:
        # no checkpoint: run the caller-supplied init, then set up the LM
        if init_fn is not None:
            init_fn(sess)
        lm.setup_model()
    else:
        ckpt_path = ckpt.model_checkpoint_path
        tf.logging.info('Restore from model %s' % os.path.basename(ckpt_path))
        saver.restore(sess, ckpt_path)
        lm.setup_model()
    # build belief buffer
    _VQA_Belief = VQABelief()
    # customized training code
    for itr in range(number_of_steps):
        datum = reader.get_test_batch()
        quest_id = datum[-2][0]
        image_id = datum[-1][0]
        top_ans_id = datum[4][0]
        # 2000 appears to mark an out-of-vocabulary top answer — skip it
        # (TODO confirm against the answer vocabulary size).
        if top_ans_id == 2000:
            continue
        _, _, quest, quest_len, _, ans, ans_len, _, _ = datum
        question = env.to_sentence.index_to_question(
            _parse_gt_questions(quest, quest_len)[0])
        answer = env.to_sentence.index_to_answer(
            _parse_gt_questions(ans, ans_len)[0])
        im_file = '%s2014/COCO_%s2014_%012d.jpg' % ('val', 'val', image_id)
        im_path = os.path.join(IM_ROOT, im_file)
        print('Hacking question %d (%d/%d)...'
              % (quest_id, itr, number_of_steps))
        head = 'Q: %s A: %s' % (question, answer)
        print(head)
        t = time.time()
        # keep training on this single question until the belief buffer
        # decides it has converged / collected enough candidates
        while True:
            task_ops = [train_op, global_step]
            total_loss, np_global_step, avg_reward, t_str = \
                reinforce_trainstep(datum, model, env, sess, task_ops,
                                    _VQA_Belief)
            if _VQA_Belief.should_terminate():
                break
        print('Hacking finished in %0.2fs' % (time.time() - t))
        questions = _VQA_Belief.show_belief(env, quest_id)
        _WRITER.add_result(image_id, quest_id, im_path, head, questions)
        _VQA_Belief.clear()
        # reset model
        init_fn(sess)
    # Finish training
    # tf.logging.info('Finished training! Saving model to disk.')
    # saver.save(sess, sv_path, global_step=global_step)
    # Close
    # reader.stop()
    sess.close()
from readers.vqa_irrelevance_data_fetcher import AttentionDataReader as Reader
from post_process_variation_questions import _parse_gt_questions
from inference_utils.question_generator_util import SentenceGenerator

# Smoke-test script: pop a handful of batches from the irrelevance reader
# and print every decoded question together with its label.
reader = Reader(batch_size=10, subset='trainval', model_name='something',
                epsilon=0.5, feat_type='res5c', version='v1',
                counter_sampling=False)
to_sentence = SentenceGenerator(trainset='trainval')
reader.start()
for batch_idx in range(5):
    print('--------- BATCH %d ---------' % batch_idx)
    res5c, quest, quest_len, labels = reader.pop_batch()
    for path, label in zip(_parse_gt_questions(quest, quest_len), labels):
        print('%s %d' % (to_sentence.index_to_question(path), label))
reader.stop()