Example #1
def _show_examples(arr, arr_len, _rewards, name):
    ps = _parse_gt_questions(arr, arr_len)
    print('\n%s:' % (name))
    for p, r in zip(ps, _rewards):
        if p[-1] == 2:  # drop the trailing end-of-sequence token
            p = p[:-1]
        sent = env.to_sentence.index_to_question(p)
        print('%s (%d)' % (sent, r))


def _show_examples(arr, arr_len, name):
    _rewards = model.inference([arr, arr_len])
    ps = _parse_gt_questions(arr, arr_len)
    print('\n%s:' % (name))
    for p, r in zip(ps, _rewards):
        if p[-1] == 2:  # drop the trailing end-of-sequence token
            p = p[:-1]
        sent = _SENT.index_to_question(p)
        print('%s (%0.3f)' % (sent, r))


def __init__(self, batch_size, pad_token):
    self.batch_size = batch_size
    from util import load_hdf5
    data_file = 'data/vqa_std_mscoco_kprestval.data'
    d = load_hdf5(data_file)
    gts = _parse_gt_questions(d['quest_arr'], d['quest_len'])
    gts = [_g + [2] for _g in gts]  # append the end-of-sequence token
    self._quest, self._quest_len = put_to_array(gts, pad_token,
                                                max_length=20)
    self.num = self._quest_len.size
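The snippets above rely on two project helpers, put_to_array and _parse_gt_questions (the latter is imported from post_process_variation_questions further down). A minimal sketch of the pad/unpad round-trip they are assumed to perform, with hypothetical stand-in implementations rather than the project's own:

import numpy as np


def put_to_array(seqs, pad_token, max_length=20):
    # Hypothetical stand-in: pad each token-id sequence to max_length with
    # pad_token and record the true lengths, mirroring how it is used above.
    arr = np.full((len(seqs), max_length), pad_token, dtype=np.int32)
    lens = np.zeros(len(seqs), dtype=np.int32)
    for i, s in enumerate(seqs):
        n = min(len(s), max_length)
        arr[i, :n] = s[:n]
        lens[i] = n
    return arr, lens


def _parse_gt_questions(arr, arr_len):
    # Hypothetical stand-in: undo the padding, returning one token-id list per row.
    return [arr[i, :n].tolist() for i, n in enumerate(arr_len)]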
def test_cst_reader():
    reader = ContrastiveDataReader(batch_size=4)
    to_sentence = SentenceGenerator(trainset='trainval')

    reader.start()
    for i in range(4):
        images, quest, quest_len, top_ans, mask = reader.pop_batch()
        questions = _parse_gt_questions(quest, quest_len)
        print('\nBatch %d' % i)
        this_batch_size = images.shape[0] // 2  # first half real, second half fake questions
        for idx in range(this_batch_size):
            print('Real: %s' % to_sentence.index_to_question(questions[idx]))
            print('Fake: %s\n' % to_sentence.index_to_question(questions[idx + this_batch_size]))
        print('Mask:')
        print(mask.astype(np.int32))
    reader.stop()
Example #5
def reinforce_trainstep(reader_outputs, model, env, sess, task_ops,
                        _VQA_Belief):
    # reader_outputs = reader.pop_batch()
    # quest_ids, images, quest, quest_len, top_ans, ans, ans_len = reader_outputs
    # select the first image
    # idx = 0
    #
    # def _reshape_array(v):
    #     if type(v) == np.ndarray:
    #         return v[np.newaxis, :]
    #     else:
    #         return np.reshape(v, (1,))
    #
    # selected = [_reshape_array(v[idx]) for v in reader_outputs]
    res5c, images, quest, quest_len, top_ans, ans, ans_len, quest_ids, image_ids = reader_outputs
    # random sampling
    noise_vec, pathes, scores = model.random_sampling([images, ans, ans_len],
                                                      sess)
    _this_batch_size = images.shape[0]
    scores, pathes, noise = post_process_variation_questions_noise(
        scores, pathes, noise_vec, _this_batch_size, find_unique=False)

    lm_inputs = wrap_samples_for_language_model(sampled=pathes,
                                                pad_token=model.pad_token - 1,
                                                gts=[quest, quest_len],
                                                max_length=20)

    def _show_examples(arr, arr_len, _rewards, name):
        ps = _parse_gt_questions(arr, arr_len)
        print('\n%s:' % (name))
        for p, r in zip(ps, _rewards):
            if p[-1] == 2:
                p = p[:-1]
            sent = env.to_sentence.index_to_question(p)
            print('%s (%d)' % (sent, r))

    # compute reward
    vqa_inputs = [images, res5c, ans, ans_len, top_ans]
    # lm_inputs = lm_inputs[:2]
    wrapped_sampled = lm_inputs[:2]
    rewards, rewards_all, is_gt, aug_data = env.get_reward(
        pathes, [quest, quest_len],
        [vqa_inputs, wrapped_sampled, scores, quest_ids])

    max_path_arr, max_path_len, max_noise, max_rewards = \
        prepare_reinforce_data(pathes, noise, rewards, pad_token=model.pad_token)

    vqa_scores = rewards_all[:, 0]
    language_scores = rewards_all[:, 2]
    # scores = vqa_scores * (language_scores > 0.5)
    scores = vqa_scores * (language_scores > env.language_thresh)
    new_pathes = _parse_gt_questions(max_path_arr, max_path_len)
    _VQA_Belief.insert(new_pathes, scores)

    # _show_examples(max_path_arr, max_path_len, is_gt, 'Sampled')
    # pdb.set_trace()

    aug_images, aug_ans, aug_ans_len, is_in_vocab = aug_data
    sess_in = [
        aug_images, max_path_arr, max_path_len, aug_ans, aug_ans_len,
        max_noise, max_rewards, rewards_all
    ]
    sess_in = [_in[is_in_vocab] for _in in sess_in]  # remove oov
    avg_reward = max_rewards.mean()

    # train op
    sess_outputs = sess.run(task_ops, feed_dict=model.fill_feed_dict(sess_in))
    sess_outputs += [avg_reward, 'reward']

    # update language model
    # print('Number GT: %d' % is_gt.sum())
    # num_fake_in_batch = 80 - is_gt.sum()
    if False:  # disabled: originally gated on at least half of the batch being generated
        wrapped_gt = _Q_CTX.get_gt_batch(*lm_inputs[2:])  # random sample new
        corrected_inputs = correct_language_model_inputs(
            wrapped_sampled + wrapped_gt, is_gt)
        # num_fake = corrected_inputs[0].shape[0]
        # num_real = corrected_inputs[2].shape[0]
        # print('Num positive: %d, num negative %d' % (num_real, num_fake))

        # _show_examples(corrected_inputs[0], corrected_inputs[1], np.zeros_like(corrected_inputs[1]), 'Fake')
        # _show_examples(corrected_inputs[2], corrected_inputs[3], np.zeros_like(corrected_inputs[3]), 'Real')
        # pdb.set_trace()

        if min(wrapped_sampled[1].size, wrapped_gt[1].size) > 0:
            env.lm.trainstep(corrected_inputs)
    # _VQA_Belief.vertify_vqa(env, vqa_inputs)
    return sess_outputs
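reinforce_trainstep and feed_train (below) share a _VQA_Belief buffer built from a VQABelief class defined elsewhere in the project; only insert, should_terminate, show_belief and clear are visible here. A minimal sketch of that interface follows; the termination rule, the ranking, and the default parameters are assumptions, not the original logic:

import numpy as np


class VQABelief(object):
    # Hypothetical sketch of the belief buffer used above; the real class lives
    # elsewhere in the project, and the criteria below are assumptions.
    def __init__(self, max_samples=200):
        self._pathes, self._scores = [], []
        self._max_samples = max_samples

    def insert(self, pathes, scores):
        # accumulate sampled questions together with their scores
        self._pathes.extend(pathes)
        self._scores.extend(np.asarray(scores).tolist())

    def should_terminate(self):
        # assumed criterion: stop once enough samples have been collected
        return len(self._scores) >= self._max_samples

    def show_belief(self, env, quest_id, top_k=5):
        # return the top-k sampled questions (by score) as sentences;
        # quest_id matches the call site above but is unused in this sketch
        order = np.argsort(self._scores)[::-1][:top_k]
        return [env.to_sentence.index_to_question(self._pathes[i]) for i in order]

    def clear(self):
        self._pathes, self._scores = [], []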
def feed_train(train_op,
               train_dir,
               log_every_n_steps,
               graph,
               global_step,
               number_of_steps,
               init_fn,
               saver,
               reader=None,
               model=None,
               summary_op=None,
               env=None):
    summary_writer = None
    sess = tf.Session(graph=graph)
    summary_interval = 100
    # prepare summary writer
    _write_summary = summary_op is not None
    if _write_summary:
        summary_dir = os.path.join(train_dir, 'summary')
        if not tf.gfile.IsDirectory(summary_dir):
            tf.logging.info("Creating summary directory: %s", summary_dir)
            tf.gfile.MakeDirs(summary_dir)
        summary_writer = tf.summary.FileWriter(summary_dir)
    # setup language model
    lm = env.lm
    lm.set_session(sess)
    # initialise training
    ckpt = tf.train.get_checkpoint_state(train_dir)
    sv_path = os.path.join(train_dir, 'model.ckpt')
    with graph.as_default():
        init_op = tf.global_variables_initializer()
    sess.run(init_op)
    if ckpt is None:
        if init_fn is not None:
            init_fn(sess)
        lm.setup_model()
    else:
        ckpt_path = ckpt.model_checkpoint_path
        tf.logging.info('Restore from model %s' % os.path.basename(ckpt_path))
        saver.restore(sess, ckpt_path)
        lm.setup_model()

    # build belief buffer
    _VQA_Belief = VQABelief()
    # customized training code
    for itr in range(number_of_steps):
        datum = reader.get_test_batch()
        quest_id = datum[-2][0]
        image_id = datum[-1][0]
        top_ans_id = datum[4][0]
        if top_ans_id == 2000:  # skip samples whose answer falls outside the top-2000 answer vocabulary
            continue

        _, _, quest, quest_len, _, ans, ans_len, _, _ = datum
        question = env.to_sentence.index_to_question(
            _parse_gt_questions(quest, quest_len)[0])
        answer = env.to_sentence.index_to_answer(
            _parse_gt_questions(ans, ans_len)[0])
        im_file = '%s2014/COCO_%s2014_%012d.jpg' % ('val', 'val', image_id)
        im_path = os.path.join(IM_ROOT, im_file)

        print('Hacking question %d (%d/%d)...' %
              (quest_id, itr, number_of_steps))
        head = 'Q: %s A: %s' % (question, answer)
        print(head)
        t = time.time()
        while True:
            task_ops = [train_op, global_step]
            total_loss, np_global_step, avg_reward, t_str = \
                reinforce_trainstep(datum, model, env, sess, task_ops, _VQA_Belief)
            if _VQA_Belief.should_terminate():
                break
        print('Hacking finished in %0.2fs' % (time.time() - t))
        questions = _VQA_Belief.show_belief(env, quest_id)

        _WRITER.add_result(image_id, quest_id, im_path, head, questions)
        _VQA_Belief.clear()
        # reset model
        init_fn(sess)

    # Finish training
    # tf.logging.info('Finished training! Saving model to disk.')
    # saver.save(sess, sv_path, global_step=global_step)

    # Close
    # reader.stop()
    sess.close()
from readers.vqa_irrelevance_data_fetcher import AttentionDataReader as Reader
from post_process_variation_questions import _parse_gt_questions
from inference_utils.question_generator_util import SentenceGenerator

reader = Reader(batch_size=10,
                subset='trainval',
                model_name='something',
                epsilon=0.5,
                feat_type='res5c',
                version='v1',
                counter_sampling=False)

to_sentence = SentenceGenerator(trainset='trainval')

reader.start()

for i in range(5):
    print('--------- BATCH %d ---------' % i)
    res5c, quest, quest_len, labels = reader.pop_batch()
    pathes = _parse_gt_questions(quest, quest_len)
    for _p, lbl in zip(pathes, labels):
        print('%s %d' % (to_sentence.index_to_question(_p), lbl))

reader.stop()