def ivqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s_%s.json' % (
        FLAGS.model_type.upper(), subset)
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader(FLAGS.model_type, phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    reader = create_fn(batch_size=100,
                       subset=subset,
                       version=FLAGS.test_version)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'beam')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()

        # inference
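        # by convention the reader's test batch ends with (quest_ids, image_ids);
        # everything before them is the model's input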
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)

        scores, pathes = post_process_prediction(scores, pathes)
        question = to_sentence.index_to_question(pathes[0])
        print('%d/%d: %s' % (i, num_batches, question))

        for quest_id, image_id, path in zip(quest_ids, image_ids, pathes):
            sentence = to_sentence.index_to_question(path)
            res_i = {
                'image_id': int(image_id),
                'question_id': int(quest_id),
                'question': sentence
            }
            results.append(res_i)

    save_json(res_file, results)
    return res_file
Example #2
def vaq_decoding_greedy(checkpoint_path=None, subset='kpval'):
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s.json' % FLAGS.model_type.upper()

    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader(FLAGS.model_type, phase='test')
    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # build data reader
    reader = create_fn(batch_size=32, subset=subset)

    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir %
                                             FLAGS.model_type)
        checkpoint_path = ckpt.model_checkpoint_path

    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'greedy')
        model.build()
        saver = tf.train.Saver()

        sess = tf.Session()
        tf.logging.info('Restore from model %s' %
                        os.path.basename(checkpoint_path))
        saver.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running greedy inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)

        scores, pathes = post_process_prediction(scores, pathes)
        question = to_sentence.index_to_question(pathes[0])
        print('%d/%d: %s' % (i, num_batches, question))

        for quest_id, image_id, path in zip(quest_ids, image_ids, pathes):
            sentence = to_sentence.index_to_question(path)
            res_i = {
                'image_id': int(image_id),
                'question_id': int(quest_id),
                'question': sentence
            }
            results.append(res_i)

    save_json(res_file, results)
    return res_file
def sample_cst_questions(checkpoint_path=None, subset='kptrain'):
    model_config = ModelConfig()
    model_config.convert = FLAGS.convert
    model_config.loss_type = 'pairwise'
    model_config.top_k = 3
    batch_size = 8
    # Get model
    create_fn = create_reader(FLAGS.model_type, phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    reader = create_fn(batch_size=batch_size,
                       subset=subset,
                       version=FLAGS.test_version)

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = ContrastQuestionSampler(model_config)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')

    for i in range(num_batches):
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        c_ans, c_ans_len, pathes, scores = model.greedy_inference(
            outputs[:-2], sess)
        scores, pathes = post_process_prediction(scores, pathes)

        k = 3
        capt, capt_len = outputs[2:4]

        gt = capt[0, :capt_len[0]]
        print('gt: %s [%s]' %
              (to_sentence.index_to_question(gt),
               to_sentence.index_to_answer(c_ans[0, :c_ans_len[0]])))
        for ix in range(k):
            question = to_sentence.index_to_question(pathes[ix])
            answer = to_sentence.index_to_answer(c_ans[ix, :c_ans_len[ix]])
            print('%s %d: %s [%s]' %
                  ('pre' if ix == 0 else 'cst', ix, question, answer))
        import pdb
        pdb.set_trace()
def visualise():
    mc_ctx = MultiChoiceQuestionManger()
    to_sentence = SentenceGenerator(trainset='trainval')
    # writer = ExperimentWriter('latex/examples_replay_buffer_rescore')
    writer = ExperimentWriter('latex/examples_replay_buffer_rescore_prior')
    # d = load_json('vqa_replay_buffer/vqa_replay_low_rescore.json')
    d = load_json('vqa_replay_buffer/vqa_replay_low_rescore_prior_05_04.json')
    memory = d['memory']
    # show random 100
    keys = list(memory.keys())
    np.random.seed(123)
    np.random.shuffle(keys)
    vis_keys = keys[:100]
    for i, quest_key in enumerate(vis_keys):
        pathes = memory[quest_key]
        if len(pathes) == 0:
            continue
        # if it has valid questions
        quest_id = int(quest_key)
        image_id = mc_ctx.get_image_id(quest_id)
        gt_question = mc_ctx.get_question(quest_id)
        answer = mc_ctx.get_gt_answer(quest_id)
        head = 'Q: %s A: %s' % (gt_question, answer)
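        # COCO naming convention: COCO_val2014_<12-digit image id>.jpg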
        im_file = '%s2014/COCO_%s2014_%012d.jpg' % ('val', 'val', image_id)
        im_path = os.path.join(IM_ROOT, im_file)
        questions = []
        for p in pathes.keys():
            conf1, conf2 = pathes[p]
            _tokens = [int(t) for t in p.split(' ')]
            sentence = to_sentence.index_to_question(_tokens)
            descr = '%s (%0.2f-%0.2f)' % (sentence, conf1, conf2)
            questions.append(descr)
        writer.add_result(image_id, quest_id, im_path, head, questions)
    writer.render()
def test_cst_reader():
    reader = ContrastiveDataReader(batch_size=4)
    to_sentence = SentenceGenerator(trainset='trainval')

    reader.start()
    for i in range(4):
        images, quest, quest_len, top_ans, mask = reader.pop_batch()
        questions = _parse_gt_questions(quest, quest_len)
        print('\nBatch %d' % i)
        this_batch_size = images.shape[0] // 2
        for idx in range(this_batch_size):
            print('Real: %s' % to_sentence.index_to_question(questions[idx]))
            print('Fake: %s\n' % to_sentence.index_to_question(questions[idx + this_batch_size]))
        print('Mask:')
        print(mask.astype(np.int32))
    reader.stop()
Example #6
def test():
    from util import unpickle
    import json
    from inference_utils.question_generator_util import SentenceGenerator
    from w2v_answer_encoder import MultiChoiceQuestionManger

    config = MLPConfig()
    model = SequenceMLP(config, phase='test')
    model.build()
    prob = model.prob

    # Load vocabulary
    to_sentence = SentenceGenerator(trainset='trainval')
    # create multiple choice question manger
    mc_manager = MultiChoiceQuestionManger(subset='trainval',
                                           answer_coding='sequence')

    sess = tf.Session()
    # Load model
    ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
    checkpoint_path = ckpt.model_checkpoint_path
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint_path)

    # get data
    result = []
    dataset = unpickle('data/rescore_dev.pkl')
    for itr, datum in enumerate(dataset):
        seq_index, att_mask, label = _process_datum(datum)
        quest_id = datum['quest_id']
        quest = seq_index[0].tolist()
        feed_dict = model.fill_feed_dict([seq_index, att_mask])
        scores = sess.run(prob, feed_dict=feed_dict)
        idx = scores.argmax()
        # parse question and answer
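        # prepend index 0 (presumably the decoder's start token) before decoding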
        question = to_sentence.index_to_question([0] + quest)
        mc_ans = mc_manager.get_candidate_answers(quest_id)
        vaq_answer = mc_ans[idx]
        real_answer = mc_ans[label.argmax()]
        # add result
        result.append({u'answer': vaq_answer, u'question_id': quest_id})
        # show results
        if itr % 100 == 0:
            print('============== %d ============' % itr)
            print('question id: %d' % quest_id)
            print('question\t: %s' % question)
            print('answer\t: %s' % real_answer)
            print('VAQ answer\t: %s (%0.2f)' % (vaq_answer, scores[idx]))

    quest_ids = [res[u'question_id'] for res in result]
    # save results
    tf.logging.info('Saving results')
    res_file = 'result/rescore_dev_dev.json'
    json.dump(result, open(res_file, 'w'))
    from vqa_eval import evaluate_model
    acc = evaluate_model(res_file, quest_ids)
    print('Overall accuracy: %0.2f' % acc)
def main(_):
    # Build the inference graph.
    config = QuestionGeneratorConfig()
    reader = TFRecordDataFetcher(FLAGS.input_files, config.image_feature_key)

    g = tf.Graph()
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)
    with g.as_default():
        model = QuestionGenerator(config, phase='evaluate')
        model.build()
    # g.finalize()

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)

    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        saver = tf.train.Saver(var_list=tf.all_variables())
        saver.restore(sess, checkpoint_path)

        itr = 0
        while not reader.eof():
            outputs = reader.pop_batch()
            im_ids, quest_id, im_feat, ans_w2v, quest_ids, ans_ids = outputs
            inputs = post_processing_data(outputs)
            perplexity = sess.run(model.likelihood,
                                  feed_dict=model.fill_feed_dict(inputs))

            # generated = [generated[0]]  # sample 3
            question = to_sentence.index_to_question(quest_ids)
            answer = to_sentence.index_to_answer(ans_ids)

            print('============== %d ============' % itr)
            print('image id: %d, question id: %d' % (im_ids, quest_id))
            print('question\t: %s' % question)
            elems = question.split(' ')
            tmp = ' '.join([
                '%s (%0.2f)' % (w, p)
                for w, p in zip(elems, perplexity.flatten())
            ][:-1])
            print('question\t' + tmp)
            print('answer\t: %s' % answer)
            print('perplexity\t: %0.2f\n' % perplexity.mean())

            itr += 1
def test_rerank_reader():
    reader = RetrievalDataReader(batch_size=1, n_contrast=10, subset='train')
    reader.start()
    outputs = reader.pop_batch()
    im_feat, quest_arr, quest_len, ans_arr, ans_len = outputs
    from inference_utils.question_generator_util import SentenceGenerator
    to_sentence = SentenceGenerator(
        trainset='trainval',
        ans_vocab_file='data/vqa_trainval_question_answer_word_counts.txt',
        quest_vocab_file='data/vqa_trainval_question_answer_word_counts.txt')
    for q_seq, q_len, a_seq, a_len in zip(quest_arr, quest_len, ans_arr,
                                          ans_len):
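        # wrap the raw indices with index 0 on both sides (apparently the
        # start/end tokens) so the decoder sees complete sequences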
        q_ = np.array([0] + q_seq[:q_len].tolist() + [0])
        a_ = np.array([0] + a_seq[:a_len].tolist() + [0])
        q = to_sentence.index_to_question(q_)
        a = to_sentence.index_to_answer(a_)
        print('Q: %s' % q)
        print('A: %s\n' % a)
    reader.stop()
def main():
    # params
    k = 80
    res_file = 'result/quest_vaq_nn.json'
    # sentence generator
    to_sentence = SentenceGenerator(trainset='trainval')
    # load distances
    val_qids, nn_ids = load_image_nn()
    # create nn model
    nn_model = QuestionPool()
    num = len(val_qids)
    results = []
    for i, (v_qid, v_nn) in enumerate(zip(val_qids, nn_ids)):
        # run nn search
        t = time()
        tr_qid, tr_path = nn_model.get_candidates(v_nn[:k])
        sent = to_sentence.index_to_question(tr_path)
        print(sent)
        print('Processing %d/%d, time %0.2f sec.' % (i, num, time() - t))
        res_i = {'question_id': int(v_qid), 'question': sent}
        results.append(res_i)

    save_json(res_file, results)
    cider = evaluate_question(res_file, subset='kpval', version='v1')
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kptest'):
    model_config = ModelConfig()
    res_file = 'result/aug_var_vaq_kl0_greedy_%s.json' % FLAGS.model_type.upper()
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader('VAQ-Var', phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    reader = create_fn(batch_size=1, subset=subset, version=FLAGS.test_version)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling_beam')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        print('iter: %d/%d' % (i, num_batches))
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)
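        # broadcast the per-sequence scores to per-token scores, the shape the
        # variation post-processor below expects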
        scores = np.tile(scores[:, np.newaxis], [1, pathes.shape[1]])
        # scores, pathes = post_process_prediction(scores, pathes)

        _ntot = len(pathes)
        scores, pathes, ivqa_counts = post_process_variation_questions_with_count(
            scores, pathes, 1)

        question_id = int(quest_ids[0])
        image_id = image_ids[0]

        print('%d/%d' % (len(pathes[0]), _ntot))
        for _p_idx, (path, sc) in enumerate(zip(pathes[0], scores[0])):
            sentence = to_sentence.index_to_question(path)
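            # fold the sample index into the id so every augmented question
            # remains traceable to its source (assumes < 1000 samples each)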
            aug_quest_id = question_id * 1000 + _p_idx
            # res_i = {'image_id': int(image_id),
            #          'question_id': aug_quest_id,
            #          'question': sentence}
            res_i = {
                'image_id': int(image_id),
                'question_id': aug_quest_id,
                'question': sentence,
                'question_inds': path,
                'counts': len(pathes),
                'probs': float(sc)
            }
            results.append(res_i)

    save_json(res_file, results)
    return res_file
Example #11
def ivqa_decoding_beam_search(checkpoint_path=None, subset=FLAGS.subset):
    model_config = ModelConfig()
    _model_suffix = 'var_' if FLAGS.use_var else ''
    res_file = 'data_rl/%sivqa_%s_questions.json' % (_model_suffix,
                                                     FLAGS.subset)
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-Var', phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    batch_size = 64
    reader = create_fn(batch_size=batch_size,
                       subset=subset,
                       version=FLAGS.test_version)

    if checkpoint_path is None:
        if FLAGS.use_var:  # variational models
            ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        else:  # standard models
            ckpt_dir = FLAGS.checkpoint_dir % ('kprestval', FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    mode = 'sampling' if FLAGS.use_var else 'beam'

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, mode)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    extend_questions = []
    extended_question_ids = []
    for i in range(num_batches):
        print('iter: %d/%d' % (i, num_batches))
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)
        scores, pathes = post_process_prediction(scores,
                                                 pathes,
                                                 add_start_end=False)

        # process for each sample
        _this_batch_size = quest_ids.shape[0]
        num_sampled = int(len(pathes) / _this_batch_size)
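        # samples of one item are strided across the flattened batch; these
        # offsets gather every sampled path belonging to item _s_id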
        _noise_offset = np.arange(0, num_sampled,
                                  dtype=np.int32) * _this_batch_size
        for _s_id in range(_this_batch_size):
            _index = _noise_offset + _s_id
            try:
                cur_scores = [scores[_idx] for _idx in _index]
                cur_pathes = [pathes[_idx] for _idx in _index]
            except Exception as e:
                print(str(e))
                pdb.set_trace()

            cur_scores, cur_pathes = find_unique_pathes(cur_scores, cur_pathes)
            question_id = int(quest_ids[_s_id])
            image_id = image_ids[_s_id]

            for _pid, path in enumerate(cur_pathes):
                sentence = to_sentence.index_to_question(path)
                extended_question_ids.append([question_id, _pid])
                aug_quest_id = question_id * 1000 + _pid
                res_i = {
                    'image_id': int(image_id),
                    'question_id': aug_quest_id,
                    'question': sentence
                }
                results.append(res_i)
            extend_questions += cur_pathes
Example #12
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kptest'):
    model_config = ModelConfig()
    res_file = 'result/var_vaq_beam_%s_%s.json' % (FLAGS.model_type.upper(),
                                                   FLAGS.mode)
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader(FLAGS.model_type, phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    reader = create_fn(batch_size=50,
                       subset=subset,
                       version=FLAGS.test_version)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling_beam')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        print('iter: %d/%d' % (i, num_batches))
        # if i >= 10:
        #     break
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)

        # wrap inputs
        _this_batch_size = quest_ids.size
        seq_len = pathes.shape[1]
        dummy_scores = np.tile(scores[:, np.newaxis], [1, seq_len])
        # dummy_scores = np.zeros_like(pathes, dtype=np.float32)
        ivqa_scores, ivqa_pathes, ivqa_counts = post_process_variation_questions_with_count(
            dummy_scores, pathes, _this_batch_size)
        # scores, pathes = convert_to_unique_questions(scores, pathes)

        for _q_idx, (ps, scs, cs) in enumerate(
                zip(ivqa_pathes, ivqa_scores, ivqa_counts)):
            image_id = image_ids[_q_idx]
            question_id = int(quest_ids[_q_idx])
            if FLAGS.mode == 'full':
                for _p_idx, p in enumerate(ps):
                    sentence = to_sentence.index_to_question(p)
                    aug_quest_id = question_id * 1000 + _p_idx
                    res_i = {
                        'image_id': int(image_id),
                        'question_id': aug_quest_id,
                        'question': sentence
                    }
                    results.append(res_i)
            else:
                p = pick_question(scs, ps, cs)
                sentence = to_sentence.index_to_question(p)
                # print(sentence)
                res_i = {
                    'image_id': int(image_id),
                    'question_id': question_id,
                    'question': sentence
                }
                results.append(res_i)

    save_json(res_file, results)
    return res_file
Example #13
def test(checkpoint_path=None):
    subset = 'kptest'
    config = ModelConfig()
    config.phase = 'other'
    use_answer_type = FLAGS.model_type in ['VAQ-IAS', 'VQG']
    config.model_type = FLAGS.model_type
    mc_ctx = MultiChoiceQuestionManger(subset='val')

    # build data reader
    reader = Reader(batch_size=1,
                    subset=subset,
                    output_attr=True,
                    output_im=False,
                    output_qa=True,
                    output_capt=False,
                    output_ans_seq=True,
                    attr_type='res152')
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir %
                                             FLAGS.model_type)
        checkpoint_path = ckpt.model_checkpoint_path

    res_file = 'result/quest_vaq_%s_%s.json' % (FLAGS.model_type.upper(),
                                                subset)
    print(res_file)
    # build and restore model
    model = load_model_inferencer()
    restore_fn = model.build_graph_from_config(config, checkpoint_path)

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    restore_fn(sess)

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)
    generator = caption_generator.CaptionGenerator(model,
                                                   to_sentence.question_vocab)

    results = []
    print('Running inference on split %s...' % subset)
    num_batches = reader.num_batches
    for i in range(num_batches):
        inputs, info, quest_gt_vis = pre_process_inputs(
            reader.get_test_batch(), mc_ctx, use_answer_type)
        quest_id, image_id = info
        captions = generator.beam_search(sess, inputs)
        question = to_sentence.index_to_question(quest_gt_vis)
        # answer = to_sentence.index_to_top_answer(ans_feed)
        print('============== %d ============' % i)
        print('image id: %d, question id: %d' % (image_id, quest_id))
        print('question\t: %s' % question)
        tmp = []
        for c, g in enumerate(captions[0:3]):
            quest = to_sentence.index_to_question(g.sentence)
            tmp.append(quest)
            print('<question %d>\t: %s' % (c, quest))
        # print('answer\t: %s\n' % answer)

        caption = captions[0]
        sentence = to_sentence.index_to_question(caption.sentence)
        res_i = {
            'image_id': image_id,
            'question_id': quest_id,
            'question': sentence
        }
        results.append(res_i)
    save_json(res_file, results)
    return res_file
Example #14
def test(T=3.0, num_cands=10):
    # Build the inference graph.
    cand_file = 'result/vqa_cands.json'
    config = QuestionGeneratorConfig()
    reader = TFRecordDataFetcher(FLAGS.input_files,
                                 config.image_feature_key)

    # Create model creator
    model_creator = create_model_fn(FLAGS.model_type)

    # create multiple choice question manger
    oe_manager = CandidateAnswerManager(cand_file, max_num_cands=10)

    # Create reader post-processing function
    reader_post_proc_fn = build_mc_reader_proc_fn(model_creator.ans_coding)

    g = tf.Graph()
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)
    with g.as_default():
        model = model_creator(config, phase='evaluate')
        model.build()

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)

    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    result = []
    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        saver = tf.train.Saver(var_list=tf.all_variables())
        saver.restore(sess, checkpoint_path)

        itr = 0
        while not reader.eof():
            outputs = reader.pop_batch()
            im_ids, quest_id, im_feat, ans_w2v, quest_ids, ans_ids = outputs
            oe_ans, oe_coding, scores = oe_manager.get_answer_sequence(quest_id)
            inputs = reader_post_proc_fn(outputs, oe_coding)
            perplexity, state = sess.run([model.likelihood, model.final_decoder_state],
                                         feed_dict=model.fill_feed_dict(inputs))
            perplexity = perplexity.reshape(inputs[-1].shape)
            loss = perplexity[:, :-1].mean(axis=1)
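            # temperature-scaled weighting: exp(-loss * T) gives lower-perplexity
            # candidates exponentially larger weights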
            weight = np.exp(-loss * T)
            weight = weight / weight.sum()  # l1 normalise
            score = scores * weight
            score = score[:num_cands]

            question = to_sentence.index_to_question(quest_ids)
            answer = to_sentence.index_to_answer(ans_ids)
            top1_ans = oe_ans[score.argmax()]
            result.append({u'answer': top1_ans, u'question_id': quest_id})

            if itr % 100 == 0:
                print('============== %d ============' % itr)
                print('image id: %d, question id: %d' % (im_ids, quest_id))
                print('question\t: %s' % question)
                print('answer\t: %s' % answer)
                top_k_ids = (-score).argsort()[:3].tolist()
                print('VQA answer\t: %s' % oe_ans[0])
                for i, idx in enumerate(top_k_ids):
                    t_mc_ans = oe_ans[idx]
                    print('VAQ answer <%d>\t: %s (%0.2f)' % (i, t_mc_ans, weight[idx]))

            itr += 1

        quest_ids = [res[u'question_id'] for res in result]
        # save results
        tf.logging.info('Saving results')
        res_file = FLAGS.result_file % get_model_iteration(checkpoint_path)
        json.dump(result, open(res_file, 'w'))
        return res_file, quest_ids
from readers.vqa_irrelevance_data_fetcher import AttentionDataReader as Reader
from post_process_variation_questions import _parse_gt_questions
from inference_utils.question_generator_util import SentenceGenerator

reader = Reader(batch_size=10,
                subset='trainval',
                model_name='something',
                epsilon=0.5,
                feat_type='res5c',
                version='v1',
                counter_sampling=False)

to_sentence = SentenceGenerator(trainset='trainval')

reader.start()

for i in range(5):
    print('--------- BATCH %d ---------' % i)
    res5c, quest, quest_len, labels = reader.pop_batch()
    pathes = _parse_gt_questions(quest, quest_len)
    for _p, lbl in zip(pathes, labels):
        print('%s %d' % (to_sentence.index_to_question(_p), lbl))

reader.stop()
Example #16
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s.json' % FLAGS.model_type.upper()
    # Get model
    # model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader(FLAGS.model_type, phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    reader = create_fn(batch_size=80,
                       subset=subset,
                       version=FLAGS.test_version)

    # if checkpoint_path is None:
    #     ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir %
    #                                          (FLAGS.version,
    #                                           FLAGS.model_type))
    #     checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    # g = tf.Graph()
    # with g.as_default():
    #     # Build the model.
    #     model = model_fn(model_config, 'beam')
    #     model.build()
    #     # Restore from checkpoint
    #     restorer = Restorer(g)
    #     sess = tf.Session()
    #     restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()

        # inference
        # quest_ids, image_ids = outputs[-2:]
        # scores, pathes = model.greedy_inference(outputs[:-2], sess)
        im, capt, capt_len, ans_seq, ans_seq_len, quest_ids, image_ids = outputs

        _, res, res_len, _, _, _, _, = reader.get_test_batch()

        pathes = parse_gt_questions(capt, capt_len)
        question = to_sentence.index_to_question(pathes[0])
        gts = [to_sentence.index_to_question(q) for q in pathes]
        gts_token = [' '.join([str(t) for t in path]) for path in pathes]

        respathes = parse_gt_questions(res, res_len)
        res = [to_sentence.index_to_question(q) for q in respathes]
        res_token = [' '.join([str(t) for t in path]) for path in respathes]

        scores = compute_cider_token_1vsall(quest_ids, res_token)
        import pdb
        pdb.set_trace()

        # gts_token = []
        # # compute_cider(quest_ids, gts, res)
        # compute_cider_token(quest_ids, gts_token, res_token)
        # import pdb
        # pdb.set_trace()
        # print('%d/%d: %s' % (i, num_batches, question))
        #
        # for quest_id, image_id, path in zip(quest_ids, image_ids, pathes):
        #     sentence = to_sentence.index_to_question(path)
        #     res_i = {'image_id': int(image_id), 'question_id': int(quest_id), 'question': sentence}
        #     results.append(res_i)

    save_json(res_file, results)
    return res_file
class RerankAnalysiser(object):
    def __init__(self):
        self.labels = []
        self.rerank_preds = []
        self.vqa_top_scores = []
        self.vqa_top_preds = []
        self.vqa_cands = []
        self.to_sentence = SentenceGenerator(trainset='trainval')
        self.file_stream = open('result/rerank_analysis.txt', 'w')

    def update(self, reader_outputs, model_prediction):
        _, _, quest, quest_len, label, _, _, quest_id, image_id = reader_outputs
        score, reranked, vqa_cands, vqa_scores = model_prediction
        # save vqa predictions
        self.vqa_top_preds.append(vqa_cands[:, 0])
        self.vqa_top_scores.append(vqa_scores[:, 0])
        self.vqa_cands.append(vqa_cands)
        # save ivqa predictions
        self.rerank_preds.append(reranked)
        self.labels.append(label)
        self.update_log(quest, quest_len, vqa_cands, vqa_scores, reranked,
                        label, image_id, quest_id)

    def update_log(self, quest, quest_len, vqa_cands, vqa_scores, rerank,
                   label, image_id, quest_id):
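        # log a single randomly chosen example from this batch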
        idx = nr.randint(len(quest))
        quest_seq = quest[idx][:quest_len[idx]]
        _log = '-------- image_id: %d, quest_id: %d --------\n' % (
            image_id[idx], quest_id[idx])
        self.file_stream.write(_log)
        question = self.to_sentence.index_to_question(quest_seq)
        gt_label = label[idx]
        answer = self.to_sentence.index_to_top_answer(
            label[idx]) if gt_label < 2000 else 'UNK'
        _log = 'Q: %s\n' % question
        self.file_stream.write(_log)
        _log = 'A: %s\n' % answer
        self.file_stream.write(_log)

        r_id = rerank[idx]
        for i, (c_id,
                c_score) in enumerate(zip(vqa_cands[idx], vqa_scores[idx])):
            cand_answer = self.to_sentence.index_to_top_answer(c_id)
            if c_id == r_id:
                _log = '[%d]: %s (%0.2f)\t<<\n' % (i, cand_answer, c_score)
            else:
                _log = '[%d]: %s (%0.2f)\n' % (i, cand_answer, c_score)
            self.file_stream.write(_log)

    def refine_prediction(self, thresh=0.2):
        rep_tab = self.vqa_top_scores < thresh
        preds = self.vqa_top_preds.copy()
        preds[rep_tab] = self.rerank_preds[rep_tab]
        return preds

    def compute_accuracy(self):
        self.vqa_cands = np.concatenate(self.vqa_cands)
        self.vqa_top_preds = np.concatenate(self.vqa_top_preds)
        self.vqa_top_scores = np.concatenate(self.vqa_top_scores)
        self.labels = np.concatenate(self.labels)
        self.rerank_preds = np.concatenate(self.rerank_preds)

        valid_tab = self.labels < 2000

        def _get_num_col(x):
            if len(x.shape) == 1:
                return 1
            else:
                return x.shape[1]

        def compute_recall(preds, cond_tab=None):
            top_k = _get_num_col(preds)
            preds = preds.reshape([-1, top_k])
            num = preds.shape[0]
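            # count label matches over the top_k columns; any match counts as correct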
            match = np.zeros(num)
            for k in range(top_k):
                pred = preds[:, k]
                match += np.equal(pred, self.labels)
            correct = np.greater(match, 0)
            if cond_tab is None:
                cond_tab = valid_tab
            else:
                cond_tab = np.logical_and(valid_tab, cond_tab)

            valid_correct = correct[cond_tab]
            acc = valid_correct.sum() / float(valid_correct.size)
            prop = cond_tab.sum() / float(valid_tab.sum())
            return acc * 100, prop * 100

        print('\n')
        print('VQA acc@1: %0.2f [%0.1f%%]' %
              compute_recall(self.vqa_top_preds))
        print('VQA acc@3: %0.2f [%0.1f%%]' % compute_recall(self.vqa_cands))
        print('iVQA acc@1: %0.2f [%0.1f%%]' %
              compute_recall(self.rerank_preds))
        print('VQA and iVQA acc@1: %0.2f [%0.1f%%]' %
              compute_recall(self.vqa_top_preds,
                             np.equal(self.vqa_top_preds, self.rerank_preds)))
        thresh = np.arange(0.1, 1, 0.1, np.float32)
        for t in thresh:
            acc, p = compute_recall(self.vqa_top_preds,
                                    np.greater(self.vqa_top_scores, t))
            print('VQA acc@1 [t=%0.1f]: %0.2f [%0.1f%%]' % (t, acc, p))

        print('\nRefine:')
        thresh = np.arange(0.05, 1, 0.05, np.float32)
        for t in thresh:
            acc, p = compute_recall(self.refine_prediction(t))
            print('Refine VQA acc@1 [t=%0.2f]: %0.2f [%0.1f%%]' % (t, acc, p))

    def close(self):
        self.file_stream.close()
Example #18
def test():
    # Build the inference graph.
    config = QuestionGeneratorConfig()
    reader = TFRecordDataFetcher(FLAGS.input_files, config.image_feature_key)

    # Create model creator
    model_creator = create_model_fn(FLAGS.model_type)

    # create multiple choice question manger
    mc_manager = MultiChoiceQuestionManger(
        subset='trainval', answer_coding=model_creator.ans_coding)

    # Create reader post-processing function
    reader_post_proc_fn = build_mc_reader_proc_fn(model_creator.ans_coding)

    g = tf.Graph()
    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    checkpoint_path = ckpt.model_checkpoint_path
    print(checkpoint_path)
    with g.as_default():
        model = model_creator(config, phase='evaluate')
        model.build()
    # g.finalize()

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)

    filenames = []
    for file_pattern in FLAGS.input_files.split(","):
        filenames.extend(tf.gfile.Glob(file_pattern))
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info("Running caption generation on %d files matching %s",
                    len(filenames), FLAGS.input_files)

    result, rescore_data, state_rescore_data = [], [], []
    with tf.Session(graph=g) as sess:
        # Load the model from checkpoint.
        saver = tf.train.Saver(var_list=tf.all_variables())
        saver.restore(sess, checkpoint_path)

        itr = 0
        while not reader.eof():
            if itr > 50000:  # cache at most 50k questions
                break
            outputs = reader.pop_batch()
            im_ids, quest_id, im_feat, ans_w2v, quest_ids, ans_ids = outputs
            mc_ans, mc_coding = mc_manager.get_candidate_answer_and_word_coding(
                quest_id)
            inputs = reader_post_proc_fn(outputs, mc_coding)
            perplexity, state = sess.run(
                [model.likelihood, model.final_decoder_state],
                feed_dict=model.fill_feed_dict(inputs))
            perplexity = perplexity.reshape(inputs[-1].shape)
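            # average per-token perplexity (dropping the last position) per
            # candidate answer; a lower loss means that answer makes the
            # question more likely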
            loss = perplexity[:, :-1].mean(axis=1)

            # generated = [generated[0]]  # sample 3
            question = to_sentence.index_to_question(quest_ids)
            answer = to_sentence.index_to_answer(ans_ids)
            top1_mc_ans = mc_ans[loss.argmin()]
            result.append({u'answer': top1_mc_ans, u'question_id': quest_id})

            # add hidden state saver
            label = mc_manager.get_binary_label(quest_id)
            state_sv = {'quest_id': quest_id, 'states': state, 'label': label}
            state_rescore_data.append(state_sv)

            if itr % 100 == 0:
                print('============== %d ============' % itr)
                print('image id: %d, question id: %d' % (im_ids, quest_id))
                print('question\t: %s' % question)
                print('answer\t: %s' % answer)
                top_k_ids = loss.argsort()[:3].tolist()
                for i, idx in enumerate(top_k_ids):
                    t_mc_ans = mc_ans[idx]
                    print('VAQ answer <%d>\t: %s (%0.2f)' %
                          (i, t_mc_ans, loss[idx]))

            itr += 1
            # save information for train classifier
            mc_label = np.array([a == answer for a in mc_ans],
                                dtype=np.float32)
            quest_target = inputs[-2]
            datum = {
                'quest_seq': quest_target,
                'perplex': perplexity,
                'label': mc_label,
                'quest_id': quest_id
            }
            rescore_data.append(datum)

        quest_ids = [res[u'question_id'] for res in result]
        # save results
        tf.logging.info('Saving results')
        res_file = FLAGS.result_file % get_model_iteration(checkpoint_path)
        json.dump(result, open(res_file, 'w'))
        tf.logging.info('Saving rescore data...')
        from util import pickle
        # pickle('data/rescore_dev.pkl', rescore_data)
        pickle('data/rescore_state_dev.pkl', state_rescore_data)
        tf.logging.info('Done!')
        return res_file, quest_ids
class VarIVQAModelWrapper(object):
    def __init__(self):
        self.to_sentence = SentenceGenerator(trainset='trainval')
        self.sent_encoder = SentenceEncoder()
        self.g = tf.Graph()
        self.ckpt_file = 'model/v1_var_kptrain_VAQ-VarDS/model.ckpt-3300000'
        from models.variational_ds_ivqa_model import VariationIVQAModel
        from config import ModelConfig
        config = ModelConfig()
        self._top_k = 10
        self.name = ' ------- VarIVQA ------- '

        with self.g.as_default():
            self.sess = tf.Session()
            self.model = VariationIVQAModel(config, phase='sampling_beam')
            self.model.build()
            vars = tf.trainable_variables()
            self.saver = tf.train.Saver(var_list=vars)
            self.saver.restore(self.sess, self.ckpt_file)

        self._init_image_cache()

    def _init_image_cache(self):
        from util import load_hdf5
        d = load_hdf5('data/attribute_std_mscoco_kpval.data')
        # d = load_hdf5('data/res152_std_mscoco_kpval.data')
        image_ids = d['image_ids']
        self.im_feats = d['att_arr']
        image_id2index = {image_id: idx for idx, image_id in enumerate(image_ids)}
        self.image_id2index = image_id2index

    def _load_image(self, image_id):
        idx = self.image_id2index[image_id]
        return self.im_feats[idx][np.newaxis, :]

    def _process_answer(self, answers):
        arr, arr_len = self.sent_encoder.encode_sentence(answers)
        return arr, arr_len

    def inference(self, image_id, answer):
        image = self._load_image(image_id)
        arr, arr_len = self._process_answer(answer)
        scores, pathes = self.model.greedy_inference([image, arr, arr_len], self.sess)
        self.show_prediction(scores, pathes)
        return scores

    def show_prediction(self, scores, pathes):
        # wrap inputs
        _this_batch_size = 1
        seq_len = pathes.shape[1]
        dummy_scores = np.tile(scores[:, np.newaxis], [1, seq_len])
        # dummy_scores = np.zeros_like(pathes, dtype=np.float32)
        ivqa_scores, ivqa_pathes, ivqa_counts = post_process_variation_questions_with_count_v2(dummy_scores, pathes,
                                                                                               _this_batch_size)
        for _q_idx, (ps, scs, cs) in enumerate(zip(ivqa_pathes, ivqa_scores, ivqa_counts)):
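            # negate the scores so argsort gives descending order, keep the top-k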
            inds = np.argsort(-np.array(scs))[:self._top_k]
            for _p_idx, _pick_id in enumerate(inds):
                _p = ps[_pick_id]
                _s = scs[_pick_id]  # / (len(_p) - 2)
                sentence = self.to_sentence.index_to_question(_p)
                print('%s (%0.2f)' % (sentence, _s))
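
# A minimal usage sketch, not part of the original example: the image id and
# answer below are hypothetical, and it assumes the checkpoint and HDF5
# feature files referenced in the class exist on disk.
if __name__ == '__main__':
    wrapper = VarIVQAModelWrapper()
    wrapper.inference(262148, 'tennis')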
Example #20
def test(checkpoint_path=None):
    config = ModelConfig()
    config.phase = 'other'
    use_answer_sequence = 'lstm' in FLAGS.model_type or FLAGS.model_type == 'VAQ-A'
    config.model_type = FLAGS.model_type

    # build data reader
    reader = Reader(batch_size=1,
                    subset='dev',
                    output_attr=True,
                    output_im=False,
                    output_qa=True,
                    output_capt=False,
                    output_ans_seq=use_answer_sequence)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir %
                                             FLAGS.model_type)
        checkpoint_path = ckpt.model_checkpoint_path

    res_file = 'result/quest_vaq_%s.json' % FLAGS.model_type.upper()
    # build and restore model
    model = InferenceWrapper()
    restore_fn = model.build_graph_from_config(config, checkpoint_path)

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    restore_fn(sess)

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)
    generator = caption_generator.CaptionGenerator(model,
                                                   to_sentence.question_vocab)

    results = []
    print('Running inference on split %s...' % TEST_SET)
    num_batches = reader.num_batches
    for i in range(num_batches):
        outputs = reader.get_test_batch()
        im_feed, quest, _, ans_feed, quest_id, image_id = outputs
        if ans_feed == 2000:
            continue
        image_id = int(image_id)
        quest_id = int(quest_id)
        im_feed = np.squeeze(im_feed)
        quest = np.squeeze(quest)
        # print('\n============== %d ============\n' % i)
        captions = generator.beam_search(sess, [im_feed, ans_feed])
        question = to_sentence.index_to_question(quest.tolist())
        answer = to_sentence.index_to_top_answer(ans_feed)
        print('============== %d ============' % i)
        print('image id: %d, question id: %d' % (image_id, quest_id))
        print('question\t: %s' % question)
        tmp = []
        for c, g in enumerate(captions[0:3]):
            quest = to_sentence.index_to_question(g.sentence)
            tmp.append(quest)
            print('<question %d>\t: %s' % (c, quest))
        print('answer\t: %s\n' % answer)

        caption = captions[0]
        sentence = to_sentence.index_to_question(caption.sentence)
        res_i = {
            'image_id': image_id,
            'question_id': quest_id,
            'question': sentence
        }
        results.append(res_i)
    save_json(res_file, results)
    return res_file
Example #21
def test(checkpoint_path=None):
    config = ModelConfig()
    config.phase = 'other'
    config.model_type = FLAGS.model_type
    config.cell_option = 5
    # config.cell_option = 4
    beam_size = 3
    subset = 'kptest'
    # build data reader
    reader = Reader(batch_size=1,
                    subset=subset,
                    output_attr=True,
                    output_im=True,
                    output_qa=True,
                    output_capt=False,
                    output_ans_seq=True)
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir %
                                             FLAGS.model_type)
        checkpoint_path = ckpt.model_checkpoint_path

    res_file = 'result/beamsearch_vaq_%s_%s.json' % (FLAGS.model_type.upper(),
                                                     subset)
    # rerank_file = 'result/beamsearch_vaq_reank_cands_%s_val.json' % FLAGS.model_type.upper()
    # build and restore model
    model = InferenceWrapper()
    restore_fn = model.build_graph_from_config(config, checkpoint_path)

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' %
                    os.path.basename(checkpoint_path))
    restore_fn(sess)

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)
    generator = caption_generator.CaptionGenerator(model,
                                                   to_sentence.question_vocab,
                                                   beam_size=beam_size)

    results = []
    re_rank_cands = []
    print('Running inference on split %s...' % TEST_SET)
    num_batches = reader.num_batches
    for i in range(num_batches):
        outputs = reader.get_test_batch()
        im_feed, attr, quest, quest_len, _, ans_seq, ans_seq_len, quest_id, image_id = outputs

        image_id = int(image_id)
        quest_id = int(quest_id)
        im_feed = np.squeeze(im_feed)
        quest = np.squeeze(quest)
        # print('\n============== %d ============\n' % i)
        captions = generator.beam_search(sess,
                                         [im_feed, attr, ans_seq, ans_seq_len])
        question = to_sentence.index_to_question(quest.tolist())
        # answer = to_sentence.index_to_top_answer(ans_feed)
        print('============== %d ============' % i)
        print('image id: %d, question id: %d' % (image_id, quest_id))
        print('question\t: %s' % question)
        tmp, tmp_scores = [], []
        vaq_cands = {'question_id': quest_id}
        for c, g in enumerate(captions):
            quest = to_sentence.index_to_question(g.sentence)
            tmp.append(quest)
            tmp_scores.append(g.logprob)
            print('<question %d>\t: %s' % (c, quest))
        # print('answer\t: %s\n' % answer)
        vaq_cands['questions'] = tmp
        vaq_cands['confidence'] = tmp_scores
        re_rank_cands.append(vaq_cands)

        caption = captions[0]
        sentence = to_sentence.index_to_question(caption.sentence)
        res_i = {
            'image_id': image_id,
            'question_id': quest_id,
            'question': sentence
        }
        results.append(res_i)
    save_json(res_file, results)
    # save_json(rerank_file, re_rank_cands)
    return res_file
def ivqa_decoding_beam_search(ckpt_dir, method):
    model_config = ModelConfig()
    inf_type = 'beam'
    assert (inf_type in ['beam', 'rand'])
    # method = FLAGS.method
    if inf_type == 'rand':
        res_file = 'result/bs_RL2_cands_LM_%s.json' % method
    else:
        res_file = 'result/bs_RL2_cands_LM_%s_BEAM.json' % method
    if os.path.exists(res_file):
        print('File %s already exist, skipped' % res_file)
        return
    # score_file = 'result/bs_vqa_scores_%s.mat' % method
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    subset = 'bs_test'
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)

    exemplar = ExemplarLanguageModel()

    # if checkpoint_path is None:
    #     if FLAGS.checkpoint_dir:
    #         ckpt_dir = FLAGS.checkpoint_dir
    #     else:
    #         ckpt_dir = FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type)
    # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)
    checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        if inf_type == 'rand':
            model = model_fn(model_config, 'sampling')
            model.set_num_sampling_points(1000)
        else:
            model = model_fn(model_config, 'sampling_beam')
            model.set_num_sampling_points(1000)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

        # build language model
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        # language_model.set_cache_dir('v1_var_att_lowthresh_cache_restval_VAQ-VarRL')
        language_model.set_session(sess)
        language_model.setup_model()

    # build VQA model
    # vqa_model = N2MNWrapper()
    # vqa_model = MLBWrapper()
    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = {}
    # batch_vqa_scores = []

    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    for i in range(num):
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        im, _, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        # pdb.set_trace()
        if top_ans == 2000:
            continue

        print('\n%d/%d' % (i, num))
        question_id = int(quest_ids[0])
        image_id = int(image_ids[0])

        t1 = time()
        pathes, scores = model.greedy_inference([im, ans_tokens, ans_len], sess)

        # find unique
        ivqa_scores, ivqa_pathes = process_one(scores, pathes)
        t2 = time()
        print('Time for sample generation: %0.2fs' % (t2 - t1))

        # apply language model
        language_model_inputs = wrap_samples_for_language_model([ivqa_pathes],
                                                                pad_token=model.pad_token - 1,
                                                                max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(language_model_inputs)
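        # samples that exactly match an annotated question are always kept as legal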
        legality_scores[match_gt] = 1.0
        num_keep = max(100, (legality_scores > 0.3).sum())  # no less than 100
        valid_inds = (-legality_scores).argsort()[:num_keep]

        t3 = time()
        print('Time for language model filtration: %0.2fs' % (t3 - t2))

        # for idx in valid_inds:
        #     path = ivqa_pathes[idx]
        #     sc = legality_scores[idx]
        #     sentence = to_sentence.index_to_question(path)
        #     # questions.append(sentence)
        #     print('%s (%0.3f)' % (sentence, sc))

        # apply  VQA model
        sampled = [ivqa_pathes[_idx] for _idx in valid_inds]
        legality_scores = legality_scores[valid_inds]

        result_key = int(question_id)
        tmp = []
        for idx, path in enumerate(sampled):
            # path = sampled[idx]
            sc = legality_scores[idx]
            sentence = to_sentence.index_to_question(path)
            # aug_quest_id = question_id * 1000 + _pid
            res_i = {'image_id': int(image_id),
                     'aug_id': idx,
                     'question_id': question_id,
                     'question': sentence,
                     'score': float(sc)}
            tmp.append(res_i)
        print('Number of unique questions: %d' % len(tmp))
        results[result_key] = tmp

    save_json(res_file, results)
Example #23
def ivqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s.json' % FLAGS.model_type.upper()
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-Var', phase='test')
    writer = ExperimentWriter('latex/examples_noimage_tmp')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    subset = 'kpval'
    reader = create_fn(batch_size=1, subset=subset, version=FLAGS.test_version)

    if checkpoint_path is None:
        # ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.version, FLAGS.model_type)
        ckpt_dir = 'model/v1_var_att_noimage_cache_restval_VAQ-VarRL'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        scores, pathes = model.greedy_inference(outputs[:-2], sess)
        scores, pathes = post_process_prediction(scores, pathes)
        pathes, pathes_len = put_to_array(pathes)
        scores, pathes = find_unique_rows(scores, pathes)
        scores, pathes = post_process_prediction(scores, pathes[:, 1:])
        # question = to_sentence.index_to_question(pathes[0])
        # print('%d/%d: %s' % (i, num_batches, question))

        # show image
        os.system('clear')
        im_file = '%s2014/COCO_%s2014_%012d.jpg' % ('val', 'val', image_ids[0])
        im_path = os.path.join(IM_ROOT, im_file)
        # im = imread(im_path)
        # plt.imshow(im)
        ans, ans_len = outputs[1:3]
        answers = extract_gt(ans, ans_len)
        answer = to_sentence.index_to_answer(answers[0])
        # plt.title(answer)

        print('Answer: %s' % answer)
        questions = []
        for path in pathes:
            sentence = to_sentence.index_to_question(path)
            questions.append(sentence)
            print(sentence)
        # plt.show()
        writer.add_result(image_ids[0], quest_ids[0], im_path, answer,
                          questions)

        for quest_id, image_id, path in zip(quest_ids, image_ids, pathes):
            sentence = to_sentence.index_to_question(path)
            res_i = {
                'image_id': int(image_id),
                'question_id': int(quest_id),
                'question': sentence
            }
            results.append(res_i)

        if i == 40:  # only render the first 41 batches
            break

    writer.render()

    save_json(res_file, results)
    return res_file
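
put_to_array and find_unique_rows are project-internal helpers; assuming they pad variable-length token paths into one matrix and drop duplicate rows, a rough self-contained sketch of that dedup step:

import numpy as np

def pad_paths(paths, pad=0):
    # right-pad each token path so all rows have equal length
    max_len = max(len(p) for p in paths)
    arr = np.full((len(paths), max_len), pad, dtype=np.int32)
    for i, p in enumerate(paths):
        arr[i, :len(p)] = p
    return arr, np.array([len(p) for p in paths])

def unique_rows(scores, arr):
    # keep the first occurrence of each distinct row, in original order
    _, keep = np.unique(arr, axis=0, return_index=True)
    keep = np.sort(keep)
    return scores[keep], arr[keep]

arr, lens = pad_paths([[1, 4, 4, 2], [1, 4, 4, 2], [1, 5, 2]])
uniq_scores, uniq = unique_rows(np.array([0.9, 0.9, 0.4]), arr)
assert uniq.shape[0] == 2
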
class IVQARewards(object):
    def __init__(self,
                 metric='cider',
                 gt_has_start_end_token=False,
                 pred_has_start_end_token=True,
                 use_end_token=True,
                 subset='kptrain'):
        self.gt_has_start_end_token = gt_has_start_end_token
        self.pred_has_start_end_token = pred_has_start_end_token
        self.use_end_token = use_end_token
        assert metric in ('cider', 'bleu')
        if metric == 'cider':
            self.scorer = ciderEval('vqa_%s_idxs_end' % subset)
        elif metric == 'bleu':
            self.scorer = Bleu(n=4)
        self.to_sentence = SentenceGenerator(trainset='trainval')
        self._num_call = 0
        self.print_interval = 100

    def get_reward(self, sampled, gts):
        """
        compute rewards given a sampled sentence and gt, the reward is
        computed based on CIDEr-D
        :param sampled: a list of list of pathes
        :param gts: a list of ground-truth samples [seq, seq_len]
        :param answers: numpy.array of ground-truth top answer index
        of VQA
        :return: numpy array of size (N,) of reward for each sample
        """
        gts = self.process_gt(gts)  # convert to list
        sampled = self.process_sampled(sampled)  # convert to list
        wrapped_gt, wrapped_sample = self.wrap_samples(sampled, gts)
        _, rewards = self.scorer.evaluate(wrapped_gt, wrapped_sample)
        # if not self._num_call % self.print_interval:
        #     self.print_questions(gts, sampled, rewards)
        # self._num_call += 1
        # rewards = supress_cider_score(rewards)
        return rewards / 10.  # CIDEr-D lies in [0, 10]; normalise to [0, 1]

    def print_questions(self, gts, sampled, rewards):
        n_vis = 2
        num_tot = len(gts)
        vis_ids = np.random.choice(num_tot, size=(n_vis, ), replace=False)
        offsets = np.cumsum([len(sms) for sms in sampled]).tolist()
        offsets = [0] + offsets
        for _vis_id in vis_ids:
            _gt = gts[_vis_id]
            sent = self.to_sentence.index_to_question(_gt[:-1])
            print('\nGT: %s' % sent)
            _sms = sampled[_vis_id]
            _offset = offsets[_vis_id]
            for _sid, sm in enumerate(_sms):
                _r = rewards[_offset + _sid]
                sent = self.to_sentence.index_to_question(sm[:-1])
                print('%s (%0.3f)' % (sent, _r))
        print('\n')

    @staticmethod
    def wrap_samples(sampled, gts):
        wrapped_gt = OrderedDict()
        wrapped_sample = []
        idx = 0
        for _var_s, _gt in zip(sampled, gts):
            _gt_pat = serialize_path(_gt)
            for _s in _var_s:
                _key = str(idx)
                _s_pat = serialize_path(_s)
                wrapped_gt[_key] = [_gt_pat]
                wrapped_sample.append({'image_id': _key, 'caption': [_s_pat]})
                idx += 1
        return wrapped_gt, wrapped_sample

    def process_gt(self, gts):
        capt, capt_len = gts
        seqs = []
        for c, clen in zip(capt, capt_len):
            _gt = c[:clen].tolist()
            if self.gt_has_start_end_token:
                _gt = _gt[1:]
            else:
                _gt += [END_TOKEN]
            if not self.use_end_token:
                _gt = _gt[:-1]
            seqs.append(_gt)
        return seqs

    def process_sampled(self, sampled):
        new_sampled = []
        for ps in sampled:
            tmp = []
            for p in ps:
                if self.pred_has_start_end_token:
                    _p = p[1:]
                else:
                    _p = p + [END_TOKEN]
                if not self.use_end_token:
                    _p = _p[:-1]
                tmp.append(_p)
            new_sampled.append(tmp)
        return new_sampled
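
The scorer consumes ground truths keyed by a per-candidate id plus a list of candidate records, so every sampled question is scored against its own ground truth. A toy, self-contained sketch of that wrapping (serialize_path is assumed to join token indices into a space-separated string):

from collections import OrderedDict

def wrap_for_scorer(sampled, gts):
    wrapped_gt, wrapped_sample = OrderedDict(), []
    idx = 0
    for cand_group, gt in zip(sampled, gts):
        for cand in cand_group:
            key = str(idx)
            wrapped_gt[key] = [gt]
            wrapped_sample.append({'image_id': key, 'caption': [cand]})
            idx += 1
    return wrapped_gt, wrapped_sample

gt, sm = wrap_for_scorer([['3 7 9 2', '3 8 2']], ['3 7 9 2'])
assert list(gt) == ['0', '1'] and sm[1]['caption'] == ['3 8 2']
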
Example #25
    # numpy, time and AttentionDataReader are presumably imported at module
    # level in the original file; the explicit imports below make this
    # snippet closer to self-contained
    import numpy as np
    from time import time
    from inference_utils.question_generator_util import SentenceGenerator

    to_sentence = SentenceGenerator(trainset='trainval')
    reader = AttentionDataReader(batch_size=4,
                                 subset='trainval',
                                 counter_sampling=True)
    reader.start()
    from time import sleep

    t = time()
    for i in range(4):
        data = reader.pop_batch()
        data[0].mean()
        feats, q, q_len, a = data
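        # with counter_sampling=True the batch presumably holds contrastive
        # pairs laid out as [a1, a2, b1, b2], so item c pairs with item c + 2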
        for c in range(2):
            # skip pairs where either top answer is out of vocabulary
            # (index 2000), checking the raw indices before decoding
            if a[c] == 2000 or a[c + 2] == 2000:
                continue
            q1 = to_sentence.index_to_question(q[c])
            a1 = to_sentence.index_to_top_answer(a[c])
            q2 = to_sentence.index_to_question(q[c + 2])
            a2 = to_sentence.index_to_top_answer(a[c + 2])
            print('Index: %d' % i)
            print('Q1: %s\nA1: %s \n' % (q1, a1))
            print('Q2: %s\nA2: %s \n' % (q2, a2))
            print('\n')
            sleep(0.4)
        # print(data[1].mean())
        # print(data[2].max())
        # print(data[0].shape)

        reader.update_loss(np.random.rand(4))
def ivqa_decoding_beam_search(checkpoint_path=None):
    model_config = ModelConfig()
    method = FLAGS.method
    res_file = 'result/bs_cand_for_vis.json'
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval',
                                    top_ans_file='../VQA-tensorflow/data/vqa_trainval_top2000_answers.txt')

    # get data reader
    subset = 'kpval'
    reader = create_fn(batch_size=1, subset=subset,
                       version=FLAGS.test_version)

    exemplar = ExemplarLanguageModel()

    if checkpoint_path is None:
        if FLAGS.checkpoint_dir:
            ckpt_dir = FLAGS.checkpoint_dir
        else:
            ckpt_dir = FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling')
        model.set_num_sampling_points(5000)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

        # build language model
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        # language_model.set_cache_dir('v1_var_att_lowthresh_cache_restval_VAQ-VarRL')
        language_model.set_session(sess)
        language_model.setup_model()

        # build VQA model (disabled in this variant)
    # vqa_model = N2MNWrapper()
    # vqa_model = MLBWrapper()
    num_batches = reader.num_batches

    quest_ids_to_vis = {5682052: 'bread',
                        965492: 'plane',
                        681282: 'station'}

    print('Running beam search inference...')
    results = []
    batch_vqa_scores = []

    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    for i in range(num):

        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        quest_id_key = int(quest_ids)

        if quest_id_key not in quest_ids_to_vis:
            continue
        # pdb.set_trace()

        im, gt_q, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        # pdb.set_trace()
        if top_ans == 2000:  # skip out-of-vocabulary answers
            continue

        print('\n%d/%d' % (i, num))
        question_id = int(quest_ids[0])
        image_id = int(image_ids[0])

        t1 = time()
        pathes, scores = model.greedy_inference([im, ans_tokens, ans_len], sess)

        # find unique
        ivqa_scores, ivqa_pathes = process_one(scores, pathes)
        t2 = time()
        print('Time for sample generation: %0.2fs' % (t2 - t1))

        # apply language model
        language_model_inputs = wrap_samples_for_language_model([ivqa_pathes],
                                                                pad_token=model.pad_token - 1,
                                                                max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(language_model_inputs)
        legality_scores[match_gt] = 1.0
        num_keep = max(100, (legality_scores > 0.1).sum())  # no less than 100
        valid_inds = (-legality_scores).argsort()[:num_keep]
        print('keep: %d/%d' % (num_keep, len(ivqa_pathes)))

        t3 = time()
        print('Time for language model filtering: %0.2fs' % (t3 - t2))

        def token_arr_to_list(arr):
            return arr.flatten().tolist()

        for _pid, idx in enumerate(valid_inds):
            path = ivqa_pathes[idx]
            # sc = vqa_scores[idx]
            sentence = to_sentence.index_to_question(path)
            aug_quest_id = question_id * 1000 + _pid
            res_i = {'image_id': int(image_id),
                     'aug_id': aug_quest_id,
                     'question_id': question_id,
                     'target': sentence,
                     'top_ans_id': int(top_ans),
                     'question': to_sentence.index_to_question(token_arr_to_list(gt_q)),
                     'answer': to_sentence.index_to_answer(token_arr_to_list(ans_tokens))}
            results.append(res_i)

    save_json(res_file, results)
    return None
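
The augmented ids above pack the candidate rank into the question id as question_id * 1000 + rank, which caps each question at 1000 candidates. A small sketch of the round trip (hypothetical helper names):

def encode_aug_id(question_id, rank):
    assert 0 <= rank < 1000  # at most 1000 candidates per question
    return question_id * 1000 + rank

def decode_aug_id(aug_id):
    return divmod(aug_id, 1000)  # -> (question_id, rank)

assert decode_aug_id(encode_aug_id(5682052, 7)) == (5682052, 7)
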
class MixReward(object):
    def __init__(self, thresh=0.3, cider_w=0.6, dis_vqa_reward=False):
        self.vqa_reward = VQARewards(use_dis_reward=dis_vqa_reward)
        self.cider_reward = IVQARewards()
        self.diversity_reward = DiversityReward()
        self.thresh = thresh
        self.cider_w = cider_w
        self.to_sentence = SentenceGenerator(trainset='trainval')
        self._num_call = 0
        self.print_interval = 100
        self.language_thresh = 0.2
        self.cider_thresh = 0.05
        self.use_cider = True
        self.lm = None
        self.replay_buffer = None

    def set_language_model(self, model):
        self.lm = model

    def set_vqa_model(self, vqa):
        self.vqa_reward.set_vqa_model(vqa)

    def get_vqa_model(self):
        return self.vqa_reward.model

    def set_replay_buffer(self, insert_thresh=0.5, sv_dir='vqa_replay_buffer'):
        from vqa_replay_buffer import VQAReplayBuffer
        self.replay_buffer = VQAReplayBuffer(insert_thresh=insert_thresh,
                                             sv_dir=sv_dir)

    def cache_questions(self, quest_ids, questions, rewards):
        vqa_reward, _, language_reward, _, _ = rewards
        mask = self.apply_language_mask(language_reward)  # is grammar correct
        self.replay_buffer.insert(quest_ids, questions, vqa_reward * mask)

    def set_cider_state(self, use_cider):
        self.use_cider = use_cider

    def set_language_thresh(self, t):
        self.language_thresh = t

    def compute_lm_reward(self, _lm_inputs):
        return self.lm.inference(_lm_inputs)

    def apply_cider_mask(self, cider_scores):
        return (cider_scores >= self.cider_thresh).astype(np.float32)

    def apply_language_mask(self, language_scores):
        return (language_scores >= self.language_thresh).astype(np.float32)

    def apply_mask(self, rewards):
        [vqa_reward, cider_reward, language_reward, diversity_reward] = rewards
        mask = self.apply_cider_mask(cider_reward)
        mask *= self.apply_language_mask(language_reward)
        return vqa_reward * mask * diversity_reward

    def get_reward(self, sampled, gts, context):
        diversity_reward, is_gt = self.diversity_reward.get_reward(
            sampled, context[2])
        vqa_reward, aug_data = self.vqa_reward.get_reward(sampled, context[0])
        cider_reward = self.cider_reward.get_reward(sampled, gts)  # cider
        language_reward = self.compute_lm_reward(context[1])
        language_reward[is_gt] = 1.0  # samples matching a GT question count as fully legal
        rewards = [vqa_reward, cider_reward, language_reward, diversity_reward]
        overall_reward = self.apply_mask(rewards)
        rewards.append(overall_reward)
        # cache and print questions
        if self.replay_buffer:
            self.cache_questions(context[3], sampled, rewards)
        self.print_questions(_parse_gt_questions(*gts), sampled, rewards)
        rewards = self.concat_rewards(rewards)
        return overall_reward, rewards, is_gt, aug_data

    def concat_rewards(self, inputs):
        return np.concatenate([_in[:, np.newaxis] for _in in inputs], axis=1)

    def print_questions(self, gts, sampled, rewards):
        self._num_call += 1
        if self._num_call % self.print_interval:
            return

        n_vis = 2
        num_tot = len(gts)
        r1, r2, r3, r0, r = rewards  # vqa, cider, language, diversity, overall
        vis_ids = np.random.choice(num_tot, size=(n_vis, ), replace=False)
        offsets = np.cumsum([len(sms) for sms in sampled]).tolist()
        offsets = [0] + offsets
        for _vis_id in vis_ids:
            _gt = gts[_vis_id]
            sent = self.to_sentence.index_to_question(_gt)
            print('\nGT: %s' % sent)
            _sms = sampled[_vis_id]
            _offset = offsets[_vis_id]
            for _sid, sm in enumerate(_sms):
                _r0 = r0[_offset + _sid]
                _r1 = r1[_offset + _sid]
                _r2 = r2[_offset + _sid]
                _r3 = r3[_offset + _sid]
                _r = r[_offset + _sid]
                sent = self.to_sentence.index_to_question(sm[1:-1])
                print(
                    '%s (vqa:%0.3f, cider:%0.3f, lm:%0.3f, diver: %0.3f, overall:%0.3f)'
                    % (sent, _r1, _r2, _r3, _r0, _r))
        print('\n')
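
In apply_mask the CIDEr and language scores act as hard 0/1 gates while the VQA and diversity rewards stay continuous; a toy numpy sketch with made-up numbers:

import numpy as np

vqa = np.array([0.9, 0.8, 0.7])
cider = np.array([0.50, 0.01, 0.40])  # gated at cider_thresh = 0.05
language = np.array([0.9, 0.9, 0.1])  # gated at language_thresh = 0.2
diversity = np.array([1.0, 1.0, 0.5])

mask = (cider >= 0.05).astype(np.float32)
mask *= (language >= 0.2).astype(np.float32)
overall = vqa * mask * diversity
# -> [0.9, 0.0, 0.0]: the second sample fails the CIDEr gate,
#    the third fails the language gate
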
Example #28
def test(checkpoint_path=None):
    config = ModelConfig()
    config.phase = 'other'
    config.model_type = FLAGS.model_type

    beam_size = 10
    subset = 'kptest'
    # build data reader
    create_fn = create_reader(FLAGS.model_type, phase='test')
    reader = create_fn(batch_size=1, subset=subset, version='v1')
    if checkpoint_path is None:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir % FLAGS.model_type)
        checkpoint_path = ckpt.model_checkpoint_path

    res_file = 'result/beamsearch_%s_%s.json' % (FLAGS.model_type.upper(), subset)
    cand_file = 'result/sampling_%s_%s.json' % (FLAGS.model_type.upper(), subset)

    # build and restore model
    model = InferenceWrapper()
    restore_fn = model.build_graph_from_config(config, checkpoint_path)

    sess = tf.Session(graph=tf.get_default_graph())
    tf.logging.info('Restore from model %s' % os.path.basename(checkpoint_path))
    restore_fn(sess)

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset=FLAGS.model_trainset)
    generator = caption_generator.CaptionGenerator(model, to_sentence.question_vocab,
                                                   beam_size=beam_size)

    results = []
    candidates = []
    print('Running inference on split %s...' % subset)
    num_batches = reader.num_batches
    for i in range(num_batches):
        outputs = reader.get_test_batch()
        im_feed, attr, ans_seq, ans_seq_len, quest_id, image_id = outputs

        image_id = int(image_id)
        quest_id = int(quest_id)
        im_feed = np.squeeze(im_feed)
        captions = generator.beam_search(sess, [im_feed, attr, ans_seq, ans_seq_len])

        print('============== %d ============' % i)
        print('image id: %d, question id: %d' % (image_id, quest_id))
        # print('question\t: %s' % question)
        tmp = []
        vaq_cands_i = {'question_id': quest_id, 'image_id': image_id}
        for c, g in enumerate(captions):
            quest = to_sentence.index_to_question(g.sentence)
            tmp.append(quest)
            print('[%02d]: %s' % (c, quest))

        vaq_cands_i['candidates'] = tmp
        candidates.append(vaq_cands_i)

        caption = captions[0]
        sentence = to_sentence.index_to_question(caption.sentence)
        res_i = {'image_id': image_id, 'question_id': quest_id, 'question': sentence}
        results.append(res_i)
    save_json(res_file, results)
    save_json(cand_file, candidates)
    return res_file
Example #29
def ivqa_decoding_beam_search(checkpoint_path=None):
    model_config = ModelConfig()
    method = FLAGS.method
    res_file = 'result/bs_gen_%s.json' % method
    score_file = 'result/bs_vqa_scores_%s.mat' % method
    # Get model
    model_fn = get_model_creation_fn('VAQ-Var')
    create_fn = create_reader('VAQ-VVIS', phase='test')

    # Create the vocabulary.
    to_sentence = SentenceGenerator(trainset='trainval')

    # get data reader
    subset = 'kptest'
    reader = create_fn(batch_size=1, subset=subset, version=FLAGS.test_version)

    exemplar = ExemplarLanguageModel()

    if checkpoint_path is None:
        if FLAGS.checkpoint_dir:
            ckpt_dir = FLAGS.checkpoint_dir
        else:
            ckpt_dir = FLAGS.checkpoint_pat % (FLAGS.version, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling')
        model.set_num_sampling_points(1000)
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

        # build language model
        language_model = LanguageModel()
        language_model.build()
        language_model.set_cache_dir('test_empty')
        # language_model.set_cache_dir('v1_var_att_lowthresh_cache_restval_VAQ-VarRL')
        language_model.set_session(sess)
        language_model.setup_model()

        # build VQA model
        vqa_model = VQAWrapper(g, sess)
    # vqa_model = MLBWrapper()
    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    batch_vqa_scores = []

    num = FLAGS.max_iters if FLAGS.max_iters > 0 else num_batches
    for i in range(num):

        outputs = reader.get_test_batch()

        # inference
        quest_ids, image_ids = outputs[-2:]
        im, _, _, top_ans, ans_tokens, ans_len = outputs[:-2]
        # pdb.set_trace()
        if top_ans == 2000:  # skip out-of-vocabulary answers
            continue

        print('\n%d/%d' % (i, num))
        question_id = int(quest_ids[0])
        image_id = int(image_ids[0])

        t1 = time()
        pathes, scores = model.greedy_inference([im, ans_tokens, ans_len],
                                                sess)

        # find unique
        ivqa_scores, ivqa_pathes = process_one(scores, pathes)
        t2 = time()
        print('Time for sample generation: %0.2fs' % (t2 - t1))

        # apply language model
        language_model_inputs = wrap_samples_for_language_model(
            [ivqa_pathes], pad_token=model.pad_token - 1, max_length=20)
        match_gt = exemplar.query(ivqa_pathes)
        legality_scores = language_model.inference(language_model_inputs)
        legality_scores[match_gt] = 1.0
        num_keep = max(100, (legality_scores > 0.1).sum())  # no less than 100
        valid_inds = (-legality_scores).argsort()[:num_keep]

        t3 = time()
        print('Time for language model filtering: %0.2fs' % (t3 - t2))

        # for idx in valid_inds:
        #     path = ivqa_pathes[idx]
        #     sc = legality_scores[idx]
        #     sentence = to_sentence.index_to_question(path)
        #     # questions.append(sentence)
        #     print('%s (%0.3f)' % (sentence, sc))

        # apply VQA model
        sampled = [ivqa_pathes[_idx] for _idx in valid_inds]
        # vqa_scores = vqa_model.get_scores(sampled, image_id, top_ans)
        vqa_scores, is_valid = vqa_model.get_scores(sampled, im, top_ans)
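        # is_valid presumably flags candidates whose predicted answer matches
        # top_ans; only those survive the conf_inds selection below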
        # conf_inds = (-vqa_scores).argsort()[:20]
        conf_inds = np.where(is_valid)[0]
        # pdb.set_trace()
        # conf_inds = (-vqa_scores).argsort()[:40]

        t4 = time()
        print('Time for VQA verification: %0.2fs' % (t4 - t3))

        this_mean_vqa_score = vqa_scores[conf_inds].mean()
        print('sampled: %d, unique: %d, legal: %d, gt: %d, mean score %0.2f' %
              (pathes.shape[0], len(ivqa_pathes), num_keep, match_gt.sum(),
               this_mean_vqa_score))
        batch_vqa_scores.append(this_mean_vqa_score)

        for _pid, idx in enumerate(conf_inds):
            path = sampled[idx]
            sc = vqa_scores[idx]
            sentence = to_sentence.index_to_question(path)
            aug_quest_id = question_id * 1000 + _pid
            res_i = {
                'image_id': int(image_id),
                'question_id': aug_quest_id,
                'question': sentence,
                'score': float(sc)
            }
            results.append(res_i)

    save_json(res_file, results)
    batch_vqa_scores = np.array(batch_vqa_scores, dtype=np.float32)
    mean_vqa_score = batch_vqa_scores.mean()
    from scipy.io import savemat
    savemat(score_file, {
        'scores': batch_vqa_scores,
        'mean_score': mean_vqa_score
    })
    print('BS mean VQA score: %0.3f' % mean_vqa_score)
    return res_file, mean_vqa_score
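
The saved .mat can be read back with scipy.io.loadmat for later analysis; a quick sketch (the file name assumes method == 'sampling'):

from scipy.io import loadmat

data = loadmat('result/bs_vqa_scores_sampling.mat')
scores = data['scores'].ravel()
print('%d batches, mean VQA score %0.3f' % (scores.size, data['mean_score'].item()))
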
Example #30
def var_vqa_decoding_beam_search(checkpoint_path=None, subset='kpval'):
    model_config = ModelConfig()
    res_file = 'result/quest_vaq_greedy_%s.json' % FLAGS.model_type.upper()
    # Get model
    model_fn = get_model_creation_fn(FLAGS.model_type)
    create_fn = create_reader('V7W-VarDS', phase='test')
    writer = ExperimentWriter('latex/v7w_%s' % FLAGS.model_type.lower())

    # Create the vocabulary.
    to_sentence = SentenceGenerator(
        trainset='train',
        ans_vocab_file='data2/v7w_train_answer_word_counts.txt',
        quest_vocab_file='data2/v7w_train_question_word_counts.txt',
        top_ans_file='data2/v7w_train_top2000_answers.txt')

    # get data reader (note: this overrides the subset argument)
    subset = 'val'
    reader = create_fn(batch_size=1, subset=subset, version=FLAGS.test_version)

    if checkpoint_path is None:
        ckpt_dir = FLAGS.checkpoint_dir % (FLAGS.trainset, FLAGS.model_type)
        # ckpt_dir = '/import/vision-ephemeral/fl302/models/v2_kpvaq_VAQ-RL/'
        ckpt = tf.train.get_checkpoint_state(ckpt_dir)
        checkpoint_path = ckpt.model_checkpoint_path

    # Build model
    g = tf.Graph()
    with g.as_default():
        # Build the model.
        model = model_fn(model_config, 'sampling')
        model.build()
        # Restore from checkpoint
        restorer = Restorer(g)
        sess = tf.Session()
        restorer.restore(sess, checkpoint_path)

    num_batches = reader.num_batches

    print('Running beam search inference...')
    results = []
    for i in range(num_batches):
        outputs = reader.get_test_batch()
        # pdb.set_trace()

        # inference
        images, quest, quest_len, ans, ans_len, quest_ids, image_ids = outputs
        scores, pathes = model.greedy_inference([images, quest, quest_len],
                                                sess)
        scores, pathes = post_process_prediction(scores, pathes)
        pathes, pathes_len = put_to_array(pathes)
        scores, pathes = find_unique_rows(scores, pathes)
        scores, pathes = post_process_prediction(scores, pathes[:, 1:])
        # question = to_sentence.index_to_question(pathes[0])
        # print('%d/%d: %s' % (i, num_batches, question))

        # show image
        os.system('clear')
        image_id = image_ids[0]
        im_path = _get_vg_image_root(image_id)
        # im = imread(im_path)
        # plt.imshow(im)
        questions = extract_gt(quest, quest_len)
        question = to_sentence.index_to_question(questions[0])
        print('Question: %s' % question)

        answers = extract_gt(ans, ans_len)
        answer = to_sentence.index_to_answer(answers[0])
        # plt.title(answer)

        print('Answer: %s' % answer)
        answers = []
        for path in pathes:
            sentence = to_sentence.index_to_answer(path)
            answers.append(sentence)
            print(sentence)
        # plt.show()
        qa = '%s - %s' % (question, answer)
        writer.add_result(image_ids[0], quest_ids[0], im_path, qa, answers)

        for quest_id, image_id, path in zip(quest_ids, image_ids, pathes):
            sentence = to_sentence.index_to_question(path)
            res_i = {
                'image_id': int(image_id),
                'question_id': int(quest_id),
                'question': sentence
            }
            results.append(res_i)

        if i == 40:  # only render the first 41 batches
            break

    writer.render()
    return