Example #1
def make_vocab_files():
    """
    Produce the question and answer vocabulary files.
    """
    print('making question vocab...', config.QUESTION_VOCAB_SPACE)
    qdic, _ = VQADataProvider.load_data(config.QUESTION_VOCAB_SPACE)
    question_vocab = make_question_vocab(qdic)
    print('making answer vocab...', config.ANSWER_VOCAB_SPACE)
    _, adic = VQADataProvider.load_data(config.ANSWER_VOCAB_SPACE)
    answer_vocab = make_answer_vocab(adic, config.NUM_OUTPUT_UNITS)
    return question_vocab, answer_vocab
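The function returns the two vocabularies rather than writing them itself, so the caller is expected to persist them. A minimal sketch of that step, assuming JSON as the on-disk format and hypothetical output paths (neither is confirmed by the snippet):

import json

def save_vocab(vocab, path):
    # Hypothetical helper: persist a {token: index} dict as JSON.
    with open(path, 'w') as f:
        json.dump(vocab, f)

question_vocab, answer_vocab = make_vocab_files()
save_vocab(question_vocab, 'vdict.json')  # assumed filename
save_vocab(answer_vocab, 'adict.json')    # assumed filename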
Example #2
def make_vocab_files():
    """
    Produce the question and answer vocabulary files.
    """
    write_log('making question vocab... ' + config.QUESTION_VOCAB_SPACE, 'log.txt')
    write_log('making question character vocab... ' + config.QUESTION_VOCAB_SPACE, 'log.txt')
    qdic, _ = VQADataProvider.load_data(config.QUESTION_VOCAB_SPACE)
    question_vocab, question_char_vocab = make_question_vocab(qdic)
    write_log('making answer vocab... ' + config.ANSWER_VOCAB_SPACE, 'log.txt')
    _, adic = VQADataProvider.load_data(config.ANSWER_VOCAB_SPACE)
    answer_vocab = make_answer_vocab(adic, config.NUM_OUTPUT_UNITS)
    return question_vocab, question_char_vocab, answer_vocab
Example #3
def make_vocab_files():
    """
    Produce the question, answer, and explanation vocabulary files.
    """
    print('making question vocab...', config.QUESTION_VOCAB_SPACE)
    qdic, _, _ = VQADataProvider.load_data(config.QUESTION_VOCAB_SPACE)
    question_vocab = make_question_vocab(qdic)
    print('making answer vocab...', config.ANSWER_VOCAB_SPACE)
    _, adic, _ = VQADataProvider.load_data(config.ANSWER_VOCAB_SPACE)
    answer_vocab = make_answer_vocab(adic, config.NUM_OUTPUT_UNITS)
    print('making explanation vocab...', config.EXP_VOCAB_SPACE)
    _, _, expdic = VQADataProvider.load_data(config.EXP_VOCAB_SPACE)
    explanation_vocab = make_exp_vocab(expdic)
    return question_vocab, answer_vocab, explanation_vocab
Example #4
def make_question_char_vocab(qdic):
    """
    Returns a dictionary that maps characters to indices.
    """
    cdict = {'': 0}
    vid = 1
    for qid in qdic.keys():
        q_str = qdic[qid]['qstr']
        q_char_list = VQADataProvider.seq_to_char_list(q_str)

        for c in q_char_list:
            if c not in cdict:
                cdict[c] = vid
                vid += 1

    return cdict
Example #5
def make_question_vocab(qdic):
    """
    Returns a dictionary that maps words to indices.
    """
    vdict = {'': 0}
    vid = 1
    for qid in qdic.keys():
        # sequence to list
        q_str = qdic[qid]['qstr']
        q_list = VQADataProvider.seq_to_list(q_str)

        # create dict
        for w in q_list:
            if w not in vdict:
                vdict[w] = vid
                vid += 1

    return vdict
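To make the indexing scheme concrete, here is a minimal sketch of the same first-come-first-served assignment, with a whitespace split standing in for VQADataProvider.seq_to_list (the real tokenizer is not shown in these examples):

qdic = {'q1': {'qstr': 'what color is the cat'},
        'q2': {'qstr': 'is the cat black'}}

vdict = {'': 0}
vid = 1
for qid in qdic:
    for w in qdic[qid]['qstr'].split():  # stand-in tokenizer
        if w not in vdict:
            vdict[w] = vid
            vid += 1

# vdict == {'': 0, 'what': 1, 'color': 2, 'is': 3, 'the': 4, 'cat': 5, 'black': 6}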
Example #6
def make_exp_vocab(exp_dic):
    """
    Returns a dictionary that maps words to indices.
    """
    exp_vdict = {'<EOS>': 0}
    exp_vdict[''] = 1
    exp_id = 2
    for qid in exp_dic.keys():
        exp_strs = exp_dic[qid]
        for exp_str in exp_strs:
            exp_list = VQADataProvider.seq_to_list(exp_str)

            for w in exp_list:
                if w not in exp_vdict:
                    exp_vdict[w] = exp_id
                    exp_id += 1

    return exp_vdict
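Note the two reserved slots: <EOS> is pinned to index 0 (which is what the decoding loop in generate_sentences further below checks for via predicted_word == 0) and the empty padding token to index 1, so real words start at 2. A quick self-check, assuming VQADataProvider.seq_to_list produces a plain word list:

exp_vdict = make_exp_vocab({'q1': ['because the cat is black']})
assert exp_vdict['<EOS>'] == 0 and exp_vdict[''] == 1
assert all(v >= 2 for k, v in exp_vdict.items() if k not in ('<EOS>', ''))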
Example #7
def exec_validation(device_id, mode, it='', visualize=False):

    caffe.set_device(device_id)
    caffe.set_mode_gpu()
    net = caffe.Net('./result/proto_test.prototxt',
                    './result/tmp.caffemodel',
                    caffe.TEST)

    dp = VQADataProvider(mode=mode, batchsize=64)
    total_questions = len(dp.getQuesIds())
    epoch = 0

    pred_list = []
    testloss_list = []
    stat_list = []

    while epoch == 0:
        t_word, t_cont, t_img_feature, t_answer, t_glove_matrix, t_qid_list, t_iid_list, epoch = dp.get_batch_vec()
        net.blobs['data'].data[...] = np.transpose(t_word,(1,0))
        net.blobs['cont'].data[...] = np.transpose(t_cont,(1,0))
        net.blobs['img_feature'].data[...] = t_img_feature
        net.blobs['label'].data[...] = t_answer
        net.blobs['glove'].data[...] = np.transpose(t_glove_matrix, (1,0,2))
        net.forward()
        t_pred_list = net.blobs['prediction'].data.argmax(axis=1)
        t_pred_str = [dp.vec_to_answer(pred_symbol) for pred_symbol in t_pred_list]
        testloss_list.append(net.blobs['loss'].data.copy())
        for qid, iid, ans, pred in zip(t_qid_list, t_iid_list, t_answer.tolist(), t_pred_str):
            pred_list.append({'answer': pred,
                              'question_id': int(dp.getStrippedQuesId(qid))})
            if visualize:
                q_list = dp.seq_to_list(dp.getQuesStr(qid))
                if mode in ('test-dev', 'test'):
                    ans_str = ''
                    ans_list = [''] * 10
                else:
                    ans_str = dp.vec_to_answer(ans)
                    ans_list = [dp.getAnsObj(qid)[i]['answer'] for i in range(10)]
                stat_list.append({
                    'qid': qid,
                    'q_list': q_list,
                    'iid': iid,
                    'answer': ans_str,
                    'ans_list': ans_list,
                    'pred': pred})
        percent = 100 * float(len(pred_list)) / total_questions
        sys.stdout.write('\r' + ('%.2f' % percent) + '%')
        sys.stdout.flush()

    mean_testloss = np.array(testloss_list).mean()

    if mode == 'val':
        valFile = './result/val2015_resfile'
        with open(valFile, 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
        annFile = config.DATA_PATHS['val']['ans_file']
        quesFile = config.DATA_PATHS['val']['ques_file']
        vqa = VQA(annFile, quesFile)
        vqaRes = vqa.loadRes(valFile, quesFile)
        vqaEval = VQAEval(vqa, vqaRes, n=2)
        vqaEval.evaluate()
        acc_overall = vqaEval.accuracy['overall']
        acc_perQuestionType = vqaEval.accuracy['perQuestionType']
        acc_perAnswerType = vqaEval.accuracy['perAnswerType']
        return mean_testloss, acc_overall, acc_perQuestionType, acc_perAnswerType
    elif mode == 'test-dev':
        filename = './result/vqa_OpenEnded_mscoco_test-dev2015_v3t' + str(it).zfill(8) + '_results'
        with open(filename+'.json', 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
    elif mode == 'test':
        filename = './result/vqa_OpenEnded_mscoco_test2015_v3c' + str(it).zfill(8) + '_results'
        with open(filename+'.json', 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
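A minimal usage sketch for the validation path; GPU 0 and the hard-coded ./result/proto_test.prototxt and ./result/tmp.caffemodel artifacts are assumptions carried over from the function body:

# 'val' mode returns the 4-tuple; test modes only write result files.
mean_loss, acc_overall, acc_per_qtype, acc_per_atype = exec_validation(0, 'val')
print('val loss %.4f, overall accuracy %.2f' % (mean_loss, acc_overall))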
Example #8
def exec_validation(device_id, mode, it='', visualize=False):

    caffe.set_device(device_id)
    caffe.set_mode_gpu()
    net = caffe.Net('./result/proto_test.prototxt',
                    './result/tmp.caffemodel',
                    caffe.TEST)

    dp = VQADataProvider(mode=mode, batchsize=64)
    total_questions = len(dp.getQuesIds())
    epoch = 0

    pred_list = []
    testloss_list = []
    stat_list = []

    while epoch == 0:
        t_word, t_cont, t_img_feature, t_answer, t_qid_list, t_iid_list, epoch = dp.get_batch_vec()
        net.blobs['data'].data[...] = np.transpose(t_word, (1, 0))
        net.blobs['cont'].data[...] = np.transpose(t_cont, (1, 0))
        net.blobs['img_feature'].data[...] = t_img_feature
        net.blobs['label'].data[...] = t_answer
        net.forward()
        t_pred_list = net.blobs['prediction'].data.argmax(axis=1)
        t_pred_str = [
            dp.vec_to_answer(pred_symbol) for pred_symbol in t_pred_list
        ]
        testloss_list.append(net.blobs['loss'].data.copy())
        for qid, iid, ans, pred in zip(t_qid_list, t_iid_list,
                                       t_answer.tolist(), t_pred_str):
            pred_list.append({
                'answer': pred,
                'question_id': int(dp.getStrippedQuesId(qid))
            })
            if visualize:
                q_list = dp.seq_to_list(dp.getQuesStr(qid))
                if mode in ('test-dev', 'test'):
                    ans_str = ''
                    ans_list = [''] * 10
                else:
                    ans_str = dp.vec_to_answer(ans)
                    ans_list = [
                        dp.getAnsObj(qid)[i]['answer'] for i in range(10)
                    ]
                stat_list.append({
                    'qid': qid,
                    'q_list': q_list,
                    'iid': iid,
                    'answer': ans_str,
                    'ans_list': ans_list,
                    'pred': pred})
        percent = 100 * float(len(pred_list)) / total_questions
        sys.stdout.write('\r' + ('%.2f' % percent) + '%')
        sys.stdout.flush()

    mean_testloss = np.array(testloss_list).mean()

    if mode == 'val':
        valFile = './result/val2015_resfile'
        with open(valFile, 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
        annFile = config.DATA_PATHS['val']['ans_file']
        quesFile = config.DATA_PATHS['val']['ques_file']
        vqa = VQA(annFile, quesFile)
        vqaRes = vqa.loadRes(valFile, quesFile)
        vqaEval = VQAEval(vqa, vqaRes, n=2)
        vqaEval.evaluate()
        acc_overall = vqaEval.accuracy['overall']
        acc_perQuestionType = vqaEval.accuracy['perQuestionType']
        acc_perAnswerType = vqaEval.accuracy['perAnswerType']
        return mean_testloss, acc_overall, acc_perQuestionType, acc_perAnswerType
    elif mode == 'test-dev':
        filename = './result/vqa_OpenEnded_mscoco_test-dev2015_v3t' + str(it).zfill(8) + '_results'
        with open(filename + '.json', 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)
    elif mode == 'test':
        filename = './result/vqa_OpenEnded_mscoco_test2015_v3c' + str(it).zfill(8) + '_results'
        with open(filename + '.json', 'w') as f:
            json.dump(pred_list, f)
        if visualize:
            visualize_failures(stat_list, mode)


def generate_sentences(args):
    vqa_proto_path, exp_proto_path, adict_path, vdict_path, exp_vdict_path = \
        verify_folder(args.folder, args.use_gt)
    model_path = args.model_path

    dp = VQADataProvider(args.ques_file,
                         args.ann_file,
                         args.exp_file,
                         vdict_path,
                         adict_path,
                         exp_vdict_path,
                         args.batch_size,
                         args.data_shape,
                         args.img_feature_prefix,
                         args.max_length,
                         args.exp_max_length,
                         mode='val')
    total_questions = len(dp.getQuesIds())
    print(total_questions, 'total questions')

    caffe.set_device(args.gpu)
    caffe.set_mode_gpu()

    vqa_data_provider_layer.CURRENT_DATA_SHAPE = args.data_shape
    vqa_data_provider_layer.MAX_WORDS_IN_QUESTION = args.max_length
    vqa_data_provider_layer.MAX_WORDS_IN_EXP = args.exp_max_length

    exp_data_provider_layer.CURRENT_DATA_SHAPE = args.data_shape[0]
    exp_data_provider_layer.MAX_WORDS_IN_EXP = 1  # predict one by one

    vqa_net = caffe.Net(vqa_proto_path, args.model_path, caffe.TEST)
    exp_net = caffe.Net(exp_proto_path, args.model_path, caffe.TEST)
    print('VQA model loaded:', vqa_proto_path, args.model_path)
    print('EXP model loaded:', exp_proto_path, args.model_path)

    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)

    if args.save_att_map:
        att_map_save_dir = os.path.join(args.out_dir, 'att_maps')
        if not os.path.exists(att_map_save_dir):
            os.makedirs(att_map_save_dir)

    with open(args.exp_file, 'r') as f:
        exp_anno = json.load(f)

    final_results = {}
    vqa_submit_results = []
    epoch = 0
    while epoch == 0:
        qvec, cvec, ivec, avec, exp_vec, exp_vec_out, exp_cvec_1, exp_cvec_2, \
        qid_list, _, epoch = dp.get_batch_vec()
        shape = ivec.shape
        if vqa_net.blobs['img_feature'].data.shape != shape:
            vqa_net.blobs['img_feature'].reshape(*shape)
            vqa_net.blobs['data'].reshape(*np.transpose(qvec, (1, 0)).shape)
            vqa_net.blobs['cont'].reshape(*np.transpose(cvec, (1, 0)).shape)
            vqa_net.blobs['label'].reshape(*avec.shape)
            vqa_net.blobs['exp'].reshape(*exp_vec.transpose().shape)
            vqa_net.blobs['exp_out'].reshape(*exp_vec_out.transpose().shape)
            vqa_net.blobs['exp_cont_1'].reshape(*exp_cvec_1.transpose().shape)
            vqa_net.blobs['exp_cont_2'].reshape(*exp_cvec_2.transpose().shape)

        vqa_net.blobs['data'].data[...] = np.transpose(qvec, (1, 0))
        vqa_net.blobs['cont'].data[...] = np.transpose(cvec, (1, 0))
        vqa_net.blobs['img_feature'].data[...] = ivec
        vqa_net.blobs['label'].data[...] = avec
        vqa_net.blobs['exp'].data[...] = exp_vec.transpose()  # not used
        vqa_net.blobs['exp_out'].data[...] = exp_vec_out.transpose()  # not used
        vqa_net.blobs['exp_cont_1'].data[...] = exp_cvec_1.transpose()  # not used
        vqa_net.blobs['exp_cont_2'].data[...] = exp_cvec_2.transpose()  # not used

        vqa_net.forward()

        vqa_predictions = vqa_net.blobs['prediction'].data.copy()
        vqa_preds = vqa_predictions.argmax(axis=1)
        exp_att_feature = vqa_net.blobs['exp_att_feature'].data.copy()
        exp_att_feature = np.squeeze(exp_att_feature)

        vqa_att_map = vqa_net.blobs['att_map'].data.copy()
        exp_att_map = vqa_net.blobs['exp_att_map'].data.copy()
        if args.save_att_map:
            save_att_map(qid_list, exp_att_map, att_map_save_dir)

        finished = np.zeros(args.batch_size)
        predicted_words = []
        conts = []
        t = 0
        prev_word = exp_vec[:, 0].reshape((1, args.batch_size))  # Initialize with <SOS>
        continuation = np.zeros((1, args.batch_size))  # flush out for the first word

        while finished.sum() != args.batch_size and t < args.exp_max_length:
            shape = exp_att_feature.shape
            if exp_net.blobs['exp_att_feature'].data.shape != shape:
                exp_net.blobs['exp_att_feature'].reshape(*shape)
                exp_net.blobs['exp'].reshape(*prev_word.shape)
                exp_net.blobs['exp_out'].reshape(1, args.batch_size)
                exp_net.blobs['exp_cont_1'].reshape(1, args.batch_size)
                exp_net.blobs['exp_cont_2'].reshape(1, args.batch_size)

            exp_net.blobs['exp_att_feature'].data[...] = exp_att_feature
            exp_net.blobs['exp'].data[...] = prev_word
            exp_net.blobs['exp_out'].data[...] = exp_vec_out[:, t].reshape(
                (1, args.batch_size))
            exp_net.blobs['exp_cont_1'].data[...] = continuation
            exp_net.blobs['exp_cont_2'].data[...] = continuation
            exp_net.forward()
            predicted_word = exp_net.blobs['exp_prediction'].data.copy()
            predicted_word = np.squeeze(predicted_word.argmax(axis=2))

            completed = np.where(predicted_word == 0)
            finished[completed] = 1
            predicted_words.append(predicted_word)
            conts.append(continuation)
            prev_word = predicted_word.reshape((1, args.batch_size))
            continuation = (finished != 1).astype(np.int32).reshape(
                (1, args.batch_size))
            t += 1

        predicted_words = np.array(predicted_words).transpose()
        conts = np.array(conts).transpose()

        r_vdict = reverse(dp.vdict)
        r_adict = reverse(dp.adict)
        r_exp_vdict = reverse(dp.exp_vdict)

        questions_str = batch_to_str('q', qvec, cvec, r_vdict, r_adict,
                                     r_exp_vdict)
        answers_str = batch_to_str('a', avec, np.ones_like(avec), r_vdict,
                                   r_adict, r_exp_vdict)
        pred_str = batch_to_str('a', vqa_preds, np.ones_like(vqa_preds),
                                r_vdict, r_adict, r_exp_vdict)
        generated_str = batch_to_str('exp', predicted_words, conts, r_vdict,
                                     r_adict, r_exp_vdict)

        for qid, qstr, ans, pred, expl, vqa_att, exp_att in zip(
                qid_list, questions_str, answers_str, pred_str, generated_str,
                vqa_att_map, exp_att_map):
            if ans == '':
                ans = UNK
            final_results[qid] = {
                'qstr': qstr,
                'ans': ans,
                'exp': expl,
                'pred': pred
            }
            vqa_submit_results.append({
                'answer': pred,
                'question_id': int(qid)
            })

    with open(os.path.join(args.out_dir, 'exp_results.json'), 'w') as f:
        json.dump(final_results, f)
    with open(os.path.join(args.out_dir, 'vqa_results.json'), 'w') as f:
        json.dump(vqa_submit_results, f)
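The inner while loop above is a standard greedy, step-by-step decode: feed the argmax token back in as the next input, keep a per-sample finished mask, and zero the continuation flags on the first step so the recurrent state is flushed. A self-contained sketch of the same control flow, with a random stub standing in for the exp_net forward pass (token id 0 plays the role of <EOS>, as in make_exp_vocab):

import numpy as np

def greedy_decode(step_fn, sos_ids, batch_size, max_length, eos_id=0):
    # step_fn(prev_word, continuation) -> (1, batch_size) array of next-token
    # ids; it stands in for the exp_net forward pass above.
    finished = np.zeros(batch_size)
    prev_word = sos_ids.reshape((1, batch_size))  # initialize with <SOS>
    continuation = np.zeros((1, batch_size))      # flush state at t == 0
    predicted_words, conts = [], []
    t = 0
    while finished.sum() != batch_size and t < max_length:
        predicted_word = step_fn(prev_word, continuation)[0]
        finished[np.where(predicted_word == eos_id)] = 1
        predicted_words.append(predicted_word)
        conts.append(continuation[0])
        prev_word = predicted_word.reshape((1, batch_size))
        continuation = (finished != 1).astype(np.int32).reshape((1, batch_size))
        t += 1
    # (batch_size, T) arrays, matching the transposes in generate_sentences
    return np.array(predicted_words).transpose(), np.array(conts).transpose()

# Stub "network": emits random token ids; id 0 terminates a sample.
rng = np.random.default_rng(0)
step = lambda prev, cont: rng.integers(0, 8, size=prev.shape)
words, conts = greedy_decode(step, np.ones(4, dtype=np.int64), 4, 10)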