示例#1
0
def create_question_explain_dictionary(dataroot, thres):
    """Build a vocabulary shared by VQA-E explanations and VQA v2 questions.

    Explanation words are counted first and only those occurring at least
    ``thres`` times are added. Question text is then passed through
    ``dictionary.tokenize(..., True)`` with no frequency threshold applied
    here.

    Args:
        dataroot: Directory containing the VQA-E and VQA v2 question JSON files.
        thres: Minimum occurrence count for an explanation word to be added.

    Returns:
        The populated ``Dictionary`` instance.
    """
    dictionary = Dictionary()
    counter = Counter()
    question_files = [
        'v2_OpenEnded_mscoco_train2014_questions.json',
        'v2_OpenEnded_mscoco_val2014_questions.json',
        'v2_OpenEnded_mscoco_test2015_questions.json',
        'v2_OpenEnded_mscoco_test-dev2015_questions.json',
    ]
    explain_files = [
        'VQA-E_train_set.json',
        'VQA-E_val_set.json',
    ]

    for path in explain_files:
        explain_path = os.path.join(dataroot, path)
        # Use a context manager so the file handle is closed deterministically.
        with open(explain_path) as explain_file:
            es = json.load(explain_file)
        for e in es:
            # Only the first explanation of each entry contributes to the counts.
            counter.update(dictionary.word_token(e['explanation'][0]))

    # Special tokens are registered before any corpus word, in this fixed order.
    dictionary.add_word('<pad>')
    dictionary.add_word('<start>')
    dictionary.add_word('<end>')
    dictionary.add_word('<unk>')
    for word, cnt in counter.items():
        if cnt >= thres:
            dictionary.add_word(word)

    for path in question_files:
        question_path = os.path.join(dataroot, path)
        with open(question_path) as question_file:
            qs = json.load(question_file)['questions']
        for q in qs:
            # NOTE(review): the True flag presumably tells tokenize() to add
            # unseen words to the dictionary -- confirm against Dictionary.
            dictionary.tokenize(q['question'], True)

    return dictionary
示例#2
0
def create_explain_dictionary(dataroot, thres):
    """Build a vocabulary from the VQA-E train and val explanation files.

    Args:
        dataroot: Directory containing ``VQA-E_train_set.json`` and
            ``VQA-E_val_set.json``.
        thres: Minimum occurrence count for a word to be added.

    Returns:
        The populated ``Dictionary`` instance, holding the four special tokens
        followed by every counted word that meets the threshold.
    """
    dictionary = Dictionary()
    counter = Counter()
    files = [
        'VQA-E_train_set.json',
        'VQA-E_val_set.json',
    ]
    for path in files:
        explain_path = os.path.join(dataroot, path)
        # Use a context manager so the file handle is closed deterministically.
        with open(explain_path) as explain_file:
            es = json.load(explain_file)
        for e in es:
            # Only the first explanation of each entry contributes to the counts.
            counter.update(dictionary.word_token(e['explanation'][0]))

    # Special tokens are registered before any corpus word, in this fixed order.
    dictionary.add_word('<pad>')
    dictionary.add_word('<start>')
    dictionary.add_word('<end>')
    dictionary.add_word('<unk>')
    for word, cnt in counter.items():
        if cnt >= thres:
            dictionary.add_word(word)
    return dictionary
示例#3
0
def create_caption_dictionary(dataroot, thres):
    """Build a vocabulary from the 2014 train and val caption annotation files.

    Args:
        dataroot: Directory containing ``captions_train2014.json`` and
            ``captions_val2014.json``.
        thres: Minimum occurrence count for a word to be added.

    Returns:
        The populated ``Dictionary`` instance, holding the four special tokens
        followed by every counted word that meets the threshold.
    """
    dictionary = Dictionary()
    counter = Counter()
    files = [
        'captions_train2014.json',
        'captions_val2014.json',
    ]
    for path in files:
        caption_path = os.path.join(dataroot, path)
        # Use a context manager so the file handle is closed deterministically.
        with open(caption_path) as caption_file:
            annotations = json.load(caption_file)['annotations']
        for annotation in annotations:
            counter.update(dictionary.word_token(annotation['caption']))

    # Special tokens are registered before any corpus word, in this fixed order.
    dictionary.add_word('<pad>')
    dictionary.add_word('<start>')
    dictionary.add_word('<end>')
    dictionary.add_word('<unk>')
    for word, cnt in counter.items():
        if cnt >= thres:
            dictionary.add_word(word)
    return dictionary
示例#4
0
def create_VQAX_explain_dictionary(dataroot, thres):
    """Build a vocabulary from the VQA-X explanation annotation files.

    Each file is loaded as a JSON object; every element of every value in that
    object is tokenized and counted.

    Args:
        dataroot: Directory containing ``train_exp_anno.json``,
            ``val_exp_anno.json`` and ``test_exp_anno.json``.
        thres: Minimum occurrence count for a word to be added.

    Returns:
        The populated ``Dictionary`` instance, holding the four special tokens
        followed by every counted word that meets the threshold.
    """
    dictionary = Dictionary()
    counter = Counter()
    files = [
        'train_exp_anno.json',
        'val_exp_anno.json',
        'test_exp_anno.json',
    ]
    for path in files:
        explain_path = os.path.join(dataroot, path)
        # Use a context manager so the file handle is closed deterministically.
        with open(explain_path) as explain_file:
            es = json.load(explain_file)
        # Only the mapping's values are used, so iterate them directly.
        for explanations in es.values():
            for explanation in explanations:
                counter.update(dictionary.word_token(explanation))

    # Special tokens are registered before any corpus word, in this fixed order.
    dictionary.add_word('<pad>')
    dictionary.add_word('<start>')
    dictionary.add_word('<end>')
    dictionary.add_word('<unk>')
    for word, cnt in counter.items():
        if cnt >= thres:
            dictionary.add_word(word)
    return dictionary