def create_question_explain_dictionary(dataroot, thres):
    """Build one vocabulary from VQA-E explanations and VQA v2 questions.

    Explanation words are counted first and only those occurring at least
    ``thres`` times are added. Question words are added afterwards through
    ``Dictionary.tokenize`` with no frequency filtering.

    Args:
        dataroot: Directory containing the VQA-E and VQA v2 question JSON
            files listed below.
        thres: Minimum number of occurrences an explanation word needs in
            order to be added to the dictionary.

    Returns:
        The populated ``Dictionary`` instance.
    """
    dictionary = Dictionary()
    counter = Counter()
    question_files = [
        'v2_OpenEnded_mscoco_train2014_questions.json',
        'v2_OpenEnded_mscoco_val2014_questions.json',
        'v2_OpenEnded_mscoco_test2015_questions.json',
        'v2_OpenEnded_mscoco_test-dev2015_questions.json',
    ]
    explain_files = [
        'VQA-E_train_set.json',
        'VQA-E_val_set.json',
    ]

    # Pass 1: count explanation word frequencies across all splits.
    for path in explain_files:
        explain_path = os.path.join(dataroot, path)
        # Context manager so the handle is closed as soon as parsing is done.
        with open(explain_path) as f:
            es = json.load(f)
        for e in es:
            # Only the first entry of each 'explanation' list is used.
            counter.update(dictionary.word_token(e['explanation'][0]))

    # Special tokens are registered before any real word
    # (presumably so they receive the lowest indices - confirm in Dictionary).
    dictionary.add_word('<pad>')
    dictionary.add_word('<start>')
    dictionary.add_word('<end>')
    dictionary.add_word('<unk>')

    for word, cnt in counter.items():
        if cnt >= thres:
            dictionary.add_word(word)

    # Pass 2: feed every question through the dictionary.
    for path in question_files:
        question_path = os.path.join(dataroot, path)
        with open(question_path) as f:
            qs = json.load(f)['questions']
        for q in qs:
            # NOTE(review): the second argument is presumably an "add word"
            # flag that registers unseen tokens - confirm in Dictionary.
            dictionary.tokenize(q['question'], True)

    return dictionary
def create_explain_dictionary(dataroot, thres):
    """Build a vocabulary from the VQA-E explanation annotations.

    Word frequencies are gathered over the train and val sets; a word is
    added to the dictionary only if it occurs at least ``thres`` times.

    Args:
        dataroot: Directory containing ``VQA-E_train_set.json`` and
            ``VQA-E_val_set.json``.
        thres: Minimum number of occurrences a word needs in order to be
            added to the dictionary.

    Returns:
        The populated ``Dictionary`` instance.
    """
    dictionary = Dictionary()
    counter = Counter()
    files = [
        'VQA-E_train_set.json',
        'VQA-E_val_set.json',
    ]

    for path in files:
        explain_path = os.path.join(dataroot, path)
        # Context manager so the handle is closed as soon as parsing is done.
        with open(explain_path) as f:
            es = json.load(f)
        for e in es:
            # Only the first entry of each 'explanation' list is used.
            counter.update(dictionary.word_token(e['explanation'][0]))

    # Special tokens are registered before any real word
    # (presumably so they receive the lowest indices - confirm in Dictionary).
    dictionary.add_word('<pad>')
    dictionary.add_word('<start>')
    dictionary.add_word('<end>')
    dictionary.add_word('<unk>')

    for word, cnt in counter.items():
        if cnt >= thres:
            dictionary.add_word(word)

    return dictionary
def create_caption_dictionary(dataroot, thres):
    """Build a vocabulary from caption annotation files.

    Word frequencies are gathered over the ``caption`` field of every entry
    under ``annotations`` in the train and val files; a word is added to the
    dictionary only if it occurs at least ``thres`` times.

    Args:
        dataroot: Directory containing ``captions_train2014.json`` and
            ``captions_val2014.json``.
        thres: Minimum number of occurrences a word needs in order to be
            added to the dictionary.

    Returns:
        The populated ``Dictionary`` instance.
    """
    dictionary = Dictionary()
    counter = Counter()
    files = [
        'captions_train2014.json',
        'captions_val2014.json',
    ]

    for path in files:
        caption_path = os.path.join(dataroot, path)
        # Context manager so the handle is closed as soon as parsing is done.
        with open(caption_path) as f:
            annotations = json.load(f)['annotations']
        for annotation in annotations:
            counter.update(dictionary.word_token(annotation['caption']))

    # Special tokens are registered before any real word
    # (presumably so they receive the lowest indices - confirm in Dictionary).
    dictionary.add_word('<pad>')
    dictionary.add_word('<start>')
    dictionary.add_word('<end>')
    dictionary.add_word('<unk>')

    for word, cnt in counter.items():
        if cnt >= thres:
            dictionary.add_word(word)

    return dictionary
def create_VQAX_explain_dictionary(dataroot, thres):
    """Build a vocabulary from the VQA-X explanation annotation files.

    Each annotation file is loaded as a JSON object whose values are
    iterables of explanation strings. Word frequencies are gathered over all
    of them (train, val and test); a word is added to the dictionary only if
    it occurs at least ``thres`` times.

    Args:
        dataroot: Directory containing ``train_exp_anno.json``,
            ``val_exp_anno.json`` and ``test_exp_anno.json``.
        thres: Minimum number of occurrences a word needs in order to be
            added to the dictionary.

    Returns:
        The populated ``Dictionary`` instance.
    """
    dictionary = Dictionary()
    counter = Counter()
    files = [
        'train_exp_anno.json',
        'val_exp_anno.json',
        'test_exp_anno.json',
    ]

    for path in files:
        explain_path = os.path.join(dataroot, path)
        # Context manager so the handle is closed as soon as parsing is done.
        with open(explain_path) as f:
            es = json.load(f)
        # Keys are not needed; only the per-key explanation lists are.
        for explanations in es.values():
            for explanation in explanations:
                counter.update(dictionary.word_token(explanation))

    # Special tokens are registered before any real word
    # (presumably so they receive the lowest indices - confirm in Dictionary).
    dictionary.add_word('<pad>')
    dictionary.add_word('<start>')
    dictionary.add_word('<end>')
    dictionary.add_word('<unk>')

    for word, cnt in counter.items():
        if cnt >= thres:
            dictionary.add_word(word)

    return dictionary