def create_dictionary(dataroot):
    """Build a Dictionary from the VQA v2 question files plus imSitu words.

    Every question string found under the ``'questions'`` key of the four
    VQA v2 question files is tokenized into the dictionary, followed by every
    value of the imSitu word mapping stored in
    ``allnverbs_imsitu_words_nl2vqamatching.json``.

    NOTE(review): a second function with the same name is defined later in
    this source; if both live in the same module, the later one replaces
    this definition at import time -- confirm which one callers expect.

    Args:
        dataroot: Directory containing the VQA question JSON files and the
            imSitu word mapping file.

    Returns:
        The populated ``Dictionary`` instance.
    """
    dictionary = Dictionary()
    files = [
        'v2_OpenEnded_mscoco_train2014_questions.json',
        'v2_OpenEnded_mscoco_val2014_questions.json',
        'v2_OpenEnded_mscoco_test2015_questions.json',
        'v2_OpenEnded_mscoco_test-dev2015_questions.json'
    ]
    for path in files:
        question_path = os.path.join(dataroot, path)
        # Use a context manager so the handle is closed deterministically
        # instead of relying on garbage collection.
        with open(question_path) as question_file:
            qs = json.load(question_file)['questions']
        for q in qs:
            # Second argument is presumably "add unseen words" -- verify
            # against Dictionary.tokenize.
            dictionary.tokenize(q['question'], True)

    print('words coming from vqa ', len(dictionary))

    # Add all collected words from imSitu. These contain both overlaps with
    # VQA as well as new words.
    imsitu_words_path = os.path.join(
        dataroot, 'allnverbs_imsitu_words_nl2vqamatching.json')
    with open(imsitu_words_path) as imsitu_file:
        imsitu_words = json.load(imsitu_file)

    # Only the English names (values) are tokenized; the labels are unused.
    for eng_name in imsitu_words.values():
        dictionary.tokenize(eng_name, True)

    print(' with words coming from imsitu ', len(dictionary))

    return dictionary
def create_dictionary(dataroot):
    """Build a Dictionary from the imSitu question file plus imSitu words.

    For every verb entry in ``imsitu_questions_prev.json`` the ``'question'``
    string of each of its ``'roles'`` is tokenized into the dictionary,
    followed by every value of the imSitu word mapping stored in
    ``allnverbsall_imsitu_words_nl2glovematching.json``.

    NOTE(review): another function with the same name is defined earlier in
    this source; if both live in the same module, this definition replaces
    the earlier one at import time -- confirm that is intended.

    Args:
        dataroot: Directory containing the imSitu question JSON file and the
            imSitu word mapping file.

    Returns:
        The populated ``Dictionary`` instance.
    """
    dictionary = Dictionary()
    files = ['imsitu_questions_prev.json']
    for path in files:
        question_path = os.path.join(dataroot, path)
        # Use a context manager so the handle is closed deterministically
        # instead of relying on garbage collection.
        with open(question_path) as question_file:
            q_data = json.load(question_file)
        # Verb and role names are not needed, only the question text.
        for values in q_data.values():
            for info in values['roles'].values():
                # Second argument is presumably "add unseen words" -- verify
                # against Dictionary.tokenize.
                dictionary.tokenize(info['question'], True)

    # Add all collected words from imSitu. These contain both overlaps with
    # VQA as well as new words.
    imsitu_words_path = os.path.join(
        dataroot, 'allnverbsall_imsitu_words_nl2glovematching.json')
    with open(imsitu_words_path) as imsitu_file:
        imsitu_words = json.load(imsitu_file)

    # Only the English names (values) are tokenized; the labels are unused.
    for eng_name in imsitu_words.values():
        dictionary.tokenize(eng_name, True)

    print(' with words coming from imsitu ', len(dictionary))

    return dictionary