def create_dictionary(dataroot):
    """Build a Dictionary from the VQA v2 question files plus imSitu words.

    NOTE(review): a second function named ``create_dictionary`` is defined
    later in this file. If both live at module level, the later definition
    replaces this one at import time -- confirm which is meant to be used.

    Args:
        dataroot: Directory containing the four VQA v2 question JSON files
            and ``allnverbs_imsitu_words_nl2vqamatching.json``.

    Returns:
        The ``Dictionary`` instance after every question string and every
        imSitu word string has been passed through ``tokenize``.

    Raises:
        IOError/OSError: If any of the expected files cannot be opened.
        KeyError: If a question file lacks the ``'questions'`` key or an
            entry lacks the ``'question'`` key.
    """
    dictionary = Dictionary()
    files = [
        'v2_OpenEnded_mscoco_train2014_questions.json',
        'v2_OpenEnded_mscoco_val2014_questions.json',
        'v2_OpenEnded_mscoco_test2015_questions.json',
        'v2_OpenEnded_mscoco_test-dev2015_questions.json',
    ]
    for path in files:
        question_path = os.path.join(dataroot, path)
        # Use a context manager so the file handle is closed promptly
        # instead of being left for the garbage collector.
        with open(question_path) as f:
            qs = json.load(f)['questions']
        for q in qs:
            # Second argument presumably asks tokenize to add unseen words
            # to the dictionary -- TODO confirm against Dictionary.tokenize.
            dictionary.tokenize(q['question'], True)

    print('words coming from vqa ', len(dictionary))

    # Add all words collected from imSitu. These contain both words that
    # overlap with VQA and words that are new.
    imsitu_words_path = os.path.join(
        dataroot, 'allnverbs_imsitu_words_nl2vqamatching.json')
    with open(imsitu_words_path) as f:
        imsitu_words = json.load(f)

    # Only the mapped strings are tokenized; the label keys are not needed.
    for eng_name in imsitu_words.values():
        dictionary.tokenize(eng_name, True)

    print(' with words coming from imsitu ', len(dictionary))

    return dictionary
def create_dictionary(dataroot):
    """Build a Dictionary from the imSitu role questions plus imSitu words.

    NOTE(review): an earlier function named ``create_dictionary`` exists in
    this file. If both live at module level, this later definition is the
    one bound to the name after import -- confirm that is intended.

    Args:
        dataroot: Directory containing ``imsitu_questions_prev.json`` and
            ``allnverbsall_imsitu_words_nl2glovematching.json``.

    Returns:
        The ``Dictionary`` instance after every role question and every
        imSitu word string has been passed through ``tokenize``.

    Raises:
        IOError/OSError: If any of the expected files cannot be opened.
        KeyError: If a verb entry lacks ``'roles'`` or a role entry lacks
            ``'question'``.
    """
    dictionary = Dictionary()
    files = ['imsitu_questions_prev.json']

    for path in files:
        question_path = os.path.join(dataroot, path)
        # Use a context manager so the file handle is closed promptly
        # instead of being left for the garbage collector.
        with open(question_path) as f:
            q_data = json.load(f)

        # The file maps each verb to an entry whose 'roles' dict maps each
        # role to an entry holding a 'question' string; only the nested
        # values are needed here, not the verb/role keys.
        for values in q_data.values():
            for info in values['roles'].values():
                # Second argument presumably asks tokenize to add unseen
                # words -- TODO confirm against Dictionary.tokenize.
                dictionary.tokenize(info['question'], True)

    # Add all words collected from imSitu. These contain both words that
    # overlap with VQA and words that are new.
    imsitu_words_path = os.path.join(
        dataroot, 'allnverbsall_imsitu_words_nl2glovematching.json')
    with open(imsitu_words_path) as f:
        imsitu_words = json.load(f)

    # Only the mapped strings are tokenized; the label keys are not needed.
    for eng_name in imsitu_words.values():
        dictionary.tokenize(eng_name, True)

    print(' with words coming from imsitu ', len(dictionary))

    return dictionary