示例#1
0
def main():
    """Build and save the question and answer vocabularies for training data.

    Loads the training question and answer JSON files located through
    ``utils.path_for``, preprocesses them with ``data.prepare_questions`` and
    ``data.prepare_answers``, and runs ``extract_vocab`` on each result.

    Two JSON files are written:

    * ``config.vocabulary_path`` holds the first value returned by
      ``extract_vocab`` for questions and for answers, under the keys
      ``'question'`` and ``'answer'``.
    * ``config.vocabularyi_path`` holds the second value returned for the
      answers, under the key ``'answeri'``.
    """
    questions_path = utils.path_for(train=True, question=True)
    answers_path = utils.path_for(train=True, answer=True)

    # JSON text is UTF-8; naming the encoding keeps loading independent of
    # the platform's locale default.
    with open(questions_path, 'r', encoding='utf-8') as fd:
        questions = json.load(fd)
    with open(answers_path, 'r', encoding='utf-8') as fd:
        answers = json.load(fd)

    questions = data.prepare_questions(questions)
    answers = data.prepare_answers(answers)

    # Only the first return value is needed for the questions.
    question_vocab, _ = extract_vocab(questions, start=1)
    answer_vocab, answer_vocabi = extract_vocab(answers,
                                                top_k=config.max_answers)

    vocabs = {
        'question': question_vocab,
        'answer': answer_vocab,
    }
    with open(config.vocabulary_path, 'w', encoding='utf-8') as fd:
        json.dump(vocabs, fd)

    # NOTE(review): ``answer_vocabi`` is presumably the inverse (index to
    # answer) mapping; it is echoed to stdout exactly as before -- confirm
    # whether this output is still wanted.
    print(answer_vocabi)
    vocabsi = {
        'answeri': answer_vocabi,
    }
    with open(config.vocabularyi_path, 'w', encoding='utf-8') as fd:
        json.dump(vocabsi, fd)
示例#2
0
def main():
    """Build and save the question and answer vocabularies for training data.

    Loads the training annotations JSON located through
    ``utils.path_for_annotations``, derives questions and answers from that
    same document with ``data.prepare_questions`` and
    ``data.prepare_answers``, and runs ``extract_vocab`` on each. The two
    vocabularies are written as one JSON object to ``config.vocabulary_path``
    under the keys ``'question'`` and ``'answer'``.
    """
    annotations_path = utils.path_for_annotations(train=True)

    # JSON text is UTF-8; naming the encoding keeps loading independent of
    # the platform's locale default.
    with open(annotations_path, 'r', encoding='utf-8') as fd:
        annotations_json = json.load(fd)

    questions = data.prepare_questions(annotations_json)
    answers = data.prepare_answers(annotations_json)

    question_vocab = extract_vocab(questions, start=1)
    # The answer vocabulary is limited via ``top_k`` to config.max_answers.
    answer_vocab = extract_vocab(answers, top_k=config.max_answers)

    vocabs = {
        'question': question_vocab,
        'answer': answer_vocab,
    }
    with open(config.vocabulary_path, 'w', encoding='utf-8') as fd:
        json.dump(vocabs, fd)
示例#3
0
def main():
    """Extract question and answer vocabularies and store them as JSON.

    The training question and answer files (located with ``utils.path_for``)
    are read as UTF-8 JSON, preprocessed by ``data.prepare_questions`` and
    ``data.prepare_answers``, and handed to ``extract_vocab``. Both resulting
    vocabularies are written to ``config.vocabulary_path`` as UTF-8 JSON with
    non-ASCII characters left unescaped.
    """
    def read_json(path):
        # Parse one UTF-8 encoded JSON document from ``path``.
        with open(path, 'r', encoding='utf-8') as handle:
            return json.load(handle)

    question_path = utils.path_for(train=True, question=True)
    answer_path = utils.path_for(train=True, answer=True)

    raw_questions = read_json(question_path)
    raw_answers = read_json(answer_path)

    prepared_questions = data.prepare_questions(raw_questions)
    prepared_answers = data.prepare_answers(raw_answers)

    # Both vocabularies are computed before the output file is opened, so a
    # failure during extraction never truncates an existing vocabulary file.
    question_vocab = extract_vocab(prepared_questions, start=1)
    answer_vocab = extract_vocab(prepared_answers, top_k=config.max_answers)

    with open(config.vocabulary_path, 'w', encoding='utf-8') as out:
        json.dump(
            {'question': question_vocab, 'answer': answer_vocab},
            out,
            ensure_ascii=False,
        )
示例#4
0
def main():
    """Build and save the question and answer vocabularies for training data.

    Reads ``v2_OpenEnded_mscoco_train2014_questions.json`` and
    ``v2_mscoco_train2014_annotations.json`` from ``config.qa_path``,
    preprocesses them with ``data.prepare_questions`` and
    ``data.prepare_answers``, and runs ``extract_vocab`` on each. The two
    vocabularies are written as one JSON object to ``config.vocabulary_path``
    under the keys ``'question'`` and ``'answer'``.
    """
    questions_path = os.path.join(
        config.qa_path, 'v2_OpenEnded_mscoco_train2014_questions.json')
    answers_path = os.path.join(
        config.qa_path, 'v2_mscoco_train2014_annotations.json')

    # JSON text is UTF-8; naming the encoding keeps loading independent of
    # the platform's locale default.
    with open(questions_path, 'r', encoding='utf-8') as fd:
        questions = json.load(fd)
    with open(answers_path, 'r', encoding='utf-8') as fd:
        answers = json.load(fd)

    # The prepared data is materialised into lists before vocabulary
    # extraction, as in the original code.
    questions = list(data.prepare_questions(questions))
    answers = list(data.prepare_answers(answers))

    question_vocab = extract_vocab(questions, start=1)
    answer_vocab = extract_vocab(answers, top_k=config.max_answers)

    vocabs = {
        'question': question_vocab,
        'answer': answer_vocab,
    }
    with open(config.vocabulary_path, 'w', encoding='utf-8') as fd:
        json.dump(vocabs, fd)
示例#5
0
def main():
    """Print and plot frequency statistics of the validation-split answers.

    Loads the validation annotations located through
    ``utils.path_for_annotations``, flattens the output of
    ``data.prepare_answers`` into one stream of answers and counts them.

    The function then:

    * prints the total and distinct answer counts and the 10 most common
      answers;
    * plots the cumulative share of all answers covered by the top-N answers
      and saves the figure to ``temp.jpg``;
    * prints the rank of the first answer that occurs exactly once, together
      with the count and cumulative share at that rank and the one before;
    * prints the cumulative share at index 1000, when that many distinct
      answers exist;
    * prints the 100 most common answers with their relative frequency;
    * prints how many answers are listed in ``colors.colors``.

    Statistics that cannot be computed for the given data (no answers at
    all, no singleton answer, fewer than 1001 distinct answers) are skipped
    instead of raising.
    """
    annotations_path = utils.path_for_annotations(val=True)

    # JSON text is UTF-8; naming the encoding keeps loading independent of
    # the platform's locale default.
    with open(annotations_path, 'r', encoding='utf-8') as fd:
        annotations_json = json.load(fd)

    answers = data.prepare_answers(annotations_json)

    counter = Counter(itertools.chain.from_iterable(answers))
    total_num_answers = sum(counter.values())
    print("total # of answers:", total_num_answers)
    print("total # of distinct answers:", len(counter))
    if not total_num_answers:
        # Every ratio below divides by the total; with no answers there is
        # nothing meaningful to plot or report.
        return

    most_common = counter.most_common()
    print("10 most common answers and counts", most_common[:10])
    counts = [count for _, count in most_common]
    # cdf[k] is the share of all answers covered by the k + 1 most common.
    cdf = np.cumsum(counts) / total_num_answers
    plt.plot(list(range(1, len(most_common) + 1)), cdf)
    plt.xlabel("Top N answers")
    plt.ylabel("Proportion of all answers")
    plt.title("Distribution of Answers")
    plt.savefig('temp.jpg')

    # Rank of the first answer that occurs exactly once, or None when every
    # answer occurs at least twice.
    i = next((idx for idx, count in enumerate(counts) if count == 1), None)
    if i is not None:
        print(i)
        if i > 0:
            # Guarded so that i - 1 never wraps around to the last element.
            print(f"counter[{i-1}]:", counts[i - 1], cdf[i - 1])
        print(f"counter[{i}]:", counts[i], cdf[i])

    if len(cdf) > 1000:
        print(cdf[1000])

    for pair in most_common[:100]:
        print(pair[0], pair[1], pair[1] / total_num_answers)

    # A Counter yields 0 for keys it has not seen, so no membership test is
    # needed before the lookup.
    num_color_answers = sum(counter[color] for color in colors.colors)
    print("num color answers", num_color_answers,
          num_color_answers / total_num_answers)