Python Evaluator.F1Single示例

                    predictions[qa['id']] = qa['answers'].pop(1)['text']

    # Build an evaluator over `articles` and report the overall exact-match
    # and F1 figures for `predictions`, each rounded to one decimal place.
    evaluator = Evaluator(articles=articles)
    print 'Exact match:', round(evaluator.ExactMatch(predictions), 1)
    print 'F1:', round(evaluator.F1(predictions), 1)
    # Print every num_same bucket as a percentage of the summed counts, in
    # ascending key order.
    # NOTE(review): num_same_counts is populated outside this view. If it is
    # empty the total is 0, but the loop body never runs, so nothing divides
    # by zero here.
    total_num_same_count = sum(num_same_counts.values())
    for num_same, count in sorted(num_same_counts.items()):
        print num_same, 'same:', round(100.0 * count / total_num_same_count, 1)

    # Load the answer-type tags. The loops below treat the parsed JSON as a
    # mapping from question id to tag.
    with open('dataset/dev-answertypetags.json') as fileobj:
        tags = json.loads(fileobj.read())

    print len(tags), 'tagged questions'
    # Walk the distinct tags from most to least frequent and print one summary
    # line per tag.
    for tag, _ in Counter(tags.values()).most_common():
        num_correct = 0
        total_f1 = 0
        num_total = 0
        # NOTE(review): this re-scans every entry of `tags` once per distinct
        # tag; grouping question ids by tag up front would avoid the rescans.
        for question_id, _ in filter(lambda x: x[1] == tag, tags.items()):
            num_total += 1
            predicted_answer = predictions.get(question_id, None)
            # A question without a prediction still counts in num_total but
            # adds nothing to num_correct or total_f1, i.e. it scores as a miss.
            if predicted_answer is not None:
                if evaluator.ExactMatchSingle(question_id, predicted_answer):
                    num_correct += 1
                total_f1 += evaluator.F1Single(question_id, predicted_answer)

        # Output: this tag's share of all tagged questions, then exact-match
        # and F1 averaged over the questions carrying the tag. num_total is at
        # least 1 because each tag comes from tags.values().
        # NOTE(review): the 100.0 factor presumably means F1Single returns a
        # fraction in [0, 1] -- confirm against Evaluator.
        print str(round(100.0 * num_total / len(tags),
                        1)) + '%', tag, 'questions, exact match', str(
                            round(100.0 * num_correct / num_total,
                                  1)) + '%', ', F1', round(
                                      100.0 * total_f1 / num_total, 1)

示例#2

显示文件

    # Read the 'data' list from the dataset JSON file. For every question with
    # more than one reference answer, take the answer at index 1 out of the
    # list and store its text as that question's "human" prediction.
    with open(jsonDataFile, "r") as fp:
        human_articles = json.load(fp)['data']
    for article in human_articles:
        for paragraph in article['paragraphs']:
            for qa in paragraph['qas']:
                if len(qa['answers']) > 1:
                    # pop(1) mutates qa['answers'], so the evaluator built
                    # below is constructed from data that no longer contains
                    # the popped answer.
                    human_predictions[qa['id']] = qa['answers'].pop(1)['text']
    # Built only after the pops above, from the modified articles.
    human_evaluator = Evaluator(articles=human_articles)

    # For each edit-distance group, in ascending key order, average the model
    # F1 over all questions in the group and the human F1 over the questions
    # that have a human prediction, then append to the result lists and print.
    for dist in sorted(editDistGroup.keys()):
        total_f1 = 0
        total_human_f1 = 0
        num_q = 0
        num_human = 0
        for qaId, _ in editDistGroup[dist]:
            # Raises KeyError if predDict has no entry for qaId.
            total_f1 += 100.0 * evaluator.F1Single(qaId, predDict[qaId])
            num_q += 1
            # Only questions with a recorded human prediction contribute to
            # the human average.
            if qaId in human_predictions:
                total_human_f1 += 100.0 * human_evaluator.F1Single(
                    qaId, human_predictions[qaId])
                num_human += 1
        # NOTE(review): exactMatchRate is hard-coded to 0, so the appended and
        # printed "exact match" value is never computed -- confirm intent.
        exactMatchRate = 0
        # NOTE(review): raises ZeroDivisionError if the group is empty.
        F1 = total_f1 / num_q
        exactMatchRateList.append(exactMatchRate)
        F1List.append(F1)
        # NOTE(review): raises ZeroDivisionError when no question in this
        # group has a human prediction (num_human == 0).
        HumanF1List.append(total_human_f1 / num_human)
        print total_human_f1 / num_human
        print "edit dist ", dist
        print "number of sample ", len(editDistGroup[dist])
        print "exact match ", exactMatchRate
        print "F1 ", F1