Пример #1
0
    def testCalculateStats(self):
        # Exercises eval_lib.calculate_stats twice: once on a Stats proto with
        # non-zero counts and once on a freshly constructed one.
        populated = results_pb2.Stats()
        for field, count in (('true_positives', 12),
                             ('false_positives', 8),
                             ('false_negatives', 3)):
            setattr(populated, field, count)
        eval_lib.calculate_stats(populated)
        # Expected values are consistent with precision = tp / (tp + fp) and
        # recall = tp / (tp + fn) for the counts set above.
        for field, expected in (('precision', .6),
                                ('recall', .8),
                                ('f_score', .6857142857142856)):
            self.assertAlmostEqual(expected, getattr(populated, field))

        # With no counts set, every derived metric is expected to be NaN and
        # the proto is expected to carry an explanatory error message.
        empty = results_pb2.Stats()
        eval_lib.calculate_stats(empty)
        for field in ('precision', 'recall', 'f_score'):
            self.assertTrue(math.isnan(getattr(empty, field)))
        expected_message = (
            'Precision has denominator of zero. Recall has denominator of zero. '
            'f-score is NaN')
        self.assertEqual(expected_message, empty.error_message)
Пример #2
0
    def to_results_proto(self):
        """Build a results_pb2.Results message from the accumulated results.

        For both the strict entity matching and the binary token matching
        sections, the micro stats are finalized in place with
        eval_lib.calculate_stats and copied into the output, the macro stats
        are taken from the accumulator's calculate_stats(), and the per-type
        protos are appended.

        Returns:
          The populated results_pb2.Results message.
        """
        proto = results_pb2.Results()

        # Strict entity matching section.
        strict = self.strict_entity_matching
        strict_out = proto.strict_entity_matching_results
        eval_lib.calculate_stats(strict.micro)
        strict_out.micro_average_results.CopyFrom(strict.micro)
        strict_out.macro_average_results.CopyFrom(
            strict.macro.calculate_stats())
        strict_out.per_type_micro_average_results.extend(
            strict.per_type_protos())

        # Binary token matching section; micro/macro here are the typeless ones.
        binary = self.binary_token_matching
        binary_out = proto.binary_token_matching_results
        eval_lib.calculate_stats(binary.typeless_micro)
        binary_out.micro_average_results.CopyFrom(binary.typeless_micro)
        binary_out.macro_average_results.CopyFrom(
            binary.typeless_macro.calculate_stats())
        binary_out.per_type_micro_average_results.extend(
            binary.per_type_protos())

        return proto
Пример #3
0
    def testAccumulatedResultsAdd(self):
        # Checks that adding two AccumulatedResults with `+` gives the same
        # aggregates as adding both individual results to a single accumulator.
        def set_counts(stats, tp, fp, fn):
            stats.true_positives = tp
            stats.false_positives = fp
            stats.false_negatives = fn

        def make_result(overall, type_a, type_b, typeless):
            # Each argument is a (tp, fp, fn) triple.
            result = eval_lib.IndividualResult()
            set_counts(result.stats, *overall)
            set_counts(result.per_type['TypeA'], *type_a)
            set_counts(result.per_type['TypeB'], *type_b)
            set_counts(result.typeless, *typeless)
            eval_lib.calculate_stats(result.stats)
            eval_lib.calculate_stats(result.typeless)
            return result

        first = make_result((30, 20, 10), (9, 8, 7), (6, 5, 4), (15, 14, 13))
        second = make_result(
            (3, 2, 1), (19, 18, 17), (16, 15, 14), (13, 12, 11))

        # Reference: one accumulator that saw both results.
        combined = eval_lib.AccumulatedResults()
        combined.add_result(first)
        combined.add_result(second)

        # Two accumulators that each saw a single result, then summed.
        only_first = eval_lib.AccumulatedResults()
        only_first.add_result(first)

        only_second = eval_lib.AccumulatedResults()
        only_second.add_result(second)

        summed = only_first + only_second

        self.assertEqual(combined.micro, summed.micro)
        self.assertEqual(combined.macro.calculate_stats(),
                         summed.macro.calculate_stats())
        self.assertEqual(combined.per_type, summed.per_type)
        self.assertEqual(combined.typeless_micro, summed.typeless_micro)
        self.assertEqual(combined.typeless_macro.calculate_stats(),
                         summed.typeless_macro.calculate_stats())
Пример #4
0
    def testPickle(self):
        # Round-trips an IndividualResult and an AccumulatedResults through
        # pickle and checks that the restored objects match the originals.
        def set_counts(stats, tp, fp, fn):
            stats.true_positives = tp
            stats.false_positives = fp
            stats.false_negatives = fn

        original = eval_lib.IndividualResult()
        set_counts(original.stats, 30, 20, 10)
        set_counts(original.per_type['TypeA'], 9, 8, 7)
        set_counts(original.per_type['TypeB'], 6, 5, 4)
        set_counts(original.typeless, 15, 14, 13)
        eval_lib.calculate_stats(original.stats)
        eval_lib.calculate_stats(original.typeless)

        restored = pickle.loads(pickle.dumps(original))
        self.assertEqual(original.record_id, restored.record_id)
        self.assertEqual(original.per_type, restored.per_type)
        # stats and typeless had calculate_stats applied above, so they are
        # compared after passing through normalize_floats.
        self.assertEqual(normalize_floats(original.stats),
                         normalize_floats(restored.stats))
        self.assertEqual(normalize_floats(original.typeless),
                         normalize_floats(restored.typeless))
        self.assertEqual(original.debug_info, restored.debug_info)

        # Same round trip for an accumulator holding that single result.
        accumulated = eval_lib.AccumulatedResults()
        accumulated.add_result(original)

        restored_accumulated = pickle.loads(pickle.dumps(accumulated))

        self.assertEqual(accumulated.micro, restored_accumulated.micro)
        self.assertEqual(accumulated.macro.calculate_stats(),
                         restored_accumulated.macro.calculate_stats())
        self.assertEqual(accumulated.per_type, restored_accumulated.per_type)
        self.assertEqual(accumulated.typeless_micro,
                         restored_accumulated.typeless_micro)
        self.assertEqual(accumulated.typeless_macro.calculate_stats(),
                         restored_accumulated.typeless_macro.calculate_stats())
Пример #5
0
    def testAccumulateResults(self):
        # Adds two IndividualResults to one AccumulatedResults and checks the
        # micro, macro, per-type and typeless aggregates against fixed values.
        def set_counts(stats, tp, fp, fn):
            stats.true_positives = tp
            stats.false_positives = fp
            stats.false_negatives = fn

        def make_result(overall, type_a, type_b, typeless):
            # Each argument is a (tp, fp, fn) triple.
            result = eval_lib.IndividualResult()
            set_counts(result.stats, *overall)
            set_counts(result.per_type['TypeA'], *type_a)
            set_counts(result.per_type['TypeB'], *type_b)
            set_counts(result.typeless, *typeless)
            eval_lib.calculate_stats(result.stats)
            eval_lib.calculate_stats(result.typeless)
            return result

        def counts_proto(tp, fp, fn):
            # Stats proto carrying only raw counts.
            proto = results_pb2.Stats()
            set_counts(proto, tp, fp, fn)
            return proto

        def metrics_proto(precision, recall, f_score):
            # Stats proto carrying only derived metrics.
            proto = results_pb2.Stats()
            proto.precision = precision
            proto.recall = recall
            proto.f_score = f_score
            return proto

        first = make_result((30, 20, 10), (9, 8, 7), (6, 5, 4), (15, 14, 13))
        second = make_result(
            (3, 2, 1), (19, 18, 17), (16, 15, 14), (13, 12, 11))

        accumulated = eval_lib.AccumulatedResults()
        accumulated.add_result(first)
        accumulated.add_result(second)

        # Micro counts are the sums of the two results' overall counts.
        self.assertEqual(counts_proto(33, 22, 11), accumulated.micro)

        # Both inputs have precision .6 and recall .75.
        self.assertEqual(
            normalize_floats(metrics_proto(0.6, 0.75, 0.666667)),
            normalize_floats(accumulated.macro.calculate_stats()))

        # Per-type counts are the sums of the matching per-type counts.
        expected_by_type = {
            'TypeA': counts_proto(28, 26, 24),
            'TypeB': counts_proto(22, 20, 18),
        }
        self.assertEqual(expected_by_type, accumulated.per_type)

        # Typeless micro counts are the sums of the typeless counts.
        self.assertEqual(counts_proto(28, 26, 24), accumulated.typeless_micro)

        self.assertEqual(
            normalize_floats(metrics_proto(0.518621, 0.53869, 0.528465)),
            normalize_floats(accumulated.typeless_macro.calculate_stats()))