def testCalculateStats(self):
    """Check calculate_stats on populated counts and on an all-default Stats."""
    # Case 1: 12 TP / 8 FP / 3 FN yields finite precision, recall and f-score.
    populated = results_pb2.Stats()
    populated.true_positives = 12
    populated.false_positives = 8
    populated.false_negatives = 3
    eval_lib.calculate_stats(populated)
    expected_by_field = (
        ('precision', .6),
        ('recall', .8),
        ('f_score', .6857142857142856),
    )
    for field_name, expected_value in expected_by_field:
        self.assertAlmostEqual(expected_value, getattr(populated, field_name))

    # Case 2: a default Stats (no counts set) yields NaN for every derived
    # metric and an explanatory error message.
    empty = results_pb2.Stats()
    eval_lib.calculate_stats(empty)
    for field_name in ('precision', 'recall', 'f_score'):
        self.assertTrue(math.isnan(getattr(empty, field_name)))
    self.assertEqual(
        'Precision has denominator of zero. Recall has denominator of zero. '
        'f-score is NaN',
        empty.error_message)
def to_results_proto(self):
    """Convert to results_pb2.Results.

    For both strict entity matching and binary token matching, the micro
    stats are passed through eval_lib.calculate_stats before being copied,
    the macro results come from the accumulator's own calculate_stats(), and
    the per-type results come from per_type_protos().

    Returns:
      A results_pb2.Results message with micro-average, macro-average and
      per-type micro-average results populated for both matching modes.
    """
    proto = results_pb2.Results()

    # Strict entity matching section.
    strict_src = self.strict_entity_matching
    strict_dst = proto.strict_entity_matching_results
    eval_lib.calculate_stats(strict_src.micro)
    strict_dst.micro_average_results.CopyFrom(strict_src.micro)
    strict_dst.macro_average_results.CopyFrom(
        strict_src.macro.calculate_stats())
    strict_dst.per_type_micro_average_results.extend(
        strict_src.per_type_protos())

    # Binary token matching section; micro/macro use the typeless variants.
    binary_src = self.binary_token_matching
    binary_dst = proto.binary_token_matching_results
    eval_lib.calculate_stats(binary_src.typeless_micro)
    binary_dst.micro_average_results.CopyFrom(binary_src.typeless_micro)
    binary_dst.macro_average_results.CopyFrom(
        binary_src.typeless_macro.calculate_stats())
    binary_dst.per_type_micro_average_results.extend(
        binary_src.per_type_protos())

    return proto
def testAccumulatedResultsAdd(self):
    """Summing two AccumulatedResults must match accumulating both results."""

    def set_counts(stats, tp, fp, fn):
        # Assign the three raw counts on a Stats message in place.
        stats.true_positives = tp
        stats.false_positives = fp
        stats.false_negatives = fn

    def make_result(overall, type_a, type_b, typeless):
        # Build an IndividualResult from (tp, fp, fn) triples and compute the
        # derived stats for the overall and typeless entries.
        result = eval_lib.IndividualResult()
        set_counts(result.stats, *overall)
        set_counts(result.per_type['TypeA'], *type_a)
        set_counts(result.per_type['TypeB'], *type_b)
        set_counts(result.typeless, *typeless)
        eval_lib.calculate_stats(result.stats)
        eval_lib.calculate_stats(result.typeless)
        return result

    first = make_result(
        overall=(30, 20, 10),
        type_a=(9, 8, 7),
        type_b=(6, 5, 4),
        typeless=(15, 14, 13))
    second = make_result(
        overall=(3, 2, 1),
        type_a=(19, 18, 17),
        type_b=(16, 15, 14),
        typeless=(13, 12, 11))

    # Reference: one accumulator that has seen both results.
    combined = eval_lib.AccumulatedResults()
    combined.add_result(first)
    combined.add_result(second)

    # Under test: two single-result accumulators joined with "+".
    left = eval_lib.AccumulatedResults()
    left.add_result(first)
    right = eval_lib.AccumulatedResults()
    right.add_result(second)
    summed = left + right

    self.assertEqual(combined.micro, summed.micro)
    self.assertEqual(combined.macro.calculate_stats(),
                     summed.macro.calculate_stats())
    self.assertEqual(combined.per_type, summed.per_type)
    self.assertEqual(combined.typeless_micro, summed.typeless_micro)
    self.assertEqual(combined.typeless_macro.calculate_stats(),
                     summed.typeless_macro.calculate_stats())
def testPickle(self):
    """IndividualResult and AccumulatedResults must survive a pickle round trip."""

    def set_counts(stats, tp, fp, fn):
        # Assign the three raw counts on a Stats message in place.
        stats.true_positives = tp
        stats.false_positives = fp
        stats.false_negatives = fn

    original = eval_lib.IndividualResult()
    set_counts(original.stats, 30, 20, 10)
    set_counts(original.per_type['TypeA'], 9, 8, 7)
    set_counts(original.per_type['TypeB'], 6, 5, 4)
    set_counts(original.typeless, 15, 14, 13)
    eval_lib.calculate_stats(original.stats)
    eval_lib.calculate_stats(original.typeless)

    # Round trip the individual result and compare it field by field.
    restored = pickle.loads(pickle.dumps(original))
    self.assertEqual(original.record_id, restored.record_id)
    self.assertEqual(original.per_type, restored.per_type)
    # stats and typeless carry computed float metrics, so they are compared
    # after normalize_floats.
    self.assertEqual(normalize_floats(original.stats),
                     normalize_floats(restored.stats))
    self.assertEqual(normalize_floats(original.typeless),
                     normalize_floats(restored.typeless))
    self.assertEqual(original.debug_info, restored.debug_info)

    # Round trip an accumulator that holds that single result.
    accumulated = eval_lib.AccumulatedResults()
    accumulated.add_result(original)
    restored_acc = pickle.loads(pickle.dumps(accumulated))
    self.assertEqual(accumulated.micro, restored_acc.micro)
    self.assertEqual(accumulated.macro.calculate_stats(),
                     restored_acc.macro.calculate_stats())
    self.assertEqual(accumulated.per_type, restored_acc.per_type)
    self.assertEqual(accumulated.typeless_micro, restored_acc.typeless_micro)
    self.assertEqual(accumulated.typeless_macro.calculate_stats(),
                     restored_acc.typeless_macro.calculate_stats())
def testAccumulateResults(self):
    """Accumulating two results yields the expected micro/macro/per-type stats."""

    def set_counts(stats, tp, fp, fn):
        # Assign the three raw counts on a Stats message in place.
        stats.true_positives = tp
        stats.false_positives = fp
        stats.false_negatives = fn

    def make_result(overall, type_a, type_b, typeless):
        # Build an IndividualResult from (tp, fp, fn) triples and compute the
        # derived stats for the overall and typeless entries.
        result = eval_lib.IndividualResult()
        set_counts(result.stats, *overall)
        set_counts(result.per_type['TypeA'], *type_a)
        set_counts(result.per_type['TypeB'], *type_b)
        set_counts(result.typeless, *typeless)
        eval_lib.calculate_stats(result.stats)
        eval_lib.calculate_stats(result.typeless)
        return result

    def counts_proto(tp, fp, fn):
        # Expected Stats message carrying only raw counts.
        return results_pb2.Stats(
            true_positives=tp, false_positives=fp, false_negatives=fn)

    def metrics_proto(precision, recall, f_score):
        # Expected Stats message carrying only derived metrics.
        return results_pb2.Stats(
            precision=precision, recall=recall, f_score=f_score)

    accumulated = eval_lib.AccumulatedResults()
    accumulated.add_result(make_result(
        overall=(30, 20, 10),
        type_a=(9, 8, 7),
        type_b=(6, 5, 4),
        typeless=(15, 14, 13)))
    accumulated.add_result(make_result(
        overall=(3, 2, 1),
        type_a=(19, 18, 17),
        type_b=(16, 15, 14),
        typeless=(13, 12, 11)))

    # Micro stats: raw counts summed across both results.
    self.assertEqual(counts_proto(33, 22, 11), accumulated.micro)

    # Macro stats hold float metrics, so compare after normalize_floats.
    self.assertEqual(
        normalize_floats(metrics_proto(0.6, 0.75, 0.666667)),
        normalize_floats(accumulated.macro.calculate_stats()))

    # Per-type counts are summed independently for each type.
    wanted_per_type = {
        'TypeA': counts_proto(28, 26, 24),
        'TypeB': counts_proto(22, 20, 18),
    }
    self.assertEqual(wanted_per_type, accumulated.per_type)

    # Typeless micro stats: summed typeless counts.
    self.assertEqual(counts_proto(28, 26, 24), accumulated.typeless_micro)

    # Typeless macro stats, again compared after float normalization.
    self.assertEqual(
        normalize_floats(metrics_proto(0.518621, 0.53869, 0.528465)),
        normalize_floats(accumulated.typeless_macro.calculate_stats()))