def test_perfect_match(self):
    ret = dummy_format_factory(['zero'])
    rel = dummy_format_factory(['zero'])
    # args: TextOnlyEvaluator(retrieved, relevant)
    e = TextOnlyEvaluator(ret, rel)
    r = e.get_eval_results()
    self.assertAlmostEqual(r.precision, 1)
    self.assertAlmostEqual(r.recall, 1)
    self.assertAlmostEqual(r.f1_score, 1)
def test_mismatch(self):
    ret = dummy_format_factory(['one', 'four'])
    rel = dummy_format_factory(['two', 'three'])
    # args: TextOnlyEvaluator(retrieved, relevant)
    e = TextOnlyEvaluator(ret, rel)
    r = e.get_eval_results()
    self.assertEqual(r.precision, 0)
    self.assertEqual(r.recall, 0)
    self.assertTrue(math.isinf(r.f1_score))
def test_match(self):
    ret = dummy_format_factory(['zero', 'one', 'two', 'four'])
    rel = dummy_format_factory(['one', 'two', 'three'])
    # args: TextOnlyEvaluator(retrieved, relevant)
    e = TextOnlyEvaluator(ret, rel)
    r = e.get_eval_results()
    self.assertAlmostEqual(r.precision, 0.5)
    self.assertAlmostEqual(r.recall, 0.6666, delta=0.0001)
    self.assertAlmostEqual(r.f1_score, 0.5714, delta=0.001)
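# Worked check for the expected values above (an informal sketch; the exact
# definitions live in TextOnlyEvaluator): retrieved and relevant share the two
# words 'one' and 'two', with 4 retrieved and 3 relevant items, so
#
#     tp = len({'zero', 'one', 'two', 'four'} & {'one', 'two', 'three'})  # 2
#     precision = tp / 4                                   # 0.5
#     recall = tp / 3                                      # ~0.6667
#     f1 = 2 * precision * recall / (precision + recall)   # ~0.5714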
def test_both_empty(self):
    ret = dummy_format_factory([])
    rel = dummy_format_factory([])
    # args: TextOnlyEvaluator(retrieved, relevant)
    e = TextOnlyEvaluator(ret, rel)
    r = e.get_eval_results()
    self.assertTrue(math.isinf(r.precision))
    self.assertTrue(math.isinf(r.recall))
    self.assertTrue(math.isnan(r.f1_score))
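# Edge-case convention implied by this test and test_mismatch (a sketch of the
# assumed behaviour, not necessarily the exact implementation inside
# TextOnlyEvaluator): division by zero falls back to float('inf'), e.g.
#
#     def _safe_div(numerator, denominator):
#         # hypothetical helper, illustrating the convention only
#         try:
#             return numerator / denominator
#         except ZeroDivisionError:
#             return float('inf')
#
# With empty inputs both precision and recall become inf, and the f1 formula
# 2*P*R / (P + R) then evaluates to inf/inf, which is nan under ordinary float
# arithmetic; with zero precision and recall the same formula hits 0/0 and the
# fallback yields inf, matching test_mismatch.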
def single_evaluation(extractor_cls, results, dataset_type, dataset_name):
    logger.info('started evaluating extractor %s', extractor_cls.NAME)
    results.set_extractor(extractor_cls.SLUG)
    storage = LocalResultStorage(dataset_name, extractor_cls)
    loader = LocalDatasetLoader(dataset_name)
    for doc in loader:
        logger.debug('doc: %s', doc.id)
        # gold-standard (relevant) text for this document
        format_clean = from_document_factory(doc, slug=dataset_type)
        try:
            result_string = storage.fetch_result(doc)
        except DataError:
            logger.info('no stored result for %s at %s extractor', doc.id, extractor_cls.NAME)
            continue
        else:
            # extractor output (retrieved) text, evaluated against the gold standard
            format_result = extractor_cls.formatted_result(result_string)
            evaluator = TextOnlyEvaluator(
                retrieved=format_result,
                relevant=format_clean,
                id=doc.id,
            )
            results.add_result(evaluator.get_eval_results())
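# Hypothetical call site for single_evaluation (the extractor classes, result
# container and dataset names below are illustrative assumptions, not part of
# the project):
#
#     results = ResultContainer()  # assumed aggregator exposing set_extractor/add_result
#     for extractor_cls in (BoilerpipeExtractor, ReadabilityExtractor):  # assumed extractor classes
#         single_evaluation(extractor_cls, results,
#                           dataset_type='cleaneval',  # assumed dataset slug
#                           dataset_name='cleaneval')  # assumed local dataset name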