import numpy as np
import pytest

# import path as used in jina 1.x; adjust if the local package layout differs
from jina.executors.evaluators.rank.precision import PrecisionEvaluator


# expects a @pytest.mark.parametrize('eval_at, expected', ...) decorator
# supplying the (eval_at, expected) pairs (values not shown here)
def test_precision_evaluator(eval_at, expected):
    matches_ids = [0, 1, 2, 3, 4]
    desired_ids = [1, 0, 20, 30, 40]
    evaluator = PrecisionEvaluator(eval_at=eval_at)
    assert evaluator.evaluate(actual=matches_ids, desired=desired_ids) == expected
    np.testing.assert_almost_equal(evaluator.mean, expected)
def test_precision_evaluator_no_groundtruth():
    matches_ids = [0, 1, 2, 3, 4]
    desired_ids = []
    evaluator = PrecisionEvaluator(eval_at=2)
    assert evaluator.evaluate(actual=matches_ids, desired=desired_ids) == 0.0
    assert evaluator._running_stats._n == 1
    np.testing.assert_almost_equal(evaluator.mean, 0.0)
# same test against the older matches_ids/groundtruth_ids keyword API and the
# num_documents/sum/avg bookkeeping; note the name duplicates
# test_precision_evaluator above, so the two variants belong in separate modules
def test_precision_evaluator(eval_at, expected):
    matches_ids = [0, 1, 2, 3, 4]
    groundtruth_ids = [1, 0, 20, 30, 40]
    evaluator = PrecisionEvaluator(eval_at=eval_at)
    assert evaluator.evaluate(matches_ids=matches_ids, groundtruth_ids=groundtruth_ids) == expected
    assert evaluator.num_documents == 1
    assert evaluator.sum == expected
    assert evaluator.avg == expected
def test_precision_evaluator_no_groundtruth():
    matches_ids = [0, 1, 2, 3, 4]
    groundtruth_ids = []
    evaluator = PrecisionEvaluator(eval_at=2)
    assert evaluator.evaluate(matches_ids=matches_ids, groundtruth_ids=groundtruth_ids) == 0.0
    assert evaluator.num_documents == 1
    assert evaluator.sum == 0.0
    assert evaluator.avg == 0.0
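# The expected values in the tests above follow the usual precision@k definition:
# the fraction of the top-`eval_at` matches that also appear in the groundtruth,
# with 0.0 when the groundtruth is empty. A minimal reference sketch of that
# computation, independent of the evaluator class, is given below; the helper
# name `precision_at_k` is illustrative and not part of the tested API.
def precision_at_k(matches_ids, groundtruth_ids, eval_at):
    # no groundtruth (or nothing to look at) -> precision is defined as 0.0
    if not groundtruth_ids or eval_at == 0:
        return 0.0
    top_k = matches_ids[:eval_at]
    relevant = sum(1 for doc_id in top_k if doc_id in groundtruth_ids)
    return relevant / len(top_k) if top_k else 0.0


# e.g. precision_at_k([0, 1, 2, 3, 4], [1, 0, 20, 30, 40], eval_at=2) == 1.0,
# which matches the Precision@2 value of 1.0 asserted by the driver tests below.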
# expects a @pytest.mark.parametrize('eval_at, expected_first', ...) decorator
def test_precision_evaluator_average(eval_at, expected_first):
    matches_ids = [[0, 1, 2, 3, 4], [-1, -1, -1, -1, -1], [-1, -1, -1, -1, -1]]
    desired_ids = [[1, 0, 20, 30, 40], [1, 0, 20, 30, 40], [1, 0, 20, 30, 40]]
    evaluator = PrecisionEvaluator(eval_at=eval_at)
    assert evaluator.evaluate(actual=matches_ids[0], desired=desired_ids[0]) == expected_first
    assert evaluator.evaluate(actual=matches_ids[1], desired=desired_ids[1]) == 0.0
    assert evaluator.evaluate(actual=matches_ids[2], desired=desired_ids[2]) == 0.0
    assert evaluator._running_stats._n == 3
    np.testing.assert_almost_equal(evaluator.mean, expected_first / 3)
def test_evaluate_assert_doc_groundtruth_structure(simple_chunk_rank_evaluate_driver,
                                                   eval_request_with_unmatching_struct):
    simple_chunk_rank_evaluate_driver.attach(executor=PrecisionEvaluator(eval_at=2), pea=None)
    simple_chunk_rank_evaluate_driver.eval_request = eval_request_with_unmatching_struct
    with pytest.raises(AssertionError):
        simple_chunk_rank_evaluate_driver()
# older-API variant of test_precision_evaluator_average; also expects a
# @pytest.mark.parametrize('eval_at, expected_first', ...) decorator
def test_precision_evaluator_average(eval_at, expected_first):
    matches_ids = [[0, 1, 2, 3, 4], [-1, -1, -1, -1, -1], [-1, -1, -1, -1, -1]]
    groundtruth_ids = [[1, 0, 20, 30, 40], [1, 0, 20, 30, 40], [1, 0, 20, 30, 40]]
    evaluator = PrecisionEvaluator(eval_at=eval_at)
    assert evaluator.evaluate(matches_ids=matches_ids[0],
                              groundtruth_ids=groundtruth_ids[0]) == expected_first
    assert evaluator.evaluate(matches_ids=matches_ids[1],
                              groundtruth_ids=groundtruth_ids[1]) == 0.0
    assert evaluator.evaluate(matches_ids=matches_ids[2],
                              groundtruth_ids=groundtruth_ids[2]) == 0.0
    assert evaluator.num_documents == 3
    assert evaluator.sum == expected_first
    assert evaluator.avg == expected_first / 3
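# Both "average" tests above rely on the evaluator accumulating one statistic per
# evaluate() call: a running count plus a running mean (or sum). A minimal sketch
# of that bookkeeping follows, using a hypothetical stand-alone helper rather than
# the evaluator's internal `_running_stats` object:
class RunningMean:
    def __init__(self):
        self._n = 0       # number of evaluate() calls seen so far
        self._sum = 0.0   # sum of all observed evaluation values

    def update(self, value):
        self._n += 1
        self._sum += value

    @property
    def mean(self):
        return self._sum / self._n if self._n else 0.0


# with the values asserted above (expected_first, 0.0, 0.0), the mean after three
# updates is expected_first / 3, which is what evaluator.mean / evaluator.avg are
# compared against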
def test_ranking_evaluate_runningavg_driver(runningavg_rank_evaluate_driver, ground_truth_pairs):
    runningavg_rank_evaluate_driver.attach(executor=PrecisionEvaluator(eval_at=2), runtime=None)
    runningavg_rank_evaluate_driver._apply_all(ground_truth_pairs)
    for pair in ground_truth_pairs:
        doc = pair.doc
        assert len(doc.evaluations) == 1
        assert doc.evaluations[0].op_name == 'PrecisionEvaluator@2'
        assert doc.evaluations[0].value == 1.0
def test_ranking_evaluate_driver(simple_rank_evaluate_driver, ground_truth_pairs):
    simple_rank_evaluate_driver.attach(executor=PrecisionEvaluator(eval_at=2), pea=None)
    simple_rank_evaluate_driver._apply_all(ground_truth_pairs)
    for pair in ground_truth_pairs:
        doc = pair.doc
        assert len(doc.evaluations) == 1
        assert doc.evaluations[0].op_name == 'SimpleRankEvaluateDriver-Precision@2'
        assert doc.evaluations[0].value == 1.0
def test_ranking_evaluate_driver_matches_in_chunks(simple_chunk_rank_evaluate_driver, eval_request):
    # this test shows that matches can be evaluated at chunk level, i.e. the driver
    # traverses docs and groundtruths in parallel down to their chunks
    simple_chunk_rank_evaluate_driver.attach(executor=PrecisionEvaluator(eval_at=2), pea=None)
    simple_chunk_rank_evaluate_driver.eval_request = eval_request
    simple_chunk_rank_evaluate_driver()
    assert len(eval_request.docs) == len(eval_request.groundtruths)
    assert len(eval_request.docs) == 10
    for doc in eval_request.docs:
        assert len(doc.evaluations) == 0  # no evaluation at doc level
        assert len(doc.chunks) == 1
        chunk = doc.chunks[0]
        assert len(chunk.evaluations) == 1  # evaluation is done at chunk level
        assert chunk.evaluations[0].op_name == 'SimpleChunkRankEvaluateDriver-Precision@2'
        assert chunk.evaluations[0].value == 1.0
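# The driver tests above only inspect the result of the traversal: each evaluated
# document (or chunk) ends up with exactly one evaluation record carrying an
# op_name and a value. A minimal sketch of that attachment step, using hypothetical
# plain-Python stand-ins for the framework's document/score objects (the real
# drivers operate on protobuf-backed types):
from dataclasses import dataclass, field
from typing import List


@dataclass
class Evaluation:
    op_name: str
    value: float


@dataclass
class Doc:
    evaluations: List[Evaluation] = field(default_factory=list)
    chunks: List['Doc'] = field(default_factory=list)


def attach_evaluation(doc, op_name, value):
    # the real drivers derive op_name from the driver and metric names,
    # e.g. 'SimpleChunkRankEvaluateDriver-Precision@2'; here it is passed explicitly
    doc.evaluations.append(Evaluation(op_name=op_name, value=value))


# e.g. for the chunk-level test above: the parent doc stays untouched while its
# single chunk receives the Precision@2 record
doc = Doc(chunks=[Doc()])
attach_evaluation(doc.chunks[0], 'SimpleChunkRankEvaluateDriver-Precision@2', 1.0)
assert len(doc.evaluations) == 0 and len(doc.chunks[0].evaluations) == 1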