Exemplo n.º 1
0
def test_collect_matches2doc_ranker_driver_min_ranker():
    """Exercise ``SimpleCollectMatchesRankDriver`` with a ``MinRanker`` executor.

    Before the driver runs, the lowest score among matches whose ``parent_id``
    is ``'30'`` must be below the lowest score for ``parent_id == '20'``.
    After ``_traverse_apply`` the document is expected to hold exactly two
    matches, ids ``'30'`` then ``'20'``, each scored ``1 / (1 + lowest)``
    and each referencing ``doc.id`` in ``score.ref_id``.
    """
    import sys

    doc = create_document_to_score_same_depth_level()
    driver = SimpleCollectMatchesRankDriver()
    executor = MinRanker()
    driver.attach(executor=executor, pea=None)

    def lowest_score(parent_id):
        # Seed with sys.maxsize so a parent without any match keeps the sentinel.
        candidates = [sys.maxsize]
        candidates.extend(
            m.score.value for m in doc.matches if m.parent_id == parent_id
        )
        return min(candidates)

    lowest_for_30 = lowest_score('30')
    lowest_for_20 = lowest_score('20')
    assert lowest_for_30 < lowest_for_20

    driver._traverse_apply([doc])

    # Expected (match id, pre-ranking lowest score) pairs, in ranked order.
    expected = (('30', lowest_for_30), ('20', lowest_for_20))
    assert len(doc.matches) == 2
    for position, (match_id, lowest) in enumerate(expected):
        ranked = doc.matches[position]
        assert ranked.id == match_id
        assert ranked.score.value == pytest.approx(1. / (1. + lowest), 1e-7)

    for ranked in doc.matches:
        # match score is computed w.r.t. doc.id
        assert ranked.score.ref_id == doc.id
def test_chunk2doc_ranker_driver_traverse_apply_larger_range():
    """Run ``SimpleChunk2DocRankDriver`` over the ``('cc', 'c')`` traversal paths.

    Each document is expected to end up with one match and one chunk; the
    chunk holds two matches scored ``1 / (1 + id * 2)`` with ``ref_id`` 101,
    and the document match is scored ``1 / (1 + lowest chunk-match score)``
    with ``ref_id`` 100.
    """
    documents = [create_chunk_chunk_matches_to_score()]
    ranker_driver = SimpleChunk2DocRankDriver(traversal_paths=('cc', 'c'))
    ranker_driver.attach(executor=MinRanker(), pea=None)
    ranker_driver._traverse_apply(documents)

    for document in documents:
        assert len(document.matches) == 1
        assert len(document.chunks) == 1

        only_chunk = document.chunks[0]
        assert len(only_chunk.matches) == 2
        for chunk_match in only_chunk.matches:
            # the score should be 1 / (1 + id * 2)
            expected_score = 1. / (1 + float(chunk_match.id) * 2.)
            assert chunk_match.score.value == pytest.approx(expected_score,
                                                            0.0001)
            assert chunk_match.score.ref_id == 101

        # Lowest score seen among the chunk-level matches.
        lowest = min(cm.score.value for cm in only_chunk.matches)

        top_match = document.matches[0]
        assert top_match.score.ref_id == 100
        assert top_match.score.value == pytest.approx(1. / (1 + lowest),
                                                      0.0001)
Exemplo n.º 3
0
def test_chunk2doc_ranker_driver_traverse_apply():
    """Apply ``SimpleChunk2DocRankDriver(recur_range=(0, 1))`` with ``MinRanker``.

    Every document must come out with two matches whose score equals
    ``1 / (1 + id * 2)`` within a relative tolerance of 0.0001.
    """
    documents = [create_chunk_matches_to_score()]
    ranker_driver = SimpleChunk2DocRankDriver(recur_range=(0, 1))
    ranker_driver.attach(executor=MinRanker(), pea=None)
    ranker_driver._traverse_apply(documents)

    for document in documents:
        assert len(document.matches) == 2
        for ranked in document.matches:
            # the score should be 1 / (1 + id * 2)
            expected_score = 1. / (1 + ranked.id * 2.)
            assert ranked.score.value == pytest.approx(expected_score, 0.0001)
Exemplo n.º 4
0
 def test_chunk2doc_ranker_driver_traverse_apply(self):
     """Apply ``SimpleChunk2DocRankDriver(depth_range=(0, 1))`` with ``MinRanker``.

     Every document must come out with two matches, each scored
     ``1 / (1 + id * 2)`` and each reporting ``level_depth == 0``.
     """
     documents = [create_chunk_matches_to_score()]
     ranker_driver = SimpleChunk2DocRankDriver(depth_range=(0, 1))
     ranker_driver.attach(executor=MinRanker(), pea=None)
     ranker_driver._traverse_apply(documents)

     for document in documents:
         assert len(document.matches) == 2
         for ranked in document.matches:
             # the score should be 1 / (1 + id * 2)
             expected_score = 1. / (1 + ranked.id * 2.)
             self.assertAlmostEqual(ranked.score.value, expected_score)
             assert ranked.level_depth == 0
Exemplo n.º 5
0
def test_chunk2doc_ranker_driver_min_ranker():
    """Check ``SimpleChunk2DocRankDriver._apply_all`` with a ``MinRanker``.

    The document is expected to receive four matches with ids 40, 50, 60, 70
    (in that order), scored ``1 / (1 + k)`` for k = 4, 5, 6, 7 respectively,
    and each referencing ``doc.id`` in ``score.ref_id``.
    """
    doc = create_document_to_score()
    ranker_driver = SimpleChunk2DocRankDriver()
    ranker_driver.attach(executor=MinRanker(), pea=None)
    ranker_driver._apply_all(doc.chunks, doc)

    # (expected match id, k) where the expected score is 1 / (1 + k).
    expected = ((40, 4), (50, 5), (60, 6), (70, 7))
    assert len(doc.matches) == 4
    for position, (match_id, k) in enumerate(expected):
        ranked = doc.matches[position]
        assert ranked.id == match_id
        assert ranked.score.value == pytest.approx(1 / (1 + k), 0.0001)

    for ranked in doc.matches:
        # match score is computed w.r.t. doc.id
        assert ranked.score.ref_id == doc.id
Exemplo n.º 6
0
 def test_chunk2doc_ranker_driver_MinRanker(self):
     """Check ``SimpleChunk2DocRankDriver._apply_all`` with a ``MinRanker``.

     The document is expected to receive four matches with ids 40, 50, 60,
     70 (in that order), scored ``1 / (1 + k)`` for k = 4, 5, 6, 7, and each
     referencing ``doc.id`` in ``score.ref_id``.
     """
     doc = create_document_to_score()
     ranker_driver = SimpleChunk2DocRankDriver()
     ranker_driver.attach(executor=MinRanker(), pea=None)
     ranker_driver._apply_all(doc.chunks, doc)

     # (expected match id, k) where the expected score is 1 / (1 + k).
     expected = ((40, 4), (50, 5), (60, 6), (70, 7))
     assert len(doc.matches) == 4
     for position, (match_id, k) in enumerate(expected):
         ranked = doc.matches[position]
         assert ranked.id == match_id
         self.assertAlmostEqual(ranked.score.value, 1 / (1 + k))

     for ranked in doc.matches:
         # match score is computed w.r.t. doc.id
         assert ranked.score.ref_id == doc.id