def test_union():
    """Check that a nested three-way union lists every distinct id once.

    The expected result is the sorted, de-duplicated merge of the three
    input id lists.
    """
    first = matching.ListMatcher([1, 2, 3, 4, 5, 6, 7, 8])
    second = matching.ListMatcher([2, 4, 8, 10, 20, 30])
    third = matching.ListMatcher([10, 100, 200])

    # The right-hand child of the outer union is itself a union.
    nested = matching.UnionMatcher(second, third)
    combined = matching.UnionMatcher(first, nested)

    expected = [1, 2, 3, 4, 5, 6, 7, 8, 10, 20, 30, 100, 200]
    assert expected == list(combined.all_ids())
def test_simple_union():
    """Exercise a two-way union by manual stepping, all_ids() and copy().

    Three scenarios are checked, each on newly constructed matchers:
    stepping through (id, score) pairs, listing ids via ``all_ids()``,
    and continuing iteration on a copy taken after two ``next()`` calls.
    """

    def build_union():
        # Every scenario below starts from newly constructed matchers.
        left = matching.ListMatcher([1, 4, 10, 20, 90])
        right = matching.ListMatcher([0, 4, 20])
        return matching.UnionMatcher(left, right)

    # Scenario 1: step manually and record (id, score) pairs. The expected
    # scores are 2.0 for ids present in both inputs (4 and 20), else 1.0.
    union = build_union()
    scored = []
    while union.is_active():
        pair = (union.id(), union.score())
        scored.append(pair)
        union.next()
    expected_scored = [
        (0, 1.0), (1, 1.0), (4, 2.0), (10, 1.0), (20, 2.0), (90, 1.0),
    ]
    assert scored == expected_scored

    # Scenario 2: all_ids() yields the same ids without the scores.
    union = build_union()
    assert list(union.all_ids()) == [0, 1, 4, 10, 20, 90]

    # Scenario 3: advance twice, then continue on a copy; the copy is
    # expected to resume from the third id (4).
    union = build_union()
    union.next()
    union.next()
    clone = union.copy()
    remaining = []
    while clone.is_active():
        remaining.append(clone.id())
        clone.next()
    assert remaining == [4, 10, 20, 90]
示例#3
0
def create_matchers():
    """Assemble a MultiMatcher over two unions and one inverted union.

    Six ListMatchers of 1000 postings each are created, every one with a
    constant per-posting value and its own WeightScorer. They are paired
    into three UnionMatchers; the third union is wrapped in an
    InverseMatcher before all three are combined.

    Returns:
        The ``matching.MultiMatcher`` built from the three sub-matchers.
    """
    count = 1000

    # Id sequences, one per leaf matcher.
    id_lists = [
        list(range(count)),                       # 0, 1, 2, ...
        list(range(1, count + 1)),                # 1, 2, 3, ...
        [2 * n + n % 5 for n in range(count)],
        [n ** 2 for n in range(count)],           # perfect squares
        list(range(1001, 1, -1)),                 # 1001 down to 2
        [(3 * n) // 2 for n in range(count)],
    ]

    # One constant value list per leaf, same length as the id lists.
    value_lists = [[v] * count for v in (0.1, 0.2, 0.3, 0.4, 0.5, 0.6)]

    # All scorers are created before any matcher, as in the original order.
    scorers = [
        scoring.WeightScorer(weight)
        for weight in (0.15, 0.25, 0.35, 0.45, 0.55, 0.65)
    ]

    leaves = [
        matching.ListMatcher(ids, values, scorer)
        for ids, values, scorer in zip(id_lists, value_lists, scorers)
    ]
    leaf_a, leaf_b, leaf_c, leaf_d, leaf_e, leaf_f = leaves

    union_ab = matching.UnionMatcher(leaf_a, leaf_b)
    union_cd = matching.UnionMatcher(leaf_c, leaf_d)
    union_ef = matching.UnionMatcher(leaf_e, leaf_f)

    # NOTE(review): the meaning of 15 and of [0, 9, 18] is defined by
    # matching.InverseMatcher / matching.MultiMatcher (presumably an id
    # limit and per-matcher id offsets) -- confirm against those classes.
    inverted = matching.InverseMatcher(union_ef, 15)
    return matching.MultiMatcher([union_ab, union_cd, inverted], [0, 9, 18])
def test_union_scores():
    """Check the (id, score) pairs produced by a nested three-way union.

    In the expected output each id scores 1.0 for every input list that
    contains it (e.g. id 2 is in all three lists and scores 3.0).
    """
    first = matching.ListMatcher([1, 2, 3])
    second = matching.ListMatcher([2, 4, 8])
    third = matching.ListMatcher([2, 3, 8])

    nested = matching.UnionMatcher(second, third)
    union = matching.UnionMatcher(first, nested)

    # Step through the union by hand, recording each posting.
    observed = []
    while union.is_active():
        posting = (union.id(), union.score())
        observed.append(posting)
        union.next()

    expected = [(1, 1.0), (2, 3.0), (3, 2.0), (4, 1.0), (8, 2.0)]
    assert expected == observed
示例#5
0
def test_replacements():
    """Check what ``replace(minquality)`` returns for several matcher types.

    Covers a plain ListMatcher, a UnionMatcher, a WrappingMatcher around
    the union, a MultiMatcher and a DisjunctionMaxMatcher.
    """
    shared_scorer = scoring.WeightScorer(0.25)
    left = matching.ListMatcher([1, 2, 3], [0.25] * 3, scorer=shared_scorer)
    right = matching.ListMatcher([1, 2, 3], [0.25] * 3, scorer=shared_scorer)
    union = matching.UnionMatcher(left, right)

    # A single 0.25-weight list asked for 0.5 is replaced by the null
    # matcher class.
    left_replaced = left.replace(0.5)
    assert_equal(left_replaced.__class__, matching.NullMatcherClass)

    # The union asked for 0.5 is expected to become an intersection;
    # asked for 0.6 it is expected to become the null matcher class.
    union_at_half = union.replace(0.5)
    assert_equal(union_at_half.__class__, matching.IntersectionMatcher)
    union_above = union.replace(0.6)
    assert_equal(union_above.__class__, matching.NullMatcherClass)

    # A wrapper keeps its own class and boost while its child is replaced.
    wrapper = matching.WrappingMatcher(union, boost=2.0)
    wrapper_replaced = wrapper.replace(0.5)
    assert_equal(wrapper_replaced.__class__, matching.WrappingMatcher)
    assert_equal(wrapper_replaced.boost, 2.0)
    assert_equal(wrapper_replaced.child.__class__,
                 matching.IntersectionMatcher)

    # Three list matchers with increasing weights 0.1, 0.2 and 0.3.
    ls1 = matching.ListMatcher([1, 2, 3], [0.1, 0.1, 0.1],
                               scorer=scoring.WeightScorer(0.1))
    ls2 = matching.ListMatcher([1, 2, 3], [0.2, 0.2, 0.2],
                               scorer=scoring.WeightScorer(0.2))
    ls3 = matching.ListMatcher([1, 2, 3], [0.3, 0.3, 0.3],
                               scorer=scoring.WeightScorer(0.3))

    # After asking for 0.25 the multi-matcher's ``current`` is expected
    # to be 2 (the position of the 0.3-weight matcher in the list).
    multi = matching.MultiMatcher([ls1, ls2, ls3], [0, 4, 8])
    multi_replaced = multi.replace(0.25)
    assert_equal(multi_replaced.current, 2)

    # Asking the dismax matcher for 0.15 is expected to hand back the
    # 0.2-weight child object itself.
    dismax = matching.DisjunctionMaxMatcher(ls1, ls2)
    dismax_replaced = dismax.replace(0.15)
    assert dismax_replaced is ls2
示例#6
0
    def matcher(self, searcher, context=None):
        """Build a matcher that treats "small" and "big" subqueries apart.

        Subqueries whose estimated size is at most ``self.SPLIT_DOC_LIMIT``
        are combined and wrapped in a ``matching.ArrayMatcher``; the rest
        are combined normally. When both groups exist they are joined in a
        ``matching.UnionMatcher`` whose minimum quality is set from the
        array matcher's ``limit_quality()``.

        :param searcher: searcher passed through to the subqueries; its
            ``reader()`` is used for size estimation.
        :param context: passed through to the subquery matchers.
            NOTE(review): the default is None, but ``context.limit`` is
            read whenever at least one small subquery exists -- confirm
            that callers always supply a context.
        :returns: ``matching.NullMatcher()`` when there are no subqueries,
            the sole subquery's matcher when there is exactly one,
            otherwise the combined matcher described above.
        """
        # NOTE(review): ``collectors`` is not referenced in this method;
        # the import is kept exactly as the original had it.
        from whoosh import collectors

        subs = self.subqueries
        if not subs:
            return matching.NullMatcher()
        if len(subs) == 1:
            return subs[0].matcher(searcher, context)

        # Partition the subqueries by estimated size. This works best for
        # term queries.
        reader = searcher.reader()
        small_group = []
        big_group = []
        for sub in subs:
            if sub.estimate_size(reader) <= self.SPLIT_DOC_LIMIT:
                bucket = small_group
            else:
                bucket = big_group
            bucket.append(sub)

        # Pre-score the small subqueries and remember the quality floor
        # they establish.
        quality_floor = 0
        prescored = None
        if small_group:
            small_union = DefaultOr(small_group).matcher(searcher, context)
            prescored = matching.ArrayMatcher(small_union, context.limit)
            quality_floor = prescored.limit_quality()

        # With no big subqueries the pre-scored matcher (if any) is the
        # whole answer.
        if not big_group:
            if prescored:
                return prescored
            return matching.NullMatcher()

        combined = DefaultOr(big_group).matcher(searcher, context)
        if prescored:
            # Join in the pre-scored matcher and apply its quality floor
            # to the combined matcher.
            combined = matching.UnionMatcher(combined, prescored)
            combined.set_min_quality(quality_floor)
        return combined