def test_union():
    """A union nested inside another union yields each ID exactly once."""
    low = matching.ListMatcher([1, 2, 3, 4, 5, 6, 7, 8])
    mid = matching.ListMatcher([2, 4, 8, 10, 20, 30])
    high = matching.ListMatcher([10, 100, 200])

    # Combine the last two lists first, then union that with the first list.
    inner = matching.UnionMatcher(mid, high)
    outer = matching.UnionMatcher(low, inner)

    expected = [1, 2, 3, 4, 5, 6, 7, 8, 10, 20, 30, 100, 200]
    assert expected == list(outer.all_ids())
def test_simple_union():
    """Check stepping, ``all_ids()`` and ``copy()`` on a two-way union."""

    def fresh_union():
        # Every scenario below works on a newly built union.
        left = matching.ListMatcher([1, 4, 10, 20, 90])
        right = matching.ListMatcher([0, 4, 20])
        return matching.UnionMatcher(left, right)

    # Scenario 1: walk the union by hand, recording (id, score) pairs.
    union = fresh_union()
    pairs = []
    while union.is_active():
        pairs.append((union.id(), union.score()))
        union.next()
    assert pairs == [(0, 1.0), (1, 1.0), (4, 2.0), (10, 1.0), (20, 2.0),
                     (90, 1.0)]

    # Scenario 2: all_ids() produces the merged ID sequence.
    union = fresh_union()
    assert list(union.all_ids()) == [0, 1, 4, 10, 20, 90]

    # Scenario 3: advance twice, copy, and make sure the copy continues
    # from the position the source matcher had reached.
    union = fresh_union()
    for _ in range(2):
        union.next()
    clone = union.copy()
    remaining = []
    while clone.is_active():
        remaining.append(clone.id())
        clone.next()
    assert remaining == [4, 10, 20, 90]
def create_matchers():
    """Assemble a ``MultiMatcher`` out of six 1000-entry list matchers.

    Each ID list is paired with a constant value list and its own
    ``WeightScorer``.  The six list matchers are unioned pairwise; the third
    union is wrapped as ``InverseMatcher(union, 15)``, and the three results
    are handed to ``MultiMatcher`` together with ``[0, 9, 18]``.
    """
    count = 1000

    # One ID sequence per leaf matcher, in the order they are paired up.
    id_lists = [
        list(range(count)),
        list(range(1, count + 1)),
        [n * 2 + n % 5 for n in range(count)],
        [n * n for n in range(count)],
        [1001 - n for n in range(count)],
        [n * 3 // 2 for n in range(count)],
    ]
    # Constant per-posting value and scorer weight for each leaf.
    values = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6)
    weights = (0.15, 0.25, 0.35, 0.45, 0.55, 0.65)

    leaves = [
        matching.ListMatcher(ids, [value] * count,
                             scoring.WeightScorer(weight))
        for ids, value, weight in zip(id_lists, values, weights)
    ]

    # Union neighbouring leaves: (1, 2), (3, 4), (5, 6).
    first, second, third = [
        matching.UnionMatcher(left, right)
        for left, right in zip(leaves[0::2], leaves[1::2])
    ]

    inverted = matching.InverseMatcher(third, 15)
    return matching.MultiMatcher([first, second, inverted], [0, 9, 18])
def test_union_scores():
    """Scores from every branch of a nested union are summed per ID."""
    first = matching.ListMatcher([1, 2, 3])
    second = matching.ListMatcher([2, 4, 8])
    third = matching.ListMatcher([2, 3, 8])

    nested = matching.UnionMatcher(second, third)
    union = matching.UnionMatcher(first, nested)

    # Walk the matcher manually so that both the ID and its score are seen.
    observed = []
    while union.is_active():
        observed.append((union.id(), union.score()))
        union.next()

    expected = [(1, 1.0), (2, 3.0), (3, 2.0), (4, 1.0), (8, 2.0)]
    assert expected == observed
def test_replacements():
    """Exercise ``replace()`` on several matcher types.

    Uses plain ``assert`` statements throughout, like the other tests in
    this file, instead of an externally imported ``assert_equal`` helper.
    """
    sc = scoring.WeightScorer(0.25)
    a = matching.ListMatcher([1, 2, 3], [0.25, 0.25, 0.25], scorer=sc)
    b = matching.ListMatcher([1, 2, 3], [0.25, 0.25, 0.25], scorer=sc)
    um = matching.UnionMatcher(a, b)

    # A single list matcher replaced with 0.5 becomes the null matcher.
    a2 = a.replace(0.5)
    assert a2.__class__ == matching.NullMatcherClass

    # The union replaced with 0.5 becomes an intersection; with 0.6 it
    # becomes the null matcher.
    um2 = um.replace(0.5)
    assert um2.__class__ == matching.IntersectionMatcher
    um2 = um.replace(0.6)
    assert um2.__class__ == matching.NullMatcherClass

    # A wrapping matcher stays a wrapper with the same boost, while its
    # child is replaced.
    wm = matching.WrappingMatcher(um, boost=2.0)
    wm = wm.replace(0.5)
    assert wm.__class__ == matching.WrappingMatcher
    assert wm.boost == 2.0
    assert wm.child.__class__ == matching.IntersectionMatcher

    ls1 = matching.ListMatcher([1, 2, 3], [0.1, 0.1, 0.1],
                               scorer=scoring.WeightScorer(0.1))
    ls2 = matching.ListMatcher([1, 2, 3], [0.2, 0.2, 0.2],
                               scorer=scoring.WeightScorer(0.2))
    ls3 = matching.ListMatcher([1, 2, 3], [0.3, 0.3, 0.3],
                               scorer=scoring.WeightScorer(0.3))

    # After replace(0.25) the multi-matcher's ``current`` is 2, i.e. the
    # index of ls3 in the list passed in.
    mm = matching.MultiMatcher([ls1, ls2, ls3], [0, 4, 8])
    mm = mm.replace(0.25)
    assert mm.current == 2

    # The disjunction-max matcher is replaced by the ls2 object itself.
    dm = matching.DisjunctionMaxMatcher(ls1, ls2)
    dm = dm.replace(0.15)
    assert dm is ls2
def matcher(self, searcher, context=None):
    """Return a matcher for the union of this query's subqueries.

    Subqueries whose estimated size is at most ``self.SPLIT_DOC_LIMIT`` are
    combined with ``DefaultOr`` and wrapped in ``matching.ArrayMatcher``
    (built with ``context.limit``).  The remaining subqueries are combined
    with ``DefaultOr`` and unioned with that array matcher, whose
    ``limit_quality()`` is then applied via ``set_min_quality()``.

    :param searcher: object providing ``reader()``; also passed through to
        the subqueries' ``matcher()`` calls.
    :param context: search context supplying ``limit``.  If ``None``, no
        limit is available for the array matcher, so the subqueries are
        simply combined with ``DefaultOr``.
    :returns: a matcher object.
    """

    # Get the subqueries
    subs = self.subqueries
    if not subs:
        return matching.NullMatcher()
    if len(subs) == 1:
        return subs[0].matcher(searcher, context)

    # The pre-scored path below reads ``context.limit``; with the default
    # ``context=None`` that would raise AttributeError, so fall back to the
    # ordinary union of all subqueries instead.
    if context is None:
        return DefaultOr(subs).matcher(searcher, context)

    # Sort the subqueries into "small" and "big" queries based on their
    # estimated size. This works best for term queries.
    reader = searcher.reader()
    smallqs = []
    bigqs = []
    for q in subs:
        if q.estimate_size(reader) <= self.SPLIT_DOC_LIMIT:
            smallqs.append(q)
        else:
            bigqs.append(q)

    # Build a pre-scored matcher for the small queries
    minscore = 0
    smallmatcher = None
    if smallqs:
        smallmatcher = DefaultOr(smallqs).matcher(searcher, context)
        smallmatcher = matching.ArrayMatcher(smallmatcher, context.limit)
        minscore = smallmatcher.limit_quality()

    if bigqs:
        # Get a matcher for the big queries
        m = DefaultOr(bigqs).matcher(searcher, context)
        # Add the prescored matcher for the small queries
        if smallmatcher:
            m = matching.UnionMatcher(m, smallmatcher)
            # Set the minimum score based on the prescored matcher
            m.set_min_quality(minscore)
    elif smallmatcher:
        # If there are no big queries, just return the prescored matcher
        m = smallmatcher
    else:
        m = matching.NullMatcher()

    return m