def __init__(self, ms, slop=1, ordered=True, mindist=1):
    """Initialise the matcher over the intersection of the given matchers.

    :param ms: the sequence of sub-matchers to combine.
    :param slop: kept on the instance as ``self.slop`` (default 1).
    :param ordered: kept on the instance as ``self.ordered`` (default
        True).
    :param mindist: kept on the instance as ``self.mindist`` (default 1).
    """
    # Remember the configuration exactly as it was passed in.
    self.ms = ms
    self.slop = slop
    self.ordered = ordered
    self.mindist = mindist

    # The parent class is handed a single matcher: a tree of
    # IntersectionMatcher objects built from every sub-matcher.
    intersection = make_binary_tree(binary.IntersectionMatcher, ms)
    super(SpanNear2.SpanNear2Matcher, self).__init__(intersection)
def __init__(self, ms, slop=1, ordered=True, mindist=1):
    """Initialise the matcher over the intersection of the given matchers.

    :param ms: the sequence of sub-matchers to combine.
    :param slop: kept on the instance as ``self.slop`` (default 1).
    :param ordered: kept on the instance as ``self.ordered`` (default
        True).
    :param mindist: kept on the instance as ``self.mindist`` (default 1).
    """
    # Remember the configuration exactly as it was passed in.
    self.ms = ms
    self.slop = slop
    self.ordered = ordered
    self.mindist = mindist

    # Combine all sub-matchers into one IntersectionMatcher tree and let
    # the parent class wrap that single matcher.
    intersection_cls = cylleneus.engine.matching.binary.IntersectionMatcher
    combined = make_binary_tree(intersection_cls, ms)
    super(SpanWith2.SpanWith2Matcher, self).__init__(combined)
def word_graph(self, fieldname):
    """Return the word graph for ``fieldname`` across the sub-readers.

    Only sub-readers that report a word graph for the field contribute.
    A single contributing graph is returned unchanged; otherwise the
    graphs are combined with ``make_binary_tree`` using ``UnionNode``.

    :param fieldname: the name of the field whose word graph is wanted.
    :raises Exception: if ``self.has_word_graph(fieldname)`` is false.
    """
    from whoosh.support.dawg import UnionNode
    from whoosh.util import make_binary_tree

    if not self.has_word_graph(fieldname):
        raise Exception("No word graph for field %r" % fieldname)

    # Gather the graph of every sub-reader that has one for this field.
    available = []
    for reader in self.readers:
        if reader.has_word_graph(fieldname):
            available.append(reader.word_graph(fieldname))

    if len(available) == 1:
        return available[0]
    return make_binary_tree(UnionNode, available)
def word_graph(self, fieldname):
    """Return the word graph for ``fieldname`` across the sub-readers.

    Only sub-readers that report a word graph for the field contribute.
    A single contributing graph is returned unchanged; otherwise the
    graphs are combined with ``make_binary_tree`` using ``UnionNode``.

    :param fieldname: the name of the field whose word graph is wanted.
    :raises Exception: if ``self.has_word_graph(fieldname)`` is false.
    :raises KeyError: if no sub-reader yields a graph for the field.
    """
    from whoosh.automata.fst import UnionNode
    from whoosh.util import make_binary_tree

    if not self.has_word_graph(fieldname):
        raise Exception("No word graph for field %r" % fieldname)

    # Gather the graph of every sub-reader that has one for this field.
    available = []
    for reader in self.readers:
        if reader.has_word_graph(fieldname):
            available.append(reader.word_graph(fieldname))

    if not available:
        raise KeyError("No readers have graph for %r" % fieldname)
    if len(available) == 1:
        return available[0]
    return make_binary_tree(UnionNode, available)
def word_graph(self, fieldname):
    """Build the combined word graph of ``fieldname`` from the sub-readers.

    Sub-readers without a word graph for the field are skipped. When only
    one graph is found it is returned directly; several graphs are merged
    through ``make_binary_tree`` with ``UnionNode`` as the joining node.

    :param fieldname: the name of the field whose word graph is wanted.
    :raises Exception: if ``self.has_word_graph(fieldname)`` is false.
    :raises KeyError: if no sub-reader yields a graph for the field.
    """
    from whoosh.automata.fst import UnionNode
    from whoosh.util import make_binary_tree

    if not self.has_word_graph(fieldname):
        raise Exception("No word graph for field %r" % fieldname)

    readers_with_graph = (
        reader for reader in self.readers if reader.has_word_graph(fieldname)
    )
    found = [reader.word_graph(fieldname) for reader in readers_with_graph]

    count = len(found)
    if count == 0:
        raise KeyError("No readers have graph for %r" % fieldname)
    if count == 1:
        return found[0]
    return make_binary_tree(UnionNode, found)
def test_random_union():
    """Compare a UnionMatcher tree with a plain set union on random input.

    Each round draws a random number of ID lists from ``0..99``, wraps
    every list in a ListMatcher, joins them with ``make_binary_tree`` and
    checks that the tree yields exactly the sorted union of the IDs.
    """
    rounds = 100
    ids_per_clause = (2, 10)
    clauses_per_round = (2, 10)
    universe = list(range(100))

    for _ in xrange(rounds):
        expected = set()
        clause_matchers = []
        for _ in xrange(randint(*clauses_per_round)):
            how_many = randint(*ids_per_clause)
            ids = sample(universe, how_many)
            expected.update(ids)
            # ListMatcher is given its IDs in ascending order.
            clause_matchers.append(matching.ListMatcher(sorted(ids)))

        expected_ids = sorted(expected)
        union = make_binary_tree(matching.UnionMatcher, clause_matchers)
        assert_equal(list(union.all_ids()), expected_ids)
def test_random_union():
    """Compare a UnionMatcher tree with a plain set union on random input.

    Each round draws a random number of ID lists from ``0..99``, wraps
    every list in a ListMatcher, joins them with ``make_binary_tree`` and
    checks that the tree yields exactly the sorted union of the IDs.
    """
    rounds = 100
    ids_per_clause = (2, 10)
    clauses_per_round = (2, 10)
    universe = list(range(100))

    for _ in xrange(rounds):
        expected = set()
        clause_matchers = []
        for _ in xrange(randint(*clauses_per_round)):
            how_many = randint(*ids_per_clause)
            ids = sample(universe, how_many)
            expected.update(ids)
            # ListMatcher is given its IDs in ascending order.
            clause_matchers.append(matching.ListMatcher(sorted(ids)))

        expected_ids = sorted(expected)
        union = make_binary_tree(matching.UnionMatcher, clause_matchers)
        found_ids = list(union.all_ids())
        assert found_ids == expected_ids
def phrase(cls, fieldname, words, slop=1, ordered=True):
    """Build a tree of SpanNear queries that matches a list of terms.

    Convenience constructor for a phrase query expressed as a binary tree
    of SpanNear queries::

        SpanNear.phrase("content", ["alfa", "bravo", "charlie", "delta"])

    :param fieldname: the name of the field to search in.
    :param words: a sequence of token texts to search for.
    :param slop: the number of positions within which the terms must
        occur. The default of 1 means the terms must be right next to
        each other.
    :param ordered: whether the terms must occur in order. Default is
        True.
    """
    # One Term query per word, all in the same field.
    leaves = []
    for text in words:
        leaves.append(Term(fieldname, text))

    return make_binary_tree(cls, leaves, slop=slop, ordered=ordered)
def phrase(cls, fieldname, words, slop=1, ordered=True):
    """Build a tree of SpanNear queries that matches a list of terms.

    Convenience constructor for a phrase query expressed as a binary tree
    of SpanNear queries::

        SpanNear.phrase("content", ["alfa", "bravo", "charlie", "delta"])

    :param fieldname: the name of the field to search in.
    :param words: a sequence of texts to search for.
    :param slop: the number of positions within which the terms must
        occur. The default of 1 means the terms must be right next to
        each other.
    :param ordered: whether the terms must occur in order. Default is
        True.
    """
    # The same span options are applied at every node of the tree.
    span_options = {"slop": slop, "ordered": ordered}
    leaves = [Term(fieldname, text) for text in words]
    return make_binary_tree(cls, leaves, **span_options)
def phrase(cls, fieldname, words, slop=1, ordered=True):
    """Build a tree of SpanNear queries that matches a list of terms.

    Convenience constructor for a phrase query expressed as a binary tree
    of SpanNear queries.

    >>> SpanNear.phrase("f", [u"a", u"b", u"c", u"d"])
    SpanNear(SpanNear(Term("f", u"a"), Term("f", u"b")), SpanNear(Term("f", u"c"), Term("f", u"d")))

    :param fieldname: the name of the field to search in.
    :param words: a sequence of token texts to search for.
    :param slop: the number of positions within which the terms must
        occur. The default of 1 means the terms must be right next to
        each other.
    :param ordered: whether the terms must occur in order. Default is
        True.
    """
    return make_binary_tree(
        cls,
        [Term(fieldname, text) for text in words],
        slop=slop,
        ordered=ordered,
    )
def _matcher(self, matchercls, q_weight_fn, searcher, weighting=None, **kwargs): # q_weight_fn is a function which is called on each query and returns a # "weight" value which is used to build a huffman-like matcher tree. If # q_weight_fn is None, an order-preserving binary tree is used instead. # Pull any queries inside a Not() out into their own list subs, nots = self._split_queries() if not subs: return matching.NullMatcher() # Create a matcher from the list of subqueries subms = [q.matcher(searcher, weighting=weighting) for q in subs] if len(subms) == 1: m = subms[0] elif q_weight_fn is None: m = make_binary_tree(matchercls, subms) else: w_subms = [(q_weight_fn(q), m) for q, m in zip(subs, subms)] m = make_weighted_tree(matchercls, w_subms) # If there were queries inside Not(), make a matcher for them and # wrap the matchers in an AndNotMatcher if nots: if len(nots) == 1: notm = nots[0].matcher(searcher) else: r = searcher.reader() notms = [(q.estimate_size(r), q.matcher(searcher)) for q in nots] notm = make_weighted_tree(matching.UnionMatcher, notms) if notm.is_active(): m = matching.AndNotMatcher(m, notm) # If this query had a boost, add a wrapping matcher to apply the boost if self.boost != 1.0: m = matching.WrappingMatcher(m, self.boost) return m
def _tree_matcher(self, subs, mcls, searcher, context, q_weight_fn, **kwargs): # q_weight_fn is a function which is called on each query and returns a # "weight" value which is used to build a huffman-like matcher tree. If # q_weight_fn is None, an order-preserving binary tree is used instead. # Create a matcher from the list of subqueries subms = [q.matcher(searcher, context) for q in subs] if len(subms) == 1: m = subms[0] elif q_weight_fn is None: m = make_binary_tree(mcls, subms, **kwargs) else: w_subms = [(q_weight_fn(q), m) for q, m in zip(subs, subms)] m = make_weighted_tree(mcls, w_subms, **kwargs) # If this query had a boost, add a wrapping matcher to apply the boost if self.boost != 1.0: m = matching.WrappingMatcher(m, self.boost) return m
def matcher(self, searcher, context=None):
    """Return a tree of SpanOrMatcher objects over the sub-query matchers.

    :param searcher: the searcher passed to every sub-query's ``matcher``.
    :param context: passed positionally to every sub-query's ``matcher``.
    """
    sub_matchers = []
    for subq in self.subqs:
        sub_matchers.append(subq.matcher(searcher, context))

    return make_binary_tree(SpanOr.SpanOrMatcher, sub_matchers)
def matcher(self, searcher, weighting=None):
    """Return a tree of SpanOrMatcher objects over the sub-query matchers.

    :param searcher: the searcher passed to every sub-query's ``matcher``.
    :param weighting: forwarded by keyword to every sub-query's
        ``matcher``.
    """
    sub_matchers = []
    for subq in self.subqs:
        sub_matchers.append(subq.matcher(searcher, weighting=weighting))

    return make_binary_tree(SpanOr.SpanOrMatcher, sub_matchers)
def matcher(self, searcher, exclude_docs=None):
    """Return a tree of SpanOrMatcher objects over the sub-query matchers.

    :param searcher: the searcher passed to every sub-query's ``matcher``.
    :param exclude_docs: forwarded by keyword to every sub-query's
        ``matcher``.
    """
    sub_matchers = []
    for subq in self.subqs:
        sub_matchers.append(subq.matcher(searcher, exclude_docs=exclude_docs))

    return make_binary_tree(SpanOr.SpanOrMatcher, sub_matchers)
def matcher(self, searcher, exclude_docs=None):
    """Combine the matchers of all sub-queries into a SpanOrMatcher tree.

    :param searcher: the searcher passed to every sub-query's ``matcher``.
    :param exclude_docs: forwarded by keyword to every sub-query's
        ``matcher``.
    """
    # The same keyword options go to each sub-query.
    options = {"exclude_docs": exclude_docs}
    per_query = [subq.matcher(searcher, **options) for subq in self.subqs]
    return make_binary_tree(SpanOr.SpanOrMatcher, per_query)