def matcher(self, searcher, context=None):
    """Return a matcher for this phrase, built from an ordered ``SpanNear2``.

    :param searcher: object providing ``schema`` and ``reader()``.
    :param context: passed through unchanged to the span query's matcher.
    :returns: ``matching.NullMatcher()`` when the field is not in the schema
        or any word is absent from the reader; otherwise the span query's
        matcher, wrapped in a ``WrappingMatcher`` when ``self.boost`` is not
        1.0.
    :raises qcore.QueryError: if the field has no format or its format does
        not support positions.
    """
    from whoosh.query import Term, SpanNear2

    name = self.fieldname
    schema = searcher.schema
    if name not in schema:
        return matching.NullMatcher()

    field = schema[name]
    fmt = field.format
    if not (fmt and fmt.supports("positions")):
        raise qcore.QueryError("Phrase search: %r field has no positions"
                               % self.fieldname)

    # Convert each word and turn it into a Term query, bailing out as soon
    # as one of the words turns out not to be in the reader.
    reader = searcher.reader()
    term_queries = []
    for raw_word in self.words:
        token = field.to_bytes(raw_word)
        if (name, token) in reader:
            term_queries.append(Term(name, token))
        else:
            return matching.NullMatcher()

    # The phrase is expressed as an ordered SpanNear2 over the term queries.
    span_query = SpanNear2(term_queries, slop=self.slop, ordered=True,
                           mindist=1)
    result = span_query.matcher(searcher, context)
    if self.boost == 1.0:
        return result
    return matching.WrappingMatcher(result, boost=self.boost)
def matcher(self, searcher, context=None):
    """Return a postings matcher for this term.

    :param searcher: object providing ``schema``, ``reader()``, ``postings()``
        and a default ``weighting``.
    :param context: optional object whose ``weighting`` is used in place of
        ``searcher.weighting`` when given.
    :returns: ``matching.NullMatcher()`` when the field is not in the schema,
        the text cannot be converted (``to_bytes`` raises ``ValueError``), or
        the term is not in the reader. Otherwise the postings matcher, with
        ``self.minquality`` applied when truthy and wrapped in a
        ``WrappingMatcher`` when ``self.boost`` is not 1.0.
    """
    name, raw_text = self.fieldname, self.text
    schema = searcher.schema
    if name not in schema:
        return matching.NullMatcher()

    field = schema[name]
    try:
        token = field.to_bytes(raw_text)
    except ValueError:
        return matching.NullMatcher()

    if (self.fieldname, token) not in searcher.reader():
        return matching.NullMatcher()

    # An explicit context overrides the searcher's own weighting.
    scorer = searcher.weighting if context is None else context.weighting
    postings = searcher.postings(self.fieldname, token, weighting=scorer)
    if self.minquality:
        postings.set_min_quality(self.minquality)
    if self.boost != 1.0:
        postings = matching.WrappingMatcher(postings, boost=self.boost)
    return postings
def matcher(self, searcher, context=None):
    """Return a matcher for this phrase, built with ``SpanNear.phrase``.

    :param searcher: object providing ``schema`` and ``reader()``.
    :param context: passed through unchanged to the span query's matcher.
    :returns: ``matching.NullMatcher()`` when the field is not in the schema
        or any word is absent from the reader; otherwise the span query's
        matcher, wrapped in a ``WrappingMatcher`` when ``self.boost`` is not
        1.0.
    :raises qcore.QueryError: if all words exist but the field has no format
        or its format does not support positions.
    """
    name = self.fieldname
    reader = searcher.reader()
    schema = searcher.schema
    if name not in schema:
        return matching.NullMatcher()

    field = schema[name]
    tokens = [field.to_bytes(raw_word) for raw_word in self.words]

    # A phrase cannot match if any one of its words is missing entirely.
    if not all((name, token) in reader for token in tokens):
        return matching.NullMatcher()

    # Note that the positions check happens only after the word lookup.
    fmt = field.format
    if not (fmt and fmt.supports("positions")):
        raise qcore.QueryError("Phrase search: %r field has no positions"
                               % self.fieldname)

    # Let SpanNear build the query structure for the phrase and use its
    # matcher.
    from whoosh.query.spans import SpanNear

    span_query = SpanNear.phrase(name, tokens, slop=self.slop)
    result = span_query.matcher(searcher, context)
    if self.boost != 1.0:
        result = matching.WrappingMatcher(result, boost=self.boost)
    return result
def test_empty_andnot():
    """Check AndNotMatcher when one or both sides are NullMatchers."""
    # Both sides empty: the result is inactive and yields no ids.
    both_empty = matching.AndNotMatcher(matching.NullMatcher(),
                                        matching.NullMatcher())
    assert not both_empty.is_active()
    assert list(both_empty.all_ids()) == []

    # Only the negative side empty: every positive id passes through.
    expected = list(range(1, 11))
    positive = matching.ListMatcher(list(expected))
    negative = matching.NullMatcher()
    passthrough = matching.AndNotMatcher(positive, negative)
    assert list(passthrough.all_ids()) == expected
def matcher(self, searcher, context=None):
    """Return a matcher over every term this query expands to.

    The terms come from ``self._btexts(reader)`` on the field named by
    ``self.field()``.

    :param searcher: object providing ``reader()``.
    :param context: optional search context; when ``self.constantscore`` is
        set and more than one term exists, it is replaced by a copy (or a new
        ``SearchContext``) with ``weighting=None``.
    :returns: ``matching.NullMatcher()`` for no terms, the single term's own
        matcher for exactly one term, otherwise the matcher of an ``Or`` over
        all the terms carrying ``self.boost``.
    """
    from whoosh.query import Or

    name = self.field()
    constantscore = self.constantscore
    reader = searcher.reader()
    term_queries = [Term(name, btext) for btext in self._btexts(reader)]

    if not term_queries:
        return matching.NullMatcher()

    if len(term_queries) == 1:
        # A lone term needs no Or wrapper.
        # NOTE(review): this branch applies neither self.boost nor
        # constantscore -- confirm that is intended.
        return term_queries[0].matcher(searcher, context)

    if constantscore:
        # A weighting of None tells the sub-queries that score is irrelevant.
        if context:
            context = context.set(weighting=None)
        else:
            from whoosh.searching import SearchContext

            context = SearchContext(weighting=None)

    return Or(term_queries, boost=self.boost).matcher(searcher, context)
def matcher(self, searcher, weighting=None):
    """Return a postings matcher for this term.

    :param searcher: object providing ``schema``, ``reader()`` and
        ``postings()``.
    :param weighting: forwarded to ``searcher.postings``.
    :returns: ``matching.NullMatcher()`` when the field is not in the schema
        or the term is not in the reader; otherwise the postings matcher,
        wrapped in a ``WrappingMatcher`` when ``self.boost`` is not 1.0.
    """
    term_text = self.text
    if self.fieldname not in searcher.schema:
        return matching.NullMatcher()

    # Be lenient with non-string terms such as query.Term("printed", True):
    # let the field turn the value into text.
    is_string = isinstance(term_text, (bytes_type, text_type))
    if not is_string:
        term_text = searcher.schema[self.fieldname].to_text(term_text)

    if (self.fieldname, term_text) not in searcher.reader():
        return matching.NullMatcher()

    found = searcher.postings(self.fieldname, term_text, weighting=weighting)
    if self.boost != 1.0:
        return matching.WrappingMatcher(found, boost=self.boost)
    return found
def matcher(self, searcher, context=None):
    """Return a matcher that splits subqueries into "small" and "big" groups.

    Subqueries whose ``estimate_size(reader)`` is at most
    ``self.SPLIT_DOC_LIMIT`` are treated as small. They are combined with
    ``DefaultOr`` and wrapped in a ``matching.ArrayMatcher`` sized by
    ``context.limit``. The remaining big subqueries are combined with
    ``DefaultOr``, unioned with the small matcher, and given the small
    matcher's ``limit_quality()`` as a minimum quality.

    :param searcher: object providing ``reader()``.
    :param context: search context supplying ``limit``. When ``None`` there
        is no limit to size the pre-scored matcher with, so a plain
        ``DefaultOr`` over all subqueries is used instead. Previously this
        case raised ``AttributeError`` as soon as any small subquery existed.
    :returns: ``matching.NullMatcher()`` for no subqueries, the lone
        subquery's matcher for exactly one, otherwise the combined matcher.
    """
    subs = self.subqueries
    if not subs:
        return matching.NullMatcher()
    if len(subs) == 1:
        return subs[0].matcher(searcher, context)

    # Pre-scoring needs context.limit; without a context fall back to the
    # plain Or over every subquery rather than dereferencing None.
    if context is None:
        return DefaultOr(subs).matcher(searcher, context)

    # Sort the subqueries into "small" and "big" queries based on their
    # estimated size. This works best for term queries.
    reader = searcher.reader()
    smallqs = []
    bigqs = []
    for q in subs:
        if q.estimate_size(reader) <= self.SPLIT_DOC_LIMIT:
            smallqs.append(q)
        else:
            bigqs.append(q)

    # Build a pre-scored matcher for the small queries
    minscore = 0
    smallmatcher = None
    if smallqs:
        smallmatcher = DefaultOr(smallqs).matcher(searcher, context)
        smallmatcher = matching.ArrayMatcher(smallmatcher, context.limit)
        minscore = smallmatcher.limit_quality()

    if bigqs:
        # Get a matcher for the big queries
        m = DefaultOr(bigqs).matcher(searcher, context)
        # Add the prescored matcher for the small queries
        if smallmatcher:
            m = matching.UnionMatcher(m, smallmatcher)
            # Set the minimum score based on the prescored matcher
            m.set_min_quality(minscore)
    elif smallmatcher:
        # If there are no big queries, just return the prescored matcher
        m = smallmatcher
    else:
        m = matching.NullMatcher()

    return m
def matcher(self, searcher, context=None):
    """Return a matcher for this compound query.

    Handles the trivial cases here and hands everything else to
    ``self._matcher()``, which subclasses must implement.

    :param searcher: forwarded to the subquery or to ``_matcher``.
    :param context: forwarded to the subquery or to ``_matcher``.
    :returns: ``matching.NullMatcher()`` when there are no subqueries, the
        lone subquery's matcher when there is exactly one, otherwise the
        result of ``self._matcher(subs, searcher, context)``.
    """
    subs = self.subqueries
    if not subs:
        return matching.NullMatcher()
    if len(subs) == 1:
        # A single subquery stands in for the whole compound query.
        return subs[0].matcher(searcher, context)
    return self._matcher(subs, searcher, context)
def matcher(self, searcher, weighting=None):
    """Return a matcher over every term this query expands to.

    The terms come from ``self._words(reader)`` on ``self.fieldname``.

    :param searcher: object providing ``reader()`` and ``schema``.
    :param weighting: forwarded to the term or ``Or`` matcher. The preloading
        branch calls the term matchers without it.
    :returns: ``matching.NullMatcher()`` for no terms; the single term's
        matcher for one term; a ``matching.ListMatcher`` of preloaded
        postings when ``self.constantscore`` is set or the number of terms
        exceeds ``self.TOO_MANY_CLAUSES``; otherwise the matcher of an ``Or``
        over the terms.
    """
    fieldname = self.fieldname
    constantscore = self.constantscore
    reader = searcher.reader()
    term_queries = [Term(fieldname, word) for word in self._words(reader)]
    if not term_queries:
        return matching.NullMatcher()

    count = len(term_queries)
    if count == 1:
        # A lone term needs no combining at all.
        return term_queries[0].matcher(searcher, weighting=weighting)

    if not (constantscore or count > self.TOO_MANY_CLAUSES):
        # The default case: Or the terms together
        from whoosh.query import Or

        return Or(term_queries).matcher(searcher, weighting=weighting)

    # With this many clauses an Or search would take forever, so trade
    # memory for time: walk every term's postings up front and serve the
    # matching documents from a ListMatcher.
    fmt = searcher.schema[fieldname].format
    values_by_doc = defaultdict(list)
    weights_by_doc = defaultdict(float)
    for term_query in term_queries:
        term_matcher = term_query.matcher(searcher)
        while term_matcher.is_active():
            docnum = term_matcher.id()
            values_by_doc[docnum].append(term_matcher.value())
            if not constantscore:
                weights_by_doc[docnum] += term_matcher.weight()
            term_matcher.next()

    docnums = sorted(values_by_doc)
    # One list of value strings per document -- ListMatcher does the work of
    # combining multiple values if the user asks for them.
    list_kwargs = {
        "values": [values_by_doc[docnum] for docnum in docnums],
        "format": fmt,
    }
    if constantscore:
        list_kwargs["all_weights"] = self.boost
    else:
        list_kwargs["weights"] = [weights_by_doc[docnum]
                                  for docnum in docnums]
    return matching.ListMatcher(docnums, **list_kwargs)
def _matcher(self, matchercls, q_weight_fn, searcher, weighting=None,
             **kwargs):
    """Build a tree of ``matchercls`` matchers over the subqueries.

    :param matchercls: matcher class used for the internal tree nodes.
    :param q_weight_fn: function called on each query to get the "weight"
        used to build a huffman-like tree with ``make_weighted_tree``. When
        ``None``, ``make_binary_tree`` builds an order-preserving binary tree
        instead.
    :param searcher: forwarded to every subquery's ``matcher()``.
    :param weighting: forwarded to the positive subqueries' matchers only.
    :param kwargs: accepted but not used by this method.
    :returns: ``matching.NullMatcher()`` when ``self._split_queries()``
        yields no positive subqueries. Otherwise the combined matcher,
        wrapped in an ``AndNotMatcher`` when an active negated matcher
        exists, and in a ``WrappingMatcher`` when ``self.boost`` is not 1.0.
    """
    # _split_queries() separates out the queries that sat inside a Not().
    positives, negatives = self._split_queries()
    if not positives:
        return matching.NullMatcher()

    pos_matchers = [q.matcher(searcher, weighting=weighting)
                    for q in positives]
    if len(pos_matchers) == 1:
        combined = pos_matchers[0]
    elif q_weight_fn is None:
        combined = make_binary_tree(matchercls, pos_matchers)
    else:
        weighted = [(q_weight_fn(q), sub_m)
                    for q, sub_m in zip(positives, pos_matchers)]
        combined = make_weighted_tree(matchercls, weighted)

    # Anything that was inside a Not() is unioned together and subtracted
    # from the positive matcher, provided it can match at all.
    if negatives:
        if len(negatives) != 1:
            reader = searcher.reader()
            sized = [(q.estimate_size(reader), q.matcher(searcher))
                     for q in negatives]
            excluded = make_weighted_tree(matching.UnionMatcher, sized)
        else:
            excluded = negatives[0].matcher(searcher)

        if excluded.is_active():
            combined = matching.AndNotMatcher(combined, excluded)

    # Apply this query's own boost on top, when it has one.
    if self.boost != 1.0:
        combined = matching.WrappingMatcher(combined, self.boost)

    return combined
def matcher(self, searcher, context=None):
    """Return a new ``matching.NullMatcher``.

    Neither ``searcher`` nor ``context`` is used.
    """
    null_matcher = matching.NullMatcher()
    return null_matcher
def matcher(self, searcher, weighting=None):
    """Return a new ``matching.NullMatcher``.

    Neither ``searcher`` nor ``weighting`` is used.
    """
    null_matcher = matching.NullMatcher()
    return null_matcher
def test_nullmatcher():
    """A NullMatcher must be inactive and yield no document ids."""
    null_matcher = matching.NullMatcher()
    assert not null_matcher.is_active()

    found_ids = list(null_matcher.all_ids())
    assert found_ids == []
def test_nullmatcher():
    """A NullMatcher must be inactive and yield no document ids."""
    null_matcher = matching.NullMatcher()
    assert not null_matcher.is_active()

    found_ids = list(null_matcher.all_ids())
    assert_equal(found_ids, [])