def test_regular_or():
    """Check that spans produced by an ``Or`` of two terms hit those terms.

    For every document the matcher visits, the stored ``text`` field is
    indexed by each span's ``start`` and the element found there must be
    either "bravo" or "alfa".
    """
    index = get_index()
    with index.searcher() as searcher:
        either = Or([Term("text", "bravo"), Term("text", "alfa")])
        matcher = either.matcher(searcher)
        while matcher.is_active():
            # presumably the stored value is a sequence of tokens, so a span
            # start can be used as a direct index -- verify against get_index
            stored_text = searcher.stored_fields(matcher.id())["text"]
            for span in matcher.spans():
                assert stored_text[span.start] in ("bravo", "alfa")
            matcher.next()
def matcher(self, searcher, context=None):
    """Return a matcher covering every term this query expands to.

    The expansion comes from ``self._btexts(reader)``; each item becomes a
    ``Term`` query on ``self.fieldname``.

    * No terms: a ``matching.NullMatcher``.
    * Exactly one term: that term's own matcher, built with ``context``.
    * ``self.constantscore`` is set, or there are more than
      ``self.TOO_MANY_CLAUSES`` terms: every per-term matcher is drained
      eagerly and the merged postings are served by a single
      ``matching.ListMatcher``. The per-term matchers on this path are
      created without ``context``.
    * Otherwise: the matcher of an ``Or`` over all the terms, built with
      ``context``.
    """
    field = self.fieldname
    ignore_scores = self.constantscore

    reader = searcher.reader()
    term_queries = [Term(field, text) for text in self._btexts(reader)]
    if not term_queries:
        return matching.NullMatcher()

    count = len(term_queries)
    if count > 1 and (ignore_scores or count > self.TOO_MANY_CLAUSES):
        # An Or over this many clauses would take forever, so trade memory
        # for time: collect every matching document up front and serve the
        # lot from one ListMatcher.
        fmt = searcher.schema[field].format
        values_by_doc = defaultdict(list)
        weight_by_doc = defaultdict(float)
        for term_query in term_queries:
            postings = term_query.matcher(searcher)
            while postings.is_active():
                doc = postings.id()
                values_by_doc[doc].append(postings.value())
                if not ignore_scores:
                    weight_by_doc[doc] += postings.weight()
                postings.next()

        ordered_docs = sorted(values_by_doc)
        # One list of value strings per document -- ListMatcher does the
        # work of combining them if the caller asks for values.
        merged_values = [values_by_doc[doc] for doc in ordered_docs]

        if ignore_scores:
            weight_args = {"all_weights": self.boost}
        else:
            weight_args = {
                "weights": [weight_by_doc[doc] for doc in ordered_docs]
            }
        return matching.ListMatcher(
            ordered_docs, values=merged_values, format=fmt, **weight_args
        )

    if count == 1:
        # A lone term needs no combining.
        combined = term_queries[0]
    else:
        # The default case: Or the terms together.
        from whoosh.query import Or

        combined = Or(term_queries)

    return combined.matcher(searcher, context)
def matcher(self, searcher, weighting=None):
    """Return a matcher covering every word this query expands to.

    The expansion comes from ``self._words(reader)``; each word becomes a
    ``Term`` query on ``self.fieldname``.

    * No words: a ``matching.NullMatcher``.
    * Exactly one word: that term's own matcher, built with ``weighting``.
    * ``self.constantscore`` is set, or there are more than
      ``self.TOO_MANY_CLAUSES`` words: every per-term matcher is drained
      eagerly and the merged postings are served by a single
      ``matching.ListMatcher``. The per-term matchers on this path are
      created without ``weighting``.
    * Otherwise: the matcher of an ``Or`` over all the terms, built with
      ``weighting``.
    """
    field = self.fieldname
    ignore_scores = self.constantscore

    reader = searcher.reader()
    term_queries = [Term(field, word) for word in self._words(reader)]
    if not term_queries:
        return matching.NullMatcher()

    count = len(term_queries)
    if count > 1 and (ignore_scores or count > self.TOO_MANY_CLAUSES):
        # An Or over this many clauses would take forever, so trade memory
        # for time: gather all matching documents now and hand them to a
        # ListMatcher.
        fmt = searcher.schema[field].format
        values_by_doc = defaultdict(list)
        weight_by_doc = defaultdict(float)
        for term_query in term_queries:
            postings = term_query.matcher(searcher)
            while postings.is_active():
                doc = postings.id()
                values_by_doc[doc].append(postings.value())
                if not ignore_scores:
                    weight_by_doc[doc] += postings.weight()
                postings.next()

        ordered_docs = sorted(values_by_doc)
        # One list of value strings per document -- ListMatcher does the
        # work of combining them if the caller asks for values.
        merged_values = [values_by_doc[doc] for doc in ordered_docs]

        if ignore_scores:
            weight_args = {"all_weights": self.boost}
        else:
            weight_args = {
                "weights": [weight_by_doc[doc] for doc in ordered_docs]
            }
        return matching.ListMatcher(
            ordered_docs, values=merged_values, format=fmt, **weight_args
        )

    if count == 1:
        # A lone term needs no combining.
        combined = term_queries[0]
    else:
        # The default case: Or the terms together.
        from whoosh.query import Or

        combined = Or(term_queries)

    return combined.matcher(searcher, weighting=weighting)