Пример #1
0
    def matcher(self, searcher, context=None):
        """Return a matcher for this phrase, built from a ``SpanNear2`` query.

        One ``Term`` query is created per word in ``self.words``; the terms
        are combined into an ordered ``SpanNear2`` query using ``self.slop``,
        and that query's matcher is returned. When ``self.boost`` is not 1.0
        the matcher is wrapped in a ``matching.WrappingMatcher`` carrying the
        boost.

        :param searcher: searcher whose schema and reader are consulted.
        :param context: passed through unchanged to the ``SpanNear2`` query's
            ``matcher()`` call.
        :returns: a ``matching.NullMatcher`` when the field is not in the
            schema, when the field cannot convert one of the words to bytes,
            or when one of the words is not in the reader; otherwise the
            (possibly boost-wrapped) span matcher.
        :raises qcore.QueryError: if the field has no format, or its format
            does not support positions.
        """
        from whoosh.query import Term, SpanNear2

        fieldname = self.fieldname
        if fieldname not in searcher.schema:
            return matching.NullMatcher()

        field = searcher.schema[fieldname]
        if not field.format or not field.format.supports("positions"):
            raise qcore.QueryError("Phrase search: %r field has no positions"
                                   % self.fieldname)

        terms = []
        # Build a list of Term queries from the words in the phrase
        reader = searcher.reader()
        for word in self.words:
            try:
                word = field.to_bytes(word)
            except ValueError:
                # Treat a word the field cannot convert as matching nothing,
                # rather than letting the ValueError escape to the caller.
                return matching.NullMatcher()

            if (fieldname, word) not in reader:
                # Shortcut the query if one of the words doesn't exist.
                return matching.NullMatcher()
            terms.append(Term(fieldname, word))

        # Create the equivalent SpanNear2 query from the terms
        q = SpanNear2(terms, slop=self.slop, ordered=True, mindist=1)
        # Get the matcher
        m = q.matcher(searcher, context)

        if self.boost != 1.0:
            m = matching.WrappingMatcher(m, boost=self.boost)
        return m
Пример #2
0
    def matcher(self, searcher, context=None):
        """Return the postings matcher for this term.

        :param searcher: searcher providing the schema, reader, postings and
            the default weighting.
        :param context: optional object with a ``weighting`` attribute; when
            it is ``None`` the searcher's own weighting is used instead.
        :returns: a ``matching.NullMatcher`` if the field is not in the
            schema, if the field raises ``ValueError`` converting the text to
            bytes, or if the term is not in the reader. Otherwise the
            searcher's postings matcher, with ``self.minquality`` applied
            when truthy and wrapped in a ``matching.WrappingMatcher`` when
            ``self.boost`` is not 1.0.
        """
        fieldname = self.fieldname
        schema = searcher.schema
        if fieldname not in schema:
            return matching.NullMatcher()

        field = schema[fieldname]
        try:
            termbytes = field.to_bytes(self.text)
        except ValueError:
            return matching.NullMatcher()

        # Bail out early when the index does not contain the term at all.
        if (fieldname, termbytes) not in searcher.reader():
            return matching.NullMatcher()

        # An explicit context overrides the searcher's weighting model.
        weighting = searcher.weighting if context is None else context.weighting

        postings = searcher.postings(fieldname, termbytes, weighting=weighting)
        if self.minquality:
            postings.set_min_quality(self.minquality)
        if self.boost == 1.0:
            return postings
        return matching.WrappingMatcher(postings, boost=self.boost)
Пример #3
0
    def matcher(self, searcher, context=None):
        """Return a matcher for this phrase, built with ``SpanNear.phrase``.

        :param searcher: searcher whose schema and reader are consulted.
        :param context: passed through unchanged to the span query's
            ``matcher()`` call.
        :returns: a ``matching.NullMatcher`` when the field is not in the
            schema, when the field cannot convert one of the words to bytes,
            or when one of the words is not in the reader; otherwise the span
            matcher, wrapped in a ``matching.WrappingMatcher`` when
            ``self.boost`` is not 1.0.
        :raises qcore.QueryError: if the field has no format, or its format
            does not support positions. This check runs after the word
            lookups, so it is only reached when every word exists.
        """
        fieldname = self.fieldname
        reader = searcher.reader()

        if fieldname not in searcher.schema:
            return matching.NullMatcher()
        field = searcher.schema[fieldname]

        try:
            words = [field.to_bytes(word) for word in self.words]
        except ValueError:
            # Treat a word the field cannot convert as matching nothing,
            # rather than letting the ValueError escape to the caller.
            return matching.NullMatcher()

        # Shortcut the query if one of the words doesn't exist.
        for word in words:
            if (fieldname, word) not in reader:
                return matching.NullMatcher()

        if not field.format or not field.format.supports("positions"):
            raise qcore.QueryError("Phrase search: %r field has no positions" %
                                   self.fieldname)

        # Construct a tree of SpanNear queries representing the words in the
        # phrase and return its matcher
        from whoosh.query.spans import SpanNear

        q = SpanNear.phrase(fieldname, words, slop=self.slop)
        m = q.matcher(searcher, context)
        if self.boost != 1.0:
            m = matching.WrappingMatcher(m, boost=self.boost)
        return m
def test_empty_andnot():
    """Check AndNotMatcher when the negative side is a NullMatcher."""
    # Null AND-NOT null: inactive, and yields no ids.
    null_pos = matching.NullMatcher()
    null_neg = matching.NullMatcher()
    both_null = matching.AndNotMatcher(null_pos, null_neg)
    assert not both_null.is_active()
    remaining = list(both_null.all_ids())
    assert not remaining

    # List AND-NOT null: every id of the positive side comes through.
    expected = list(range(1, 11))
    list_pos = matching.ListMatcher(list(expected))
    null_neg = matching.NullMatcher()
    minus_nothing = matching.AndNotMatcher(list_pos, null_neg)
    assert list(minus_nothing.all_ids()) == expected
Пример #5
0
    def matcher(self, searcher, context=None):
        """Return a matcher over every term this multi-term query expands to.

        The terms come from ``self._btexts(reader)``; each becomes a ``Term``
        query on ``self.field()``.

        :param searcher: searcher supplying the reader and used to build the
            sub-matchers.
        :param context: optional search context. When ``self.constantscore``
            is set and more than one term was found, a copy with
            ``weighting=None`` (or a new ``SearchContext(weighting=None)``
            when no context was given) is used for the ``Or`` matcher.
        :returns: a ``matching.NullMatcher`` when there are no terms, the
            single term's matcher when there is exactly one, otherwise the
            matcher of an ``Or`` over all terms built with ``self.boost``.
        """
        from whoosh.query import Or

        fieldname = self.field()
        constantscore = self.constantscore

        reader = searcher.reader()
        termqueries = []
        for btext in self._btexts(reader):
            termqueries.append(Term(fieldname, btext))

        if not termqueries:
            return matching.NullMatcher()

        if len(termqueries) == 1:
            # A lone term needs no Or wrapper.
            # NOTE(review): this path does not apply self.boost or
            # constantscore, unlike the Or path below -- confirm intended.
            return termqueries[0].matcher(searcher, context)

        if constantscore:
            # Setting weighting to None tells the sub-query that the score
            # doesn't matter.
            if context:
                context = context.set(weighting=None)
            else:
                from whoosh.searching import SearchContext
                context = SearchContext(weighting=None)

        # Or the terms together
        union = Or(termqueries, boost=self.boost)
        return union.matcher(searcher, context)
Пример #6
0
    def matcher(self, searcher, weighting=None):
        """Return the postings matcher for this term.

        :param searcher: searcher providing the schema, reader and postings.
        :param weighting: forwarded as the ``weighting`` keyword of
            ``searcher.postings()``.
        :returns: a ``matching.NullMatcher`` if the field is not in the schema
            or the term is not in the reader; otherwise the postings matcher,
            wrapped in a ``matching.WrappingMatcher`` when ``self.boost`` is
            not 1.0.
        """
        fieldname = self.fieldname
        schema = searcher.schema
        if fieldname not in schema:
            return matching.NullMatcher()

        term_text = self.text
        # Be lenient with queries built from a non-text value, e.g.
        # query.Term("printed", True): let the field convert it to text.
        if not isinstance(term_text, (bytes_type, text_type)):
            term_text = schema[fieldname].to_text(term_text)

        if (fieldname, term_text) not in searcher.reader():
            return matching.NullMatcher()

        postings = searcher.postings(fieldname, term_text, weighting=weighting)
        if self.boost == 1.0:
            return postings
        return matching.WrappingMatcher(postings, boost=self.boost)
Пример #7
0
    def matcher(self, searcher, context=None):
        """Return a matcher that ORs the subqueries, pre-scoring small ones.

        Subqueries whose ``estimate_size()`` is at most
        ``self.SPLIT_DOC_LIMIT`` are combined with ``DefaultOr`` and wrapped in
        a ``matching.ArrayMatcher``. The remaining subqueries are combined
        with a second ``DefaultOr``; when both groups exist their matchers are
        joined with ``matching.UnionMatcher`` and the array matcher's
        ``limit_quality()`` is set as the union's minimum quality.

        :param searcher: searcher supplying the reader and used to build the
            sub-matchers.
        :param context: optional search context, passed to the sub-matchers.
            Its ``limit`` attribute is handed to ``ArrayMatcher``; ``None`` is
            handed over instead when no context was given.
        :returns: a ``matching.NullMatcher`` when there are no subqueries, the
            sole subquery's matcher when there is exactly one, otherwise the
            combined matcher described above.
        """
        # NOTE(review): ``collectors`` is imported but never referenced in
        # this method.
        from whoosh import collectors

        # Get the subqueries
        subs = self.subqueries
        if not subs:
            return matching.NullMatcher()
        elif len(subs) == 1:
            return subs[0].matcher(searcher, context)

        # Sort the subqueries into "small" and "big" queries based on their
        # estimated size. This works best for term queries.
        reader = searcher.reader()
        smallqs = []
        bigqs = []
        for q in subs:
            size = q.estimate_size(reader)
            if size <= self.SPLIT_DOC_LIMIT:
                smallqs.append(q)
            else:
                bigqs.append(q)

        # Build a pre-scored matcher for the small queries
        minscore = 0
        smallmatcher = None
        if smallqs:
            smallmatcher = DefaultOr(smallqs).matcher(searcher, context)
            # ``context`` defaults to None, so it cannot be dereferenced
            # unconditionally.
            # NOTE(review): assumes ArrayMatcher accepts None as its limit
            # when no context is supplied -- confirm against ArrayMatcher.
            limit = context.limit if context is not None else None
            smallmatcher = matching.ArrayMatcher(smallmatcher, limit)
            minscore = smallmatcher.limit_quality()
        if bigqs:
            # Get a matcher for the big queries
            m = DefaultOr(bigqs).matcher(searcher, context)
            # Add the prescored matcher for the small queries
            if smallmatcher:
                m = matching.UnionMatcher(m, smallmatcher)
                # Set the minimum score based on the prescored matcher
                m.set_min_quality(minscore)
        elif smallmatcher:
            # If there are no big queries, just return the prescored matcher
            m = smallmatcher
        else:
            m = matching.NullMatcher()

        return m
Пример #8
0
    def matcher(self, searcher, context=None):
        """Return a matcher for this compound query's subqueries.

        Handles the trivial cases here and delegates the real work to
        ``self._matcher()``.

        :param searcher: passed on to the subquery or to ``_matcher()``.
        :param context: passed on to the subquery or to ``_matcher()``.
        :returns: a ``matching.NullMatcher`` when there are no subqueries, the
            sole subquery's own matcher when there is exactly one, otherwise
            the result of ``self._matcher(subs, searcher, context)``.
        """
        children = self.subqueries

        # Nothing to combine.
        if not children:
            return matching.NullMatcher()

        # A single child needs no combining matcher around it.
        if len(children) == 1:
            return children[0].matcher(searcher, context)

        return self._matcher(children, searcher, context)
Пример #9
0
    def matcher(self, searcher, weighting=None):
        """Return a matcher over every term this multi-term query expands to.

        The terms come from ``self._words(reader)``; each becomes a ``Term``
        query on ``self.fieldname``.

        :param searcher: searcher supplying the reader, schema and used to
            build the sub-matchers.
        :param weighting: forwarded to the single-term and ``Or`` matchers;
            not used on the pre-loaded ``ListMatcher`` path.
        :returns: a ``matching.NullMatcher`` when there are no terms; the
            single term's matcher when there is exactly one; a pre-loaded
            ``matching.ListMatcher`` when ``self.constantscore`` is set or
            there are more than ``self.TOO_MANY_CLAUSES`` terms; otherwise
            the matcher of an ``Or`` over all terms.
        """
        fieldname = self.fieldname
        constantscore = self.constantscore
        reader = searcher.reader()

        termqueries = []
        for word in self._words(reader):
            termqueries.append(Term(fieldname, word))
        if not termqueries:
            return matching.NullMatcher()

        count = len(termqueries)
        if count == 1:
            # Only one term: use it directly.
            return termqueries[0].matcher(searcher, weighting=weighting)

        if not constantscore and count <= self.TOO_MANY_CLAUSES:
            # The default case: Or the terms together
            from whoosh.query import Or
            return Or(termqueries).matcher(searcher, weighting=weighting)

        # Either the score is constant or there are so many clauses that an
        # Or search would take forever: trade memory for time by walking
        # every term's postings up front and serving them from a ListMatcher.
        fmt = searcher.schema[fieldname].format
        values_by_doc = defaultdict(list)
        weights_by_doc = defaultdict(float)
        for termq in termqueries:
            postings = termq.matcher(searcher)
            while postings.is_active():
                docnum = postings.id()
                values_by_doc[docnum].append(postings.value())
                if not constantscore:
                    weights_by_doc[docnum] += postings.weight()
                postings.next()

        docnums = sorted(values_by_doc)
        # A list of lists of value strings -- ListMatcher does the work of
        # combining multiple values if the user asks for them.
        values = [values_by_doc[docnum] for docnum in docnums]

        if constantscore:
            return matching.ListMatcher(docnums, values=values, format=fmt,
                                        all_weights=self.boost)

        weights = [weights_by_doc[docnum] for docnum in docnums]
        return matching.ListMatcher(docnums, values=values, format=fmt,
                                    weights=weights)
Пример #10
0
    def _matcher(self, matchercls, q_weight_fn, searcher, weighting=None,
                 **kwargs):
        """Build a matcher tree of ``matchercls`` over this query's subqueries.

        :param matchercls: binary matcher class used to join sub-matchers.
        :param q_weight_fn: called on each subquery to get a "weight" used to
            build a huffman-like matcher tree; when ``None`` an
            order-preserving binary tree is built instead.
        :param searcher: searcher used to build every sub-matcher.
        :param weighting: forwarded to the matchers of the positive
            subqueries; the ``Not`` subqueries' matchers are built without it.
        :param kwargs: accepted but not used in this method.
        :returns: a ``matching.NullMatcher`` when ``_split_queries()`` yields
            no positive subqueries; otherwise the combined matcher, wrapped
            in ``matching.AndNotMatcher`` when an active negative matcher
            exists and in ``matching.WrappingMatcher`` when ``self.boost`` is
            not 1.0.
        """
        # Separate the queries that sit inside a Not() from the rest.
        positive, negated = self._split_queries()
        if not positive:
            return matching.NullMatcher()

        # One matcher per positive subquery, then join them into one.
        submatchers = [subq.matcher(searcher, weighting=weighting)
                       for subq in positive]
        if len(submatchers) == 1:
            combined = submatchers[0]
        elif q_weight_fn is not None:
            weighted = []
            for subq, subm in zip(positive, submatchers):
                weighted.append((q_weight_fn(subq), subm))
            combined = make_weighted_tree(matchercls, weighted)
        else:
            combined = make_binary_tree(matchercls, submatchers)

        # Subtract whatever the Not() queries match.
        if negated:
            if len(negated) == 1:
                excluded = negated[0].matcher(searcher)
            else:
                reader = searcher.reader()
                sized = []
                for notq in negated:
                    sized.append((notq.estimate_size(reader),
                                  notq.matcher(searcher)))
                excluded = make_weighted_tree(matching.UnionMatcher, sized)

            if excluded.is_active():
                combined = matching.AndNotMatcher(combined, excluded)

        # Apply this query's own boost, if it has one.
        if self.boost == 1.0:
            return combined
        return matching.WrappingMatcher(combined, self.boost)
Пример #11
0
 def matcher(self, searcher, context=None):
     """Return a new ``matching.NullMatcher``; both arguments are ignored."""
     return matching.NullMatcher()
Пример #12
0
 def matcher(self, searcher, weighting=None):
     """Return a new ``matching.NullMatcher``; both arguments are ignored."""
     return matching.NullMatcher()
def test_nullmatcher():
    """A new NullMatcher is inactive and yields no document ids."""
    null = matching.NullMatcher()
    active = null.is_active()
    assert not active
    ids = list(null.all_ids())
    assert ids == []
Пример #14
0
def test_nullmatcher():
    """A new NullMatcher is inactive and yields no document ids."""
    null = matching.NullMatcher()
    active = null.is_active()
    assert not active
    ids = list(null.all_ids())
    assert_equal(ids, [])