Пример #1
0
    def search(self, query, limit=5000, sortedby=None, reverse=False):
        """Runs the query represented by the query object and returns a
        Results object.

        See the help for :meth:`~Searcher.find` for information on the
        parameters.

        :param query: a :class:`whoosh.query.Query` object.
        :param limit: the maximum number of documents to return. When
            sorting, the sorted list is truncated to this length; when
            scoring, it is passed to ``TopDocs``.
        :param sortedby: if not None, the results are sorted instead of
            scored. May be a field name (string), a list or tuple of field
            names, or a ``Sorter`` object.
        :param reverse: passed through to the sorter's ``order()`` method;
            only used when ``sortedby`` is not None.
        :raises ValueError: if ``sortedby`` is not None and is not a string,
            list, tuple, or ``Sorter``.
        :rtype: :class:`Results`
        """

        ixreader = self.ixreader

        t = now()
        if sortedby is not None:
            if isinstance(sortedby, basestring):
                sorter = scoring.FieldSorter(sortedby)
            elif isinstance(sortedby, (list, tuple)):
                # Use the same qualified name as the single-field case above.
                sorter = scoring.MultiFieldSorter(
                    [scoring.FieldSorter(fn) for fn in sortedby])
            elif isinstance(sortedby, Sorter):
                sorter = sortedby
            else:
                raise ValueError(
                    "sortedby argument must be a string, list, or Sorter (%r)"
                    % (sortedby,))

            scored_list = sorter.order(self, query.docs(self), reverse=reverse)
            # Sorted results carry no scores.
            scores = None
            # The bit vector is built from the full list, before truncation,
            # so it covers every matching document rather than only the
            # first ``limit``.
            docvector = BitVector(ixreader.doc_count_all(), source=scored_list)
            if len(scored_list) > limit:
                scored_list = list(scored_list)[:limit]
        else:
            # Sort by scores
            topdocs = TopDocs(limit, ixreader.doc_count_all())
            final = self.weighting.final
            topdocs.add_all((docnum, final(self, docnum, score))
                            for docnum, score in query.doc_scores(self))

            best = topdocs.best()
            if best:
                # best is a list like
                # [(docnum, score), (docnum, score), ... ]
                # This unpacks that into two sequences: docnums and scores.
                # Reuse the list already fetched instead of calling
                # topdocs.best() a second time.
                scored_list, scores = zip(*best)
            else:
                # zip(*[]) would yield nothing to unpack, so handle the
                # no-results case explicitly.
                scored_list = []
                scores = []

            docvector = topdocs.docs
        t = now() - t

        return Results(self,
                       query,
                       scored_list,
                       docvector,
                       runtime=t,
                       scores=scores)
Пример #2
0
    def sort_query(self, query, sortedby, reverse=False):
        """Returns the documents matching ``query`` as a sorted Results
        object.

        :param query: the query object; its ``docs()`` method supplies the
            documents to sort.
        :param sortedby: a field name (string), a list or tuple of field
            names, or a ``Sorter`` object.
        :param reverse: passed through to the sorter's ``order()`` method.
        :raises ValueError: if ``sortedby`` is not a string, list, tuple, or
            ``Sorter``.
        :rtype: :class:`Results`
        """
        if isinstance(sortedby, basestring):
            sorter = self._field_sorter(sortedby)
        elif isinstance(sortedby, (list, tuple)):
            sorter = scoring.MultiFieldSorter([self._field_sorter(fname)
                                               for fname in sortedby])
        elif isinstance(sortedby, Sorter):
            sorter = sortedby
        else:
            # "%r" (lowercase) is the repr conversion; the previous "%R" is
            # not a valid format character, so the intended message was never
            # produced.
            raise ValueError("sortedby argument (%r) must be a string, list,"
                             " or Sorter" % (sortedby,))

        # Only the ordering itself is timed, not the sorter construction.
        t = now()
        sorted_docs = list(sorter.order(self, query.docs(self), reverse=reverse))
        runtime = now() - t

        return Results(self, query, sorted_docs, None, runtime)
Пример #3
0
    def search(self,
               query,
               limit=5000,
               sortedby=None,
               reverse=False,
               minscore=0.0001):
        """Runs the query represented by the ``query`` object and returns a
        Results object.
        
        :param query: a :class:`whoosh.query.Query` object.
        :param limit: the maximum number of documents to score. If you're only
            interested in the top N documents, you can set limit=N to limit the
            scoring for a faster search.
        :param sortedby: if this parameter is not None, the results are sorted
            instead of scored. If this value is a string, the results are
            sorted by the field named in the string. If this value is a list or
            tuple, it is assumed to be a sequence of strings and the results
            are sorted by the fieldnames in the sequence. Otherwise 'sortedby'
            should be a scoring.Sorter object.
            
            The fields you want to sort by must be indexed.
            
            For example, to sort the results by the 'path' field::
            
                searcher.search(q, sortedby = "path")
                
            To sort the results by the 'path' field and then the 'category'
            field::
                
                searcher.search(q, sortedby = ("path", "category"))
                
            To use a sorting object::
            
                searcher.search(q, sortedby = scoring.FieldSorter("path", key=mykeyfn))
            
            Using a string or tuple simply instantiates a
            :class:`whoosh.scoring.FieldSorter` or
            :class:`whoosh.scoring.MultiFieldSorter` object for you. To get a
            custom sort order, instantiate your own ``FieldSorter`` with a
            ``key`` argument, or write a custom :class:`whoosh.scoring.Sorter`
            class.
            
            FieldSorter and MultiFieldSorter cache the document order, using 4
            bytes times the number of documents in the index, and taking time
            to cache. To increase performance, instantiate your own sorter and
            re-use it (but remember you need to recreate it if the index
            changes).
        
        :param reverse: if ``sortedby`` is not None, this reverses the
            direction of the sort.
        :param minscore: the minimum score to include in the results. Only
            used when scoring (``sortedby`` is None).
        :raises ValueError: if ``sortedby`` is not None and is not a string,
            list, tuple, or ``Sorter``.
        :rtype: :class:`Results`
        """

        ixreader = self.ixreader

        t = now()
        if sortedby is not None:
            if isinstance(sortedby, basestring):
                sorter = scoring.FieldSorter(sortedby)
            elif isinstance(sortedby, (list, tuple)):
                # Use the same qualified name as the single-field case above.
                sorter = scoring.MultiFieldSorter(
                    [scoring.FieldSorter(fn) for fn in sortedby])
            elif isinstance(sortedby, Sorter):
                sorter = sortedby
            else:
                raise ValueError(
                    "sortedby argument must be a string, list, or Sorter (%r)"
                    % (sortedby,))

            scored_list = sorter.order(self, query.docs(self), reverse=reverse)
            # Sorted results carry no scores.
            scores = None
            # The bit vector is built from the full list, before truncation,
            # so it covers every matching document rather than only the
            # first ``limit``.
            docvector = BitVector(ixreader.doc_count_all(), source=scored_list)
            if len(scored_list) > limit:
                scored_list = list(scored_list)[:limit]
        else:
            # Sort by scores
            topdocs = TopDocs(limit, ixreader.doc_count_all())
            final = self.weighting.final
            topdocs.add_all(((docnum, final(self, docnum, score))
                             for docnum, score in query.doc_scores(self)),
                            minscore)

            best = topdocs.best()
            if best:
                # best is a list like
                # [(docnum, score), (docnum, score), ... ]
                # This unpacks that into two sequences: docnums and scores.
                # Reuse the list already fetched instead of calling
                # topdocs.best() a second time.
                scored_list, scores = zip(*best)
            else:
                # zip(*[]) would yield nothing to unpack, so handle the
                # no-results case explicitly.
                scored_list = []
                scores = []

            docvector = topdocs.docs
        t = now() - t

        return Results(self,
                       query,
                       scored_list,
                       docvector,
                       runtime=t,
                       scores=scores)
Пример #4
0
    def search(self,
               query,
               limit=5000,
               weighting=None,
               sortedby=None,
               reverse=False):
        """Execute ``query`` against this searcher and wrap the outcome in a
        Results object.

        :query: a query.Query object representing the search query. A query
            string can be turned into a query object with e.g.
            qparser.QueryParser.
        :limit: upper bound on the number of documents kept. When scoring it
            is handed to TopDocs; when sorting, the sorted list is cut down
            to this many entries.
        :weighting: weighting object used for scoring. When it is None (or
            otherwise false) the searcher's own ``weighting`` attribute is
            used instead.
        :sortedby: when not None, results are sorted rather than scored.
            A string names the single field to sort by. A list or tuple is
            handed to scoring.MultiFieldSorter. Any other callable is called
            with no arguments to produce the sorter. Anything else is used
            directly as the sorter object.

            The fields you want to sort by must be indexed.

            Sorting on the 'path' field::

                searcher.search(q, sortedby = "path")

            Sorting on 'path' and then on 'category'::

                searcher.search(q, sortedby = ("path", "category"))

            Supplying a sorter class::

                searcher.search(q, sortedby = scoring.NullSorter)

        :reverse: forwarded to the sorter's ``order()`` method; has no effect
            when ``sortedby`` is None.
        """

        reader = self.doc_reader
        started = time.time()

        if sortedby is None:
            # No explicit ordering requested: rank the matches by score.
            collector = TopDocs(limit, reader.doc_count_all())
            scorer = weighting or self.weighting
            collector.add_all(query.doc_scores(self, weighting=scorer))
            scored_list = collector.best()
            docvector = collector.docs
        else:
            # Resolve whatever the caller passed into a sorter object.
            sorter = sortedby
            if isinstance(sorter, basestring):
                sorter = scoring.FieldSorter(sorter)
            elif isinstance(sorter, (list, tuple)):
                sorter = scoring.MultiFieldSorter(sorter)
            elif callable(sorter):
                sorter = sorter()

            matching = query.docs(self)
            scored_list = sorter.order(self, matching, reverse=reverse)
            # The bit vector is built before truncation so that it reflects
            # every matching document.
            total_docs = reader.doc_count_all()
            docvector = BitVector(total_docs, source=scored_list)
            if len(scored_list) > limit:
                scored_list = list(scored_list)[:limit]

        elapsed = time.time() - started
        return Results(self, query, scored_list, docvector, runtime=elapsed)