Пример #1
0
    def get_enquire(self):
        """Build a ``xapian.Enquire`` configured from this object's options.

        Reads ``self.query``, ``self.facets``, ``self.sort_by``,
        ``self.sort_by_reversed`` and ``self.distinct``.

        Side effects:
            ``self.spies`` is set to a dict mapping each accepted facet name
            (stripped, lower-cased) to its ``xapian.ValueCountMatchSpy``.
            ``self.warnings`` is set to the list of messages for facet/sort
            names that ``get_slot`` did not resolve to a truthy slot.

        Returns:
            The configured ``xapian.Enquire`` instance.
        """
        # Weighting scheme and docid order are not set here, so Xapian's
        # defaults apply.
        enquire = xapian.Enquire(self.database.database)
        enquire.set_query(self.query)

        spies = {}
        warnings = []

        # Facets: attach one value-count spy per resolvable value name.
        if self.facets:
            for facet in self.facets:
                self.dead or 'alive'  # Raises DeadException when needed
                facet = facet.strip().lower()
                facet_slot = get_slot(facet)
                if not facet_slot:
                    warnings.append("Ignored document value name (%r)" % facet)
                    continue
                facet_spy = xapian.ValueCountMatchSpy(facet_slot)
                enquire.add_matchspy(facet_spy)
                spies[facet] = facet_spy

        # Sorting: a leading '-' on a field name sets its reverse flag.
        if self.sort_by:
            sort_specs = []
            for raw_field in self.sort_by:
                self.dead or 'alive'  # Raises DeadException when needed
                is_reversed = raw_field.startswith('-')
                if is_reversed:
                    raw_field = raw_field[1:]  # Strip the '-'
                sort_specs.append((raw_field, is_reversed))

            key_maker = xapian.MultiValueKeyMaker()
            for field_name, is_reversed in sort_specs:
                self.dead or 'alive'  # Raises DeadException when needed
                field_name = field_name.strip().lower()
                field_slot = get_slot(field_name)
                if not field_slot:
                    warnings.append(
                        "Ignored document value name (%r)" % field_name)
                    continue
                key_maker.add_value(field_slot, is_reversed)
            enquire.set_sort_by_key_then_relevance(key_maker,
                                                   self.sort_by_reversed)

        # Collapsing: ``distinct=True`` collapses on the 'ID' field,
        # any other truthy value is used as the field name itself.
        if self.distinct:
            collapse_field = 'ID' if self.distinct is True else self.distinct
            enquire.set_collapse_key(get_slot(collapse_field))

        self.spies = spies
        self.warnings = warnings

        return enquire
Пример #2
0
    def _set_sort_by(self, enquire, sort_by):
        """Configure *enquire* to sort by the value columns named in *sort_by*.

        Args:
            enquire: the ``xapian.Enquire`` object to configure.
            sort_by: iterable of field names; a leading ``-`` is stripped
                and makes that field's ``reverse`` flag ``True``.
        """
        key_maker = xapian.MultiValueKeyMaker()

        for field in sort_by:
            # NOTE: the original author flagged that "reverse is inverted in
            # Xapian" -- see http://trac.xapian.org/ticket/311
            reverse = field.startswith('-')
            if reverse:
                field = field[1:]  # Strip the '-'
            key_maker.add_value(self._value_column(field), reverse)

        enquire.set_sort_by_key(key_maker)
Пример #3
0
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search the Xapian database at *dbpath* and print one page of matches.

    Matches are sorted with a ``MultiValueKeyMaker`` over value slot 1
    (reverse flag ``False``) and value slot 3 (reverse flag ``True``), then
    by relevance.  The query and the displayed document ids are finally
    passed to ``support.log_matches``.

    Args:
        dbpath: path of the database to open.
        querystring: user query, parsed with English stemming and the
            ``title`` / ``description`` prefixes.
        offset: starting point within the result set.
        pagesize: number of records to retrieve.
    """
    # Open the database we're going to search.
    db = xapian.Database(dbpath)

    # Set up a QueryParser with a stemmer and suitable prefixes
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(xapian.Stem("en"))
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    for field, prefix in (("title", "S"), ("description", "XD")):
        queryparser.add_prefix(field, prefix)

    # Parse the query, then run it through an Enquire object on the database
    parsed_query = queryparser.parse_query(querystring)
    enquire = xapian.Enquire(db)
    enquire.set_query(parsed_query)
    # Start of example code.
    keymaker = xapian.MultiValueKeyMaker()
    keymaker.add_value(1, False)
    keymaker.add_value(3, True)
    enquire.set_sort_by_key_then_relevance(keymaker, False)
    # End of example code.

    # Print out something about each match, remembering what was displayed
    line_template = (
        u"%(rank)i: #%(docid)3.3i %(name)s %(date)s\n        Population %(pop)s"
    )
    displayed_docids = []
    for match in enquire.get_mset(offset, pagesize):
        raw_data = match.document.get_data()
        fields = json.loads(raw_data.decode('utf8'))
        values = {
            'rank': match.rank + 1,
            'docid': match.docid,
            'name': fields.get('name', u''),
            'date': support.format_date(fields.get('admitted', u'')),
            'pop': support.format_numeral(int(fields.get('population', 0))),
            'lat': fields.get('latitude', u''),
            'lon': fields.get('longitude', u''),
        }
        print(line_template % values)
        displayed_docids.append(match.docid)

    # Finally, make sure we log the query and displayed results
    support.log_matches(querystring, offset, pagesize, displayed_docids)
Пример #4
0
    def _enquire(self, request, query, order_by, group_by):
        """Build a ``xapian.Enquire`` for the given search parameters.

        Args:
            request: mapping of property name to a value, or to a list/tuple
                of values.  Entries whose property is unknown or has no
                prefix are skipped.  A value starting with ``!`` is negated
                within its property; one starting with ``-`` is excluded
                from the final query.
            query: free-text query string; falsy for no text query.
            order_by: property name to sort on, optionally prefixed with
                ``+`` or ``-`` (``-`` sets the sort key's reverse flag).
            group_by: property name whose value slot becomes the collapse
                key; falsy for no collapsing.

        Returns:
            The configured ``xapian.Enquire``.
        """
        enquire = xapian.Enquire(self._db)
        queries = []
        and_not_queries = []
        boolean_queries = []

        if query:
            query = self._extract_exact_search_terms(query, request)

        # Free-text part: parse what is left of the query string.
        if query:
            numeric_types = (int, float, bool)
            parser = xapian.QueryParser()
            parser.set_database(self._db)
            for name, prop in self._props.items():
                if not prop.prefix:
                    continue
                add_named_prefix = (parser.add_boolean_prefix if prop.boolean
                                    else parser.add_prefix)
                add_named_prefix(name, prop.prefix)
                # Unprefixed words are searched in this property as well
                parser.add_prefix('', prop.prefix)
                if prop.slot is not None and prop.typecast in numeric_types:
                    value_range = xapian.NumberValueRangeProcessor(
                        prop.slot, name + ':')
                    parser.add_valuerangeprocessor(value_range)
            parser.add_prefix('', '')
            parse_flags = (xapian.QueryParser.FLAG_PHRASE
                           | xapian.QueryParser.FLAG_BOOLEAN
                           | xapian.QueryParser.FLAG_LOVEHATE
                           | xapian.QueryParser.FLAG_PARTIAL
                           | xapian.QueryParser.FLAG_WILDCARD
                           | xapian.QueryParser.FLAG_PURE_NOT)
            queries.append(parser.parse_query(query, parse_flags, ''))

        # Structured part: one sub-query per requested property.
        for prop_name, value in request.items():
            prop = self._props.get(prop_name)
            if prop is None or not prop.prefix:
                continue

            needles = value if type(value) in (tuple, list) else [value]
            wanted = []
            negated = []
            for needle in needles:
                if needle is None:
                    continue
                text = prop.to_string(needle)[0]
                if text.startswith('!'):
                    negated.append(
                        xapian.Query(_term(prop.prefix, text[1:])))
                elif text.startswith('-'):
                    and_not_queries.append(
                        xapian.Query(_term(prop.prefix, text[1:])))
                else:
                    wanted.append(xapian.Query(_term(prop.prefix, text)))

            if negated:
                # xapian.Query('') is Xapian's match-all query, so this is
                # "everything except any of the negated terms"
                wanted.append(xapian.Query(xapian.Query.OP_AND_NOT, [
                    xapian.Query(''),
                    xapian.Query(xapian.Query.OP_OR, negated),
                ]))

            if not wanted:
                continue
            if len(wanted) == 1:
                prop_query = wanted[0]
            else:
                prop_query = xapian.Query(xapian.Query.OP_OR, wanted)
            target = boolean_queries if prop.boolean else queries
            target.append(prop_query)

        # Combine: AND the regular queries, filter by the boolean ones,
        # fall back to match-all, then subtract every '-' exclusion.
        combined = (xapian.Query(xapian.Query.OP_AND, queries)
                    if queries else None)
        if boolean_queries:
            boolean_filter = xapian.Query(xapian.Query.OP_AND,
                                          boolean_queries)
            if combined is None:
                combined = boolean_filter
            else:
                combined = xapian.Query(xapian.Query.OP_FILTER,
                                        [combined, boolean_filter])
        if combined is None:
            combined = xapian.Query('')
        for exclusion in and_not_queries:
            combined = xapian.Query(xapian.Query.OP_AND_NOT,
                                    [combined, exclusion])
        enquire.set_query(combined)

        if not hasattr(xapian, 'MultiValueKeyMaker'):
            _logger.warning('In order to support sorting, '
                            'Xapian should be at least 1.2.0')
        else:
            sorter = xapian.MultiValueKeyMaker()
            if order_by:
                reverse = order_by.startswith('-')
                if order_by.startswith(('+', '-')):
                    order_by = order_by[1:]
                prop = self._props.get(order_by)
                enforce(prop is not None and prop.slot is not None,
                        'Cannot sort using %r property of %r', order_by,
                        self.metadata.name)
                sorter.add_value(prop.slot, reverse)
            # Sort by ascending GUID to make order predictable all time
            sorter.add_value(0, False)
            enquire.set_sort_by_key(sorter, reverse=False)

        if group_by:
            prop = self._props.get(group_by)
            enforce(prop is not None and prop.slot is not None,
                    'Cannot group by %r property of %r', group_by,
                    self.metadata.name)
            enquire.set_collapse_key(prop.slot)

        return enquire
Пример #5
0
    def _blocking_perform_search(self):
        """Run every query in ``self.search_query`` and collect the matches.

        Side effects:
            * ``self.nr_apps`` / ``self.nr_pkgs`` are reset to 0 and then
              accumulated over all queries.
            * New matches are appended to ``self._matches`` and their docids
              added to ``self.match_docids`` (no docid is added twice).
            * If nothing matched and ``self.nonapps_visible`` is neither
              ``ALWAYS_VISIBLE`` nor ``NEVER_VISIBLE``, it is switched to
              ``ALWAYS_VISIBLE`` and the search is run again.
            * ``self._perform_search_complete`` is set to ``True`` at the end.
        """
        # WARNING this call may run in a thread, so it's *not*
        #         allowed to touch gtk, otherwise hell breaks loose

        # use a unique instance of both enquire and xapian database
        # so concurrent queries don't result in an inconsistent database
        # (an alternative would be to serialise queries)
        enquire = xapian.Enquire(self.db.xapiandb)

        xfilter = (self.filter
                   if self.filter and self.filter.required
                   else None)

        self.nr_apps, self.nr_pkgs = 0, 0
        # performance only: bind the containers to locals once instead of
        # going through the attribute lookup for each item
        collected = self._matches
        seen_docids = self.match_docids

        for q in self.search_query:
            LOG.debug("initial query: '%s'" % q)

            # for searches we may want to disable show/hide
            terms = list(q)
            exact_pkgname_query = (len(terms) == 1 and
                                   terms[0].startswith("XP"))

            # see if we should do a app query and skip the pkg query
            # see bug #891613 and #1043159
            if exact_pkgname_query:
                with ExecutionTime("de-duplication"):
                    q_app = xapian.Query(terms[0].replace("XP", "AP"))
                    nr_apps, nr_pkgs = self._get_estimate_nr_apps_and_nr_pkgs(
                        enquire, q_app, xfilter)
                    if nr_apps == 1:
                        # this is a app query now
                        q = q_app
                        exact_pkgname_query = False

            with ExecutionTime("calculate nr_apps and nr_pkgs: "):
                nr_apps, nr_pkgs = self._get_estimate_nr_apps_and_nr_pkgs(
                    enquire, q, xfilter)
                self.nr_apps += nr_apps
                self.nr_pkgs += nr_pkgs

            # only show apps by default (unless in always visible mode)
            if (self.nonapps_visible != NonAppVisibility.ALWAYS_VISIBLE and
                    not exact_pkgname_query):
                q = xapian.Query(xapian.Query.OP_AND,
                                 xapian.Query("ATapplication"),
                                 q)

            LOG.debug("nearly completely filtered query: '%s'" % q)

            # filter out docs of pkgs of which there exists a doc of the app
            # FIXME: make this configurable again?
            without_duplicates = xapian.Query(xapian.Query.OP_AND_NOT,
                                              q, xapian.Query("XD"))
            enquire.set_query(without_duplicates)

            # sort results
            if self.sortmode == SortMethods.BY_CATALOGED_TIME:
                # cataloged time - what's new category
                time_sorter = xapian.MultiValueKeyMaker()
                if (self.db._axi_values and
                        "catalogedtime" in self.db._axi_values):
                    time_sorter.add_value(
                        self.db._axi_values["catalogedtime"])
                time_sorter.add_value(XapianValues.DB_CATALOGED_TIME)
                enquire.set_sort_by_key(time_sorter, reverse=True)
            elif self.sortmode == SortMethods.BY_TOP_RATED:
                from softwarecenter.backend.reviews import get_review_loader
                review_loader = get_review_loader(self.cache, self.db)
                rating_sorter = TopRatedSorter(self.db, review_loader)
                enquire.set_sort_by_key(rating_sorter, reverse=True)
            elif self.sortmode == SortMethods.BY_SEARCH_RANKING:
                # search ranking - when searching: nothing to configure,
                # the default enquire.set_sort_by_relevance() is used
                pass
            elif (self.db._axi_values and
                  "display_name" in self.db._axi_values):
                # display name - all categories / channels
                enquire.set_sort_by_key(LocaleSorter(self.db), reverse=False)
            else:
                # fallback to pkgname - if needed?
                enquire.set_sort_by_value_then_relevance(
                    XapianValues.PKGNAME, False)

            # a limit of 0 means "no limit": ask for as many items as the
            # database holds
            max_items = len(self.db) if self.limit == 0 else self.limit
            matches = enquire.get_mset(0, max_items, None, xfilter)
            LOG.debug("found ~%i matches" % matches.get_matches_estimated())

            # promote exact matches to a "app", this will make the
            # show/hide technical items work correctly
            if exact_pkgname_query and len(matches) == 1:
                self.nr_apps += 1
                self.nr_pkgs -= 2

            # add matches, but don't duplicate docids
            with ExecutionTime("append new matches to existing ones:"):
                for match in matches:
                    if match.docid in seen_docids:
                        continue
                    collected.append(match)
                    seen_docids.add(match.docid)

        # if we have no results, try forcing pkgs to be displayed
        # if not NonAppVisibility.NEVER_VISIBLE is set
        retry_visibilities = (NonAppVisibility.ALWAYS_VISIBLE,
                              NonAppVisibility.NEVER_VISIBLE)
        if not collected and self.nonapps_visible not in retry_visibilities:
            self.nonapps_visible = NonAppVisibility.ALWAYS_VISIBLE
            self._blocking_perform_search()

        # wake up the UI if run in a search thread
        self._perform_search_complete = True