예제 #1
0
 def set_matchspy_deref(enq):
     """Register a matchspy on ``enq`` and immediately drop our reference.

     A ``xapian.ValueCountMatchSpy`` for value slot 0 is added via
     ``enq.add_matchspy`` and the local name is then deleted, so callers
     can check that the spy is not destroyed too early.
     """
     slot_zero_spy = xapian.ValueCountMatchSpy(0)
     enq.add_matchspy(slot_zero_spy)
     # Explicitly release the only Python-level name bound to the spy.
     del slot_zero_spy
예제 #2
0
파일: search.py 프로젝트: ra2003/pyXapiand
    def get_enquire(self):
        """Build a ``xapian.Enquire`` configured from this object's settings.

        Reads ``self.query``, ``self.facets``, ``self.sort_by``,
        ``self.sort_by_reversed`` and ``self.distinct``.

        Side effects: stores the created matchspies (keyed by the stripped,
        lower-cased facet name) in ``self.spies`` and the messages about
        ignored names in ``self.warnings``.

        Returns the configured enquire object.
        """
        enquire = xapian.Enquire(self.database.database)
        # No weighting scheme or docid order is set on the enquire here.
        enquire.set_query(self.query)

        facet_spies = {}
        sort_fields = []
        ignored = []

        # Facets: one value-count spy per name that maps to a truthy slot.
        if self.facets:
            for facet in self.facets:
                self.dead or 'alive'  # Raises DeadException when needed
                facet = facet.strip().lower()
                slot = get_slot(facet)
                if not slot:
                    ignored.append("Ignored document value name (%r)" % facet)
                    continue
                facet_spy = xapian.ValueCountMatchSpy(slot)
                enquire.add_matchspy(facet_spy)
                facet_spies[facet] = facet_spy

        # Sorting: a leading '-' on a field name requests reverse order.
        if self.sort_by:
            for raw_field in self.sort_by:
                self.dead or 'alive'  # Raises DeadException when needed
                descending = raw_field.startswith('-')
                if descending:
                    raw_field = raw_field[1:]  # Strip the '-'
                sort_fields.append((raw_field, descending))

            key_maker = xapian.MultiValueKeyMaker()
            for field_name, descending in sort_fields:
                self.dead or 'alive'  # Raises DeadException when needed
                field_name = field_name.strip().lower()
                slot = get_slot(field_name)
                if slot:
                    key_maker.add_value(slot, descending)
                else:
                    ignored.append(
                        "Ignored document value name (%r)" % field_name)
            enquire.set_sort_by_key_then_relevance(
                key_maker, self.sort_by_reversed)

        # Collapsing: ``distinct is True`` means collapse on the 'ID' field,
        # any other truthy value is used as the field name itself.
        if self.distinct:
            collapse_field = 'ID' if self.distinct is True else self.distinct
            enquire.set_collapse_key(get_slot(collapse_field))

        self.spies = facet_spies
        self.warnings = ignored

        return enquire
예제 #3
0
파일: query.py 프로젝트: jingle3276/imdb250
def main(args):
    """Search the movies database and print every matching document.

    ``args`` is a mapping (anything providing ``.get``) that may contain:

    * ``keyword``     -- free-text query; when missing, all documents match.
    * ``title``       -- query text parsed with the ``'S'`` default prefix
                         and applied as a filter.
    * ``rated``       -- iterable of values; each becomes an
                         ``XRATED:<value>`` term, OR-ed together and applied
                         as a filter.
    * ``year_range``  -- range expression parsed with a
                         ``NumberValueRangeProcessor`` on ``SLOT_YEAR`` and
                         applied as a filter.
    * ``show_facets`` -- when truthy, print the value counts gathered by the
                         match spy on ``SLOT_RATED``.

    Output goes to stdout: the query's string form, each matched document's
    stored JSON (pretty-printed), and optionally the facet summary.
    """
    keyword = args.get('keyword')
    title = args.get('title')
    rated_list = args.get('rated')
    year_range = args.get('year_range')
    show_facets = args.get('show_facets')

    with closing(_x.Database('./xdb/movies.db')) as x_db:
        # get a query parser
        qp = _query_parser(x_db)

        if keyword:
            x_query = qp.parse_query(keyword)
        else:
            x_query = _x.Query.MatchAll

        if title:
            title_query = qp.parse_query(title, 0, 'S')
            x_query = _joinq(_x.Query.OP_FILTER, x_query, title_query)

        if rated_list:
            rated_queries = [
                _x.Query('XRATED:{}'.format(rated)) for rated in rated_list
            ]
            rated_query = _x.Query(_x.Query.OP_OR, rated_queries)
            x_query = _joinq(_x.Query.OP_FILTER, x_query, rated_query)

        if year_range:
            qp.add_valuerangeprocessor(_x.NumberValueRangeProcessor(SLOT_YEAR))
            year_range_query = qp.parse_query(year_range)
            x_query = _joinq(_x.Query.OP_FILTER, x_query, year_range_query)

        # setup the enquire object to perform the query
        enq = _x.Enquire(x_db)
        # Single-argument parenthesised print behaves the same on Python 2
        # and is also valid syntax on Python 3.
        print(str(x_query))
        enq.set_query(x_query)

        # Set up a spy to inspect value slots on matched documents
        spy = _x.ValueCountMatchSpy(SLOT_RATED)
        enq.add_matchspy(spy)

        # Ask for up to get_doccount() matches starting at 0 and dump each
        # document's stored JSON.
        for res in enq.get_mset(0, x_db.get_doccount(), None, None):
            print(json.dumps(json.loads(res.document.get_data()),
                             indent=4,
                             sort_keys=True))

        # Fetch and display the spy values
        if show_facets:
            facets = {item.term: int(item.termfreq) for item in spy.values()}
            print("Facets:{}, Total:{} ".format(facets, sum(facets.values())))
예제 #4
0
def search(dbpath, querystring, offset=0, pagesize=10):
    """Run ``querystring`` against the database at ``dbpath`` and print hits.

    offset   -- starting point within the result set
    pagesize -- number of records to retrieve

    Prints one line per match, then one line per value counted by a match
    spy on value slot 1, and finally hands the query and the displayed
    document ids to ``support.log_matches``.
    """
    # Open the database to search.
    database = xapian.Database(dbpath)

    # Query parser with an English stemmer and the field prefixes.
    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("en"))
    parser.set_stemming_strategy(parser.STEM_SOME)
    parser.add_prefix("title", "S")
    parser.add_prefix("description", "XD")
    parsed_query = parser.parse_query(querystring)

    # Enquire object that runs the parsed query on the database.
    enquire = xapian.Enquire(database)
    enquire.set_query(parsed_query)

    # Document ids of the matches that were printed.
    shown_docids = []

    ### Start of example code.
    # Set up a spy to inspect the MAKER value at slot 1
    maker_spy = xapian.ValueCountMatchSpy(1)
    enquire.add_matchspy(maker_spy)

    result_line = u"%(rank)i: #%(docid)3.3i %(title)s"
    for hit in enquire.get_mset(offset, pagesize, 100):
        record = json.loads(hit.document.get_data().decode('utf8'))
        line_values = {
            'rank': hit.rank + 1,
            'docid': hit.docid,
            'title': record.get('TITLE', u''),
        }
        print(result_line % line_values)
        shown_docids.append(hit.docid)

    # Report every value the spy counted.
    facet_line = "Facet: %(term)s; count: %(count)i"
    for value_count in maker_spy.values():
        facet_values = {
            'term': value_count.term.decode('utf-8'),
            'count': value_count.termfreq
        }
        print(facet_line % facet_values)

    # Finally, log the query and the displayed results.
    support.log_matches(querystring, offset, pagesize, shown_docids)
예제 #5
0
def test_matchspy():
    """Check matchspy registration, clearing and reported value counts."""
    database = setup_database()
    either_word = xapian.Query(xapian.Query.OP_OR, "was", "it")
    enq = xapian.Enquire(database)
    enq.set_query(either_word)

    def set_matchspy_deref(enq):
        """Add a matchspy and drop our reference to it.

        Used to check that the spy doesn't get deleted too soon.
        """
        throwaway_spy = xapian.ValueCountMatchSpy(0)
        enq.add_matchspy(throwaway_spy)
        del throwaway_spy

    # A spy whose Python reference is gone must not break the search.
    set_matchspy_deref(enq)
    results = enq.get_mset(0, 10)
    expect(len(results), 5)

    spy = xapian.ValueCountMatchSpy(0)
    enq.add_matchspy(spy)
    # Regression test for clear_matchspies() - used to always raise an
    # exception due to a copy and paste error in its definition.
    enq.clear_matchspies()
    results = enq.get_mset(0, 10)
    # The spy was cleared before the search, so it reports no values.
    expect(list(spy.values()), [])

    # Re-register the spy and search again; now it reports counts.
    enq.add_matchspy(spy)
    results = enq.get_mset(0, 10)
    expect(spy.get_total(), 5)

    key_one_and_half = xapian.sortable_serialise(1.5)
    key_two = xapian.sortable_serialise(2)

    counted = [(item.term, item.termfreq) for item in spy.values()]
    expect(counted, [
        (key_one_and_half, 1),
        (key_two, 2),
    ])

    top_counted = [(item.term, item.termfreq) for item in spy.top_values(10)]
    expect(top_counted, [
        (key_two, 2),
        (key_one_and_half, 1),
    ])