def set_matchspy_deref(enq):
    """Register a match spy on `enq`, then discard the local reference.

    Used to check that the spy is not destroyed too early once the only
    Python-side name bound to it has gone away.

    """
    short_lived_spy = xapian.ValueCountMatchSpy(0)
    enq.add_matchspy(short_lived_spy)
    # Explicitly drop our reference; `enq` was handed the spy above.
    del short_lived_spy
def get_enquire(self):
    """Create a xapian.Enquire for this search and return it.

    The Enquire is built on `self.database.database` and configured from
    `self.query`, `self.facets`, `self.sort_by`, `self.sort_by_reversed`
    and `self.distinct`.

    Side effects: `self.spies` is set to a dict mapping each accepted
    (stripped, lower-cased) facet name to its ValueCountMatchSpy, and
    `self.warnings` is set to a list of messages for names that
    `get_slot` did not resolve.

    """
    enquire = xapian.Enquire(self.database.database)
    # enquire.set_weighting_scheme(xapian.BoolWeight())
    # enquire.set_docid_order(xapian.Enquire.DONT_CARE)
    # if weighting_scheme:
    #     enquire.set_weighting_scheme(xapian.BM25Weight(*self.weighting_scheme))
    enquire.set_query(self.query)

    spies = {}
    warnings = []

    # NOTE(review): the `if slot` / `if not slot` tests below treat any
    # falsy result from get_slot() as "unknown name"; that would include a
    # slot number of 0 -- confirm get_slot() never returns 0.

    # One match spy per requested facet.
    if self.facets:
        for facet in self.facets:
            self.dead or 'alive'  # Raises DeadException when needed
            facet = facet.strip().lower()
            slot = get_slot(facet)
            if not slot:
                warnings.append("Ignored document value name (%r)" % facet)
                continue
            spy = xapian.ValueCountMatchSpy(slot)
            enquire.add_matchspy(spy)
            spies[facet] = spy

    if self.sort_by:
        # First pass: split each entry into (field, reverse); a leading
        # '-' requests reverse order for that field.
        sort_keys = []
        for entry in self.sort_by:
            self.dead or 'alive'  # Raises DeadException when needed
            descending = entry.startswith('-')
            if descending:
                entry = entry[1:]  # Strip the '-'
            sort_keys.append((entry, descending))

        # Second pass: feed the resolvable fields to the key maker.
        sorter = xapian.MultiValueKeyMaker()
        for field, descending in sort_keys:
            self.dead or 'alive'  # Raises DeadException when needed
            field = field.strip().lower()
            slot = get_slot(field)
            if slot:
                sorter.add_value(slot, descending)
            else:
                warnings.append("Ignored document value name (%r)" % field)
        enquire.set_sort_by_key_then_relevance(sorter, self.sort_by_reversed)

    if self.distinct:
        # `distinct is True` selects the 'ID' field; any other truthy
        # value is used as the field name itself.
        collapse_field = 'ID' if self.distinct is True else self.distinct
        enquire.set_collapse_key(get_slot(collapse_field))

    self.spies = spies
    self.warnings = warnings

    return enquire
def main(args):
    """Search the movies database as described by `args` and print results.

    `args` is read with `.get()` for these keys:

    - 'keyword': free-text query; when falsy, every document is matched.
    - 'title': query parsed with the 'S' prefix and applied as a filter.
    - 'rated': iterable of ratings; OR-ed 'XRATED:<rating>' terms are
      applied as a filter.
    - 'year_range': range query parsed with a NumberValueRangeProcessor
      on SLOT_YEAR and applied as a filter.
    - 'show_facets': when truthy, per-value counts for SLOT_RATED are
      printed after the results.

    The query, each matching document's stored JSON (pretty-printed) and
    optionally the facet counts are written to stdout.  Nothing is
    returned.

    Output uses single-argument `print(...)` calls, which emit the same
    text under Python 2's print statement and Python 3's print function.

    """
    keyword = args.get('keyword')
    title = args.get('title')
    rated_list = args.get('rated')
    year_range = args.get('year_range')
    show_facets = args.get('show_facets')

    with closing(_x.Database('./xdb/movies.db')) as x_db:
        # get a query parser
        qp = _query_parser(x_db)

        if keyword:
            x_query = qp.parse_query(keyword)
        else:
            x_query = _x.Query.MatchAll

        if title:
            title_query = qp.parse_query(title, 0, 'S')
            x_query = _joinq(_x.Query.OP_FILTER, x_query, title_query)

        if rated_list:
            rated_queries = [
                _x.Query('XRATED:{}'.format(rated)) for rated in rated_list
            ]
            rated_query = _x.Query(_x.Query.OP_OR, rated_queries)
            x_query = _joinq(_x.Query.OP_FILTER, x_query, rated_query)

        if year_range:
            qp.add_valuerangeprocessor(_x.NumberValueRangeProcessor(SLOT_YEAR))
            year_range_query = qp.parse_query(year_range)
            x_query = _joinq(_x.Query.OP_FILTER, x_query, year_range_query)

        # setup the enquire object to perform the query
        enq = _x.Enquire(x_db)
        print(str(x_query))
        enq.set_query(x_query)

        # Set up a spy to inspect value slots on matched documents
        spy = _x.ValueCountMatchSpy(SLOT_RATED)
        enq.add_matchspy(spy)

        # Iterate through the matched set and display the stored JSON.
        # The document count is passed as the maximum number of items.
        for res in enq.get_mset(0, x_db.get_doccount(), None, None):
            print(json.dumps(json.loads(res.document.get_data()),
                             indent=4, sort_keys=True))

        # Fetch and display the spy values
        if show_facets:
            facets = {item.term: int(item.termfreq) for item in spy.values()}
            print("Facets:{}, Total:{} ".format(facets, sum(facets.values())))
def search(dbpath, querystring, offset=0, pagesize=10):
    """Run `querystring` against the database at `dbpath` and print a page
    of matches followed by the facet counts gathered from value slot 1.

    offset   - defines starting point within result set
    pagesize - defines number of records to retrieve

    The query string, paging arguments and the docids that were displayed
    are passed to support.log_matches() at the end.

    """
    # Open the database we're going to search.
    db = xapian.Database(dbpath)

    # Query parser with an English stemmer and the field prefixes.
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(xapian.Stem("en"))
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    for field_name, term_prefix in (("title", "S"), ("description", "XD")):
        queryparser.add_prefix(field_name, term_prefix)

    # Parse the query, then hand it to an Enquire on the database.
    parsed_query = queryparser.parse_query(querystring)
    enquire = xapian.Enquire(db)
    enquire.set_query(parsed_query)

    # Docids of the matches we display, collected for logging.
    shown_docids = []

    ### Start of example code.
    # Set up a spy to inspect the MAKER value at slot 1
    spy = xapian.ValueCountMatchSpy(1)
    enquire.add_matchspy(spy)

    # The third get_mset() argument (100) is passed exactly as before.
    for hit in enquire.get_mset(offset, pagesize, 100):
        fields = json.loads(hit.document.get_data().decode('utf8'))
        summary = {
            'rank': hit.rank + 1,
            'docid': hit.docid,
            'title': fields.get('TITLE', u''),
        }
        print(u"%(rank)i: #%(docid)3.3i %(title)s" % summary)
        shown_docids.append(hit.docid)

    # Fetch and display the spy values
    for facet in spy.values():
        facet_info = {
            'term': facet.term.decode('utf-8'),
            'count': facet.termfreq,
        }
        print("Facet: %(term)s; count: %(count)i" % facet_info)

    # Finally, make sure we log the query and displayed results
    support.log_matches(querystring, offset, pagesize, shown_docids)
def test_matchspy():
    """Exercise match spies attached to an Enquire.

    Covers: a spy whose only Python reference has been dropped, a spy
    removed again via clear_matchspies(), and the values / top_values
    reported by a spy that stayed attached during a search.

    """
    db = setup_database()
    enq = xapian.Enquire(db)
    enq.set_query(xapian.Query(xapian.Query.OP_OR, "was", "it"))

    def set_matchspy_deref(enq):
        """Attach a matchspy and immediately drop our reference to it, to
        check that it doesn't get deleted too soon.

        """
        dropped_spy = xapian.ValueCountMatchSpy(0)
        enq.add_matchspy(dropped_spy)
        del dropped_spy

    set_matchspy_deref(enq)
    matches = enq.get_mset(0, 10)
    expect(len(matches), 5)

    spy = xapian.ValueCountMatchSpy(0)
    enq.add_matchspy(spy)
    # Regression test for clear_matchspies() - used to always raise an
    # exception due to a copy and paste error in its definition.
    enq.clear_matchspies()
    matches = enq.get_mset(0, 10)
    # The spy was detached before the search, so it has seen nothing.
    expect(list(spy.values()), [])

    # Re-attach the spy and search again; now it should have counts.
    enq.add_matchspy(spy)
    matches = enq.get_mset(0, 10)
    expect(spy.get_total(), 5)

    one_and_a_half = xapian.sortable_serialise(1.5)
    two = xapian.sortable_serialise(2)

    seen = list(spy.values())
    expect([(entry.term, entry.termfreq) for entry in seen],
           [(one_and_a_half, 1), (two, 2)])

    expect([(entry.term, entry.termfreq) for entry in spy.top_values(10)],
           [(two, 2), (one_and_a_half, 1)])