def search(dbpath, querystring, offset=0, pagesize=10):
    """Run a free-text query and return the matches as printable text.

    Opens the Xapian database at ``dbpath``, parses ``querystring`` with an
    English stemmer and the ``title`` / ``description`` field prefixes, and
    formats one line per match.  The query and the ids of the returned
    documents are handed to ``support.log_matches``.

    Args:
        dbpath: Path of the Xapian database to search.
        querystring: Query text to parse.
        offset: Starting point within the result set.
        pagesize: Number of records to retrieve.

    Returns:
        One newline-terminated line per match (rank, document id and the
        ``TITLE`` field of the document's JSON data); an empty string when
        there are no matches.
    """
    db = xapian.Database(dbpath)

    queryparser = xapian.QueryParser()
    # choose a language
    queryparser.set_stemmer(xapian.Stem("en"))
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    queryparser.add_prefix("title", "S")
    queryparser.add_prefix("description", "XD")
    query = queryparser.parse_query(querystring)

    enquire = xapian.Enquire(db)
    enquire.set_query(query)

    matches = []
    lines = []
    for match in enquire.get_mset(offset, pagesize):
        fields = json.loads(match.document.get_data())
        lines.append(u"%(rank)i: #%(docid)3.3i %(title)s" % {
            'rank': match.rank + 1,
            'docid': match.docid,
            'title': fields.get('TITLE', u''),
        })
        matches.append(match.docid)

    support.log_matches(querystring, offset, pagesize, matches)

    # Assemble the text once instead of growing a string inside the loop.
    return "".join(line + '\n' for line in lines)
### END of function
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search the database, print each match, and log what was shown.

    Besides the free-text ``title`` and ``description`` prefixes, the parser
    accepts ``material:`` as a boolean filter prefix (term prefix ``XM``).

    Args:
        dbpath: Path of the Xapian database to search.
        querystring: Query text to parse.
        offset: Starting point within the result set.
        pagesize: Number of records to retrieve.
    """
    # Open the database we're going to search.
    db = xapian.Database(dbpath)

### Start of example code.
    # Set up a QueryParser with a stemmer and suitable prefixes
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(xapian.Stem("en"))
    # Set the strategy explicitly: Xapian releases before 1.3.1 default to
    # STEM_NONE, which would leave the stemmer above unused.
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    queryparser.add_prefix("title", "S")
    queryparser.add_prefix("description", "XD")
    queryparser.add_boolean_prefix("material", "XM")

    # And parse the query
    query = queryparser.parse_query(querystring)
### End of example code.

    # Use an Enquire object on the database to run the query
    enquire = xapian.Enquire(db)
    enquire.set_query(query)

    # And print out something about each match
    matches = []
    for match in enquire.get_mset(offset, pagesize):
        fields = json.loads(match.document.get_data())
        # Parenthesised single-argument print behaves the same on Python 2
        # and also parses on Python 3.
        print(u"%(rank)i: #%(docid)3.3i %(title)s" % {
            'rank': match.rank + 1,
            'docid': match.docid,
            'title': fields.get('TITLE', u''),
        })
        matches.append(match.docid)

    # Finally, make sure we log the query and displayed results
    support.log_matches(querystring, offset, pagesize, matches)
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search the database, print each match, and log what was shown.

    Args:
        dbpath: Path of the Xapian database to search.
        querystring: Query text to parse; ``title:`` and ``description:``
            are recognised as field prefixes.
        offset: Starting point within the result set.
        pagesize: Number of records to retrieve.
    """
    # Open the database we're going to search.
    db = xapian.Database(dbpath)

    # Set up a QueryParser with a stemmer and suitable prefixes
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(xapian.Stem("en"))
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    queryparser.add_prefix("title", "S")
    queryparser.add_prefix("description", "XD")

    # And parse the query
    query = queryparser.parse_query(querystring)

    # Use an Enquire object on the database to run the query
    enquire = xapian.Enquire(db)
    enquire.set_query(query)

    # And print out something about each match
    matches = []
    for match in enquire.get_mset(offset, pagesize):
        fields = json.loads(match.document.get_data())
        # Parenthesised single-argument print behaves the same on Python 2
        # and also parses on Python 3.
        print(u"%(rank)i: #%(docid)3.3i %(title)s" % {
            'rank': match.rank + 1,
            'docid': match.docid,
            'title': fields.get('TITLE', u''),
        })
        matches.append(match.docid)

    # Finally, make sure we log the query and displayed results
    support.log_matches(querystring, offset, pagesize, matches)
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search the database with results ordered by a computed distance key.

    Matches are sorted by the key produced by ``DistanceKeyMaker`` (then by
    relevance), printed one per match, and finally logged through
    ``support.log_matches``.

    Args:
        dbpath: Path of the Xapian database to search.
        querystring: Query text to parse; ``title:`` and ``description:``
            are recognised as field prefixes.
        offset: Starting point within the result set.
        pagesize: Number of records to retrieve.
    """
    # Open the database we're going to search.
    database = xapian.Database(dbpath)

    # Query parser: English stemming plus the two field prefixes.
    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("en"))
    parser.set_stemming_strategy(parser.STEM_SOME)
    parser.add_prefix("title", "S")
    parser.add_prefix("description", "XD")

    # Turn the query text into a query object.
    parsed = parser.parse_query(querystring)

    # The Enquire object runs the query against the database.
    enquire = xapian.Enquire(database)
    enquire.set_query(parsed)

    # Start of example code.
    class DistanceKeyMaker(xapian.KeyMaker):
        def __call__(self, doc):
            # we want to return a sortable string which represents
            # the distance from Washington, DC to the middle of this
            # state.
            # Slot 4 is read as a "x,y" pair of floats.
            pair_text = doc.get_value(4).decode('utf8')
            x, y = map(float, pair_text.split(','))
            washington = (38.012, -77.037)
            distance = support.distance_between_coords((x, y), washington)
            return xapian.sortable_serialise(distance)

    enquire.set_sort_by_key_then_relevance(DistanceKeyMaker(), False)
    # End of example code.

    # Report every match and remember which documents were shown.
    template = u"%(rank)i: #%(docid)3.3i %(name)s %(date)s\n Population %(pop)s"
    shown = []
    for hit in enquire.get_mset(offset, pagesize):
        record = json.loads(hit.document.get_data().decode('utf8'))
        values = {
            'rank': hit.rank + 1,
            'docid': hit.docid,
            'name': record.get('name', u''),
            'date': support.format_date(record.get('admitted', u'')),
            'pop': support.format_numeral(int(record.get('population', 0))),
            'lat': record.get('latitude', u''),
            'lon': record.get('longitude', u''),
        }
        print(template % values)
        shown.append(hit.docid)

    # Log the query together with the documents that were displayed.
    support.log_matches(querystring, offset, pagesize, shown)
def search(dbpath, querystring, materials, offset=0, pagesize=10):
    """Search the database, optionally restricted to a set of materials.

    When ``materials`` is non-empty the parsed query is filtered so that only
    documents indexed with at least one of the corresponding ``XM`` terms are
    returned.  Each match is printed and the displayed ids are logged.

    Args:
        dbpath: Path of the Xapian database to search.
        querystring: Query text to parse; ``title:`` and ``description:``
            are recognised as field prefixes.
        materials: Material names to filter on; each is lower-cased before
            being turned into a term.
        offset: Starting point within the result set.
        pagesize: Number of records to retrieve.
    """
    # Open the database we're going to search.
    database = xapian.Database(dbpath)

### Start of example code.
    # Query parser: English stemming plus the two field prefixes.
    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("en"))
    parser.set_stemming_strategy(parser.STEM_SOME)
    parser.add_prefix("title", "S")
    parser.add_prefix("description", "XD")

    # Turn the query text into a query object.
    final_query = parser.parse_query(querystring)

    if len(materials) > 0:
        # Keep only results that carry at least one of the materials:
        # one term query per material ...
        per_material = []
        for material in materials:
            per_material.append(xapian.Query('XM' + material.lower()))

        # ... OR-ed together ...
        any_material = xapian.Query(xapian.Query.OP_OR, per_material)

        # ... and applied as a filter on the main query.
        final_query = xapian.Query(
            xapian.Query.OP_FILTER, final_query, any_material)
### End of example code.

    # The Enquire object runs the query against the database.
    enquire = xapian.Enquire(database)
    enquire.set_query(final_query)

    # Report every match and remember which documents were shown.
    template = u"%(rank)i: #%(docid)3.3i %(title)s"
    shown = []
    for hit in enquire.get_mset(offset, pagesize):
        record = json.loads(hit.document.get_data())
        values = {
            'rank': hit.rank + 1,
            'docid': hit.docid,
            'title': record.get('TITLE', u''),
        }
        print(template % values)
        shown.append(hit.docid)

    # Log the query together with the documents that were displayed.
    support.log_matches(querystring, offset, pagesize, shown)
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search the database with results ordered by a computed distance key.

    Matches are sorted by the key produced by ``DistanceKeyMaker`` (then by
    relevance), printed one per match, and finally logged through
    ``support.log_matches``.

    Args:
        dbpath: Path of the Xapian database to search.
        querystring: Query text to parse; ``title:`` and ``description:``
            are recognised as field prefixes.
        offset: Starting point within the result set.
        pagesize: Number of records to retrieve.
    """
    # Open the database we're going to search.
    db = xapian.Database(dbpath)

    # Set up a QueryParser with a stemmer and suitable prefixes
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(xapian.Stem("en"))
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    queryparser.add_prefix("title", "S")
    queryparser.add_prefix("description", "XD")

    # And parse the query
    query = queryparser.parse_query(querystring)

    # Use an Enquire object on the database to run the query
    enquire = xapian.Enquire(db)
    enquire.set_query(query)

    # Start of example code.
    class DistanceKeyMaker(xapian.KeyMaker):
        def __call__(self, doc):
            # we want to return a sortable string which represents
            # the distance from Washington, DC to the middle of this
            # state.
            value = doc.get_value(4)
            # The Python 3 bindings hand values back as bytes; splitting
            # bytes on a str separator raises TypeError, so decode first.
            # On Python 2 the value is already a str and is left alone.
            if not isinstance(value, str):
                value = value.decode("utf8")
            # Build a real list: on Python 3 map() is a one-shot iterator,
            # which the distance helper could not index or re-read.
            coords = [float(part) for part in value.split(",")]
            washington = (38.012, -77.037)
            return xapian.sortable_serialise(
                support.distance_between_coords(coords, washington)
            )

    enquire.set_sort_by_key_then_relevance(DistanceKeyMaker(), False)
    # End of example code.

    # And print out something about each match
    matches = []
    for match in enquire.get_mset(offset, pagesize):
        fields = json.loads(match.document.get_data())
        print(
            u"%(rank)i: #%(docid)3.3i %(name)s %(date)s\n Population %(pop)s"
            % {
                "rank": match.rank + 1,
                "docid": match.docid,
                "name": fields.get("name", u""),
                "date": support.format_date(fields.get("admitted", u"")),
                "pop": support.format_numeral(int(fields.get("population", 0))),
                "lat": fields.get("latitude", u""),
                "lon": fields.get("longitude", u""),
            }
        )
        matches.append(match.docid)

    # Finally, make sure we log the query and displayed results
    support.log_matches(querystring, offset, pagesize, matches)
def search(dbpath, querystring, materials, offset=0, pagesize=10):
    """Search the database, optionally restricted to a set of materials.

    A non-empty ``materials`` sequence narrows the parsed query to documents
    indexed with at least one matching ``XM`` term.  Every match is printed
    and the displayed document ids are logged via ``support.log_matches``.

    Args:
        dbpath: Path of the Xapian database to search.
        querystring: Query text to parse; ``title:`` and ``description:``
            are recognised as field prefixes.
        materials: Material names to filter on; each is lower-cased before
            being turned into a term.
        offset: Starting point within the result set.
        pagesize: Number of records to retrieve.
    """
    # Open the database we're going to search.
    source_db = xapian.Database(dbpath)

### Start of example code.
    # Parser with an English stemmer and the field prefixes we support.
    qp = xapian.QueryParser()
    qp.set_stemmer(xapian.Stem("en"))
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.add_prefix("title", "S")
    qp.add_prefix("description", "XD")

    # Parse the user's text.
    text_query = qp.parse_query(querystring)

    if len(materials) > 0:
        # Restrict the results to documents containing at least one of
        # the requested materials: build one term query per material,
        # OR them, then use the result to filter the text query.
        term_queries = []
        for name in materials:
            term_queries.append(xapian.Query('XM' + name.lower()))
        materials_filter = xapian.Query(xapian.Query.OP_OR, term_queries)
        text_query = xapian.Query(
            xapian.Query.OP_FILTER, text_query, materials_filter)
### End of example code.

    # Run the query through an Enquire object.
    enquire = xapian.Enquire(source_db)
    enquire.set_query(text_query)

    # Print a line for each match, collecting the ids as we go.
    displayed = []
    for result in enquire.get_mset(offset, pagesize):
        payload = result.document.get_data().decode('utf8')
        record = json.loads(payload)
        summary = u"%(rank)i: #%(docid)3.3i %(title)s" % {
            'rank': result.rank + 1,
            'docid': result.docid,
            'title': record.get('TITLE', u''),
        }
        print(summary)
        displayed.append(result.docid)

    # Record the query and what was displayed for it.
    support.log_matches(querystring, offset, pagesize, displayed)
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search the database and report facet counts for value slot 1.

    A ``ValueCountMatchSpy`` on slot 1 is attached before the match set is
    fetched (checking at least 100 documents).  Each match is printed,
    followed by one line per facet value seen by the spy, and the displayed
    ids are logged via ``support.log_matches``.

    Args:
        dbpath: Path of the Xapian database to search.
        querystring: Query text to parse; ``title:`` and ``description:``
            are recognised as field prefixes.
        offset: Starting point within the result set.
        pagesize: Number of records to retrieve.
    """
    # Open the database we're going to search.
    database = xapian.Database(dbpath)

    # Query parser: English stemming plus the two field prefixes.
    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("en"))
    parser.set_stemming_strategy(parser.STEM_SOME)
    parser.add_prefix("title", "S")
    parser.add_prefix("description", "XD")

    # Turn the query text into a query object.
    parsed = parser.parse_query(querystring)

    # The Enquire object runs the query against the database.
    enquire = xapian.Enquire(database)
    enquire.set_query(parsed)

    # Report every match and remember which documents were shown.
    shown = []

### Start of example code.
    # Set up a spy to inspect the MAKER value at slot 1
    maker_spy = xapian.ValueCountMatchSpy(1)
    enquire.add_matchspy(maker_spy)

    match_template = u"%(rank)i: #%(docid)3.3i %(title)s"
    for hit in enquire.get_mset(offset, pagesize, 100):
        record = json.loads(hit.document.get_data().decode('utf8'))
        values = {
            'rank': hit.rank + 1,
            'docid': hit.docid,
            'title': record.get('TITLE', u''),
        }
        print(match_template % values)
        shown.append(hit.docid)

    # Fetch and display the spy values
    facet_template = "Facet: %(term)s; count: %(count)i"
    for facet in maker_spy.values():
        facet_values = {
            'term': facet.term.decode('utf-8'),
            'count': facet.termfreq,
        }
        print(facet_template % facet_values)

    # Log the query together with the documents that were displayed.
    support.log_matches(querystring, offset, pagesize, shown)
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search the database with results ordered on two value slots.

    Results are sorted with a ``MultiValueKeyMaker`` built from slot 1 and
    slot 3 (the latter added with its reverse flag set), then by relevance.
    Each match is printed and the displayed ids are logged via
    ``support.log_matches``.

    Args:
        dbpath: Path of the Xapian database to search.
        querystring: Query text to parse; ``title:`` and ``description:``
            are recognised as field prefixes.
        offset: Starting point within the result set.
        pagesize: Number of records to retrieve.
    """
    # Open the database we're going to search.
    database = xapian.Database(dbpath)

    # Query parser: English stemming plus the two field prefixes.
    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("en"))
    parser.set_stemming_strategy(parser.STEM_SOME)
    parser.add_prefix("title", "S")
    parser.add_prefix("description", "XD")

    # Turn the query text into a query object.
    parsed = parser.parse_query(querystring)

    # The Enquire object runs the query against the database.
    enquire = xapian.Enquire(database)
    enquire.set_query(parsed)

    # Start of example code.
    sort_keys = xapian.MultiValueKeyMaker()
    for slot, reverse in ((1, False), (3, True)):
        sort_keys.add_value(slot, reverse)
    enquire.set_sort_by_key_then_relevance(sort_keys, False)
    # End of example code.

    # Report every match and remember which documents were shown.
    template = u"%(rank)i: #%(docid)3.3i %(name)s %(date)s\n Population %(pop)s"
    shown = []
    for hit in enquire.get_mset(offset, pagesize):
        record = json.loads(hit.document.get_data().decode('utf8'))
        values = {
            'rank': hit.rank + 1,
            'docid': hit.docid,
            'name': record.get('name', u''),
            'date': support.format_date(record.get('admitted', u'')),
            'pop': support.format_numeral(int(record.get('population', 0))),
            'lat': record.get('latitude', u''),
            'lon': record.get('longitude', u''),
        }
        print(template % values)
        shown.append(hit.docid)

    # Log the query together with the documents that were displayed.
    support.log_matches(querystring, offset, pagesize, shown)
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search the database and report facet counts for value slot 1.

    A ``ValueCountMatchSpy`` on slot 1 is attached before the match set is
    fetched (checking at least 100 documents).  Each match is printed,
    followed by one line per facet value seen by the spy, and the displayed
    ids are logged via ``support.log_matches``.

    Args:
        dbpath: Path of the Xapian database to search.
        querystring: Query text to parse; ``title:`` and ``description:``
            are recognised as field prefixes.
        offset: Starting point within the result set.
        pagesize: Number of records to retrieve.
    """
    # Open the database we're going to search.
    db = xapian.Database(dbpath)

    # Set up a QueryParser with a stemmer and suitable prefixes
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(xapian.Stem("en"))
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    queryparser.add_prefix("title", "S")
    queryparser.add_prefix("description", "XD")

    # And parse the query
    query = queryparser.parse_query(querystring)

    # Use an Enquire object on the database to run the query
    enquire = xapian.Enquire(db)
    enquire.set_query(query)

    # And print out something about each match
    matches = []

### Start of example code.
    # Set up a spy to inspect the MAKER value at slot 1
    spy = xapian.ValueCountMatchSpy(1)
    enquire.add_matchspy(spy)

    for match in enquire.get_mset(offset, pagesize, 100):
        fields = json.loads(match.document.get_data())
        print(u"%(rank)i: #%(docid)3.3i %(title)s" % {
            'rank': match.rank + 1,
            'docid': match.docid,
            'title': fields.get('TITLE', u''),
        })
        matches.append(match.docid)

    # Fetch and display the spy values
    for facet in spy.values():
        term = facet.term
        # The Python 3 bindings return terms as bytes, which "%s" would
        # render as b'...'.  Decode only in that case so Python 2 output
        # (where the term is already a str) is unchanged.
        if not isinstance(term, str):
            term = term.decode('utf-8')
        print("Facet: %(term)s; count: %(count)i" % {
            'term': term,
            'count': facet.termfreq,
        })

    # Finally, make sure we log the query and displayed results
    support.log_matches(querystring, offset, pagesize, matches)
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search the database with results ordered on two value slots.

    Results are sorted with a ``MultiValueKeyMaker`` built from slot 1 and
    slot 3 (the latter added with its reverse flag set), then by relevance.
    The printed rank is the match's position in the returned page plus
    ``offset``.  The displayed ids are logged via ``support.log_matches``.

    Args:
        dbpath: Path of the Xapian database to search.
        querystring: Query text to parse; ``title:`` and ``description:``
            are recognised as field prefixes.
        offset: Starting point within the result set.
        pagesize: Number of records to retrieve.
    """
    # Open the database we're going to search.
    db = xapian.Database(dbpath)

    # Set up a QueryParser with a stemmer and suitable prefixes
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(xapian.Stem("en"))
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    queryparser.add_prefix("title", "S")
    queryparser.add_prefix("description", "XD")

    # And parse the query
    query = queryparser.parse_query(querystring)

    # Use an Enquire object on the database to run the query
    enquire = xapian.Enquire(db)
    enquire.set_query(query)

    # Start of example code.
    keymaker = xapian.MultiValueKeyMaker()
    keymaker.add_value(1, False)
    keymaker.add_value(3, True)
    enquire.set_sort_by_key_then_relevance(keymaker, False)
    # End of example code.

    # And print out something about each match
    matches = []
    for index, match in enumerate(enquire.get_mset(offset, pagesize)):
        fields = json.loads(match.document.get_data())
        # Parenthesised single-argument print behaves the same on Python 2
        # and also parses on Python 3.
        print(u"%(rank)i: #%(docid)3.3i %(name)s %(date)s\n Population %(pop)s" % {
            'rank': offset + index + 1,
            'docid': match.docid,
            'name': fields.get('name', u''),
            'date': support.format_date(fields.get('admitted', u'')),
            'pop': support.format_numeral(fields.get('population', 0)),
            'lat': fields.get('latitude', u''),
            'lon': fields.get('longitude', u''),
        })
        matches.append(match.docid)

    # Finally, make sure we log the query and displayed results
    support.log_matches(querystring, offset, pagesize, matches)
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search the database with numeric range support in the query syntax.

    Two number range processors are registered on the parser: one for value
    slot 0 that recognises ranges carrying an ``mm`` suffix, and one for
    value slot 1 with no marker.  Each match is printed and the displayed
    ids are logged via ``support.log_matches``.

    Args:
        dbpath: Path of the Xapian database to search.
        querystring: Query text to parse; ``title:`` and ``description:``
            are recognised as field prefixes.
        offset: Starting point within the result set.
        pagesize: Number of records to retrieve.
    """
    # Open the database we're going to search.
    database = xapian.Database(dbpath)

    # Query parser: English stemming plus the two field prefixes.
    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("en"))
    parser.set_stemming_strategy(parser.STEM_SOME)
    parser.add_prefix("title", "S")
    parser.add_prefix("description", "XD")

    # Register the range processors (suffix-marked one first).
    parser.add_rangeprocessor(
        xapian.NumberRangeProcessor(0, 'mm', xapian.RP_SUFFIX))
    parser.add_rangeprocessor(xapian.NumberRangeProcessor(1))

    # Turn the query text into a query object.
    parsed = parser.parse_query(querystring)

    # The Enquire object runs the query against the database.
    enquire = xapian.Enquire(database)
    enquire.set_query(parsed)

    # Report every match and remember which documents were shown.
    template = u"%(rank)i: #%(docid)3.3i (%(date)s) %(measurements)s\n %(title)s"
    shown = []
    for hit in enquire.get_mset(offset, pagesize):
        record = json.loads(hit.document.get_data())
        values = {
            'rank': hit.rank + 1,
            'docid': hit.docid,
            'measurements': record.get('MEASUREMENTS', u''),
            'date': record.get('DATE_MADE', u''),
            'title': record.get('TITLE', u''),
        }
        print(template % values)
        shown.append(hit.docid)

    # Log the query together with the documents that were displayed.
    support.log_matches(querystring, offset, pagesize, shown)
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search the database with synonym expansion enabled.

    The database is opened writable because a synonym ("calendar" for
    "time") is added to it before the query is parsed.  The parser is given
    the database and parses with ``FLAG_SYNONYM``.  Each match is printed
    and the displayed ids are logged via ``support.log_matches``.

    Args:
        dbpath: Path of the Xapian database to search.
        querystring: Query text to parse; ``title:`` and ``description:``
            are recognised as field prefixes.
        offset: Starting point within the result set.
        pagesize: Number of records to retrieve.
    """
    # Open the database we're going to search.
    database = xapian.WritableDatabase(dbpath)

    # Start of adding synonyms
    database.add_synonym("time", "calendar")
    # End of adding synonyms

    # Query parser: English stemming plus the two field prefixes.
    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem("en"))
    parser.set_stemming_strategy(parser.STEM_SOME)
    parser.add_prefix("title", "S")
    parser.add_prefix("description", "XD")

    # Start of set database
    parser.set_database(database)
    # End of set database

    # Turn the query text into a query object, with synonyms switched on.
    parsed = parser.parse_query(querystring, parser.FLAG_SYNONYM)

    # The Enquire object runs the query against the database.
    enquire = xapian.Enquire(database)
    enquire.set_query(parsed)

    # Report every match and remember which documents were shown.
    template = u"%(rank)i: #%(docid)3.3i %(title)s"
    shown = []
    for hit in enquire.get_mset(offset, pagesize):
        record = json.loads(hit.document.get_data().decode('utf8'))
        values = {
            'rank': hit.rank + 1,
            'docid': hit.docid,
            'title': record.get('TITLE', u''),
        }
        print(template % values)
        shown.append(hit.docid)

    # Log the query together with the documents that were displayed.
    support.log_matches(querystring, offset, pagesize, shown)
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search the database with population, date and numeric range support.

    Three range processors are registered, in this order: a custom one for
    value slot 3 that only accepts bounds between 500000 and 50000000, a
    date processor for slot 2 (month/day/year preferred, epoch year 1860),
    and a plain number processor for slot 1.  Each match is printed and the
    displayed ids are logged via ``support.log_matches``.

    Args:
        dbpath: Path of the Xapian database to search.
        querystring: Query text to parse; ``title:`` and ``description:``
            are recognised as field prefixes.
        offset: Starting point within the result set.
        pagesize: Number of records to retrieve.
    """
    # Open the database we're going to search.
    db = xapian.Database(dbpath)

    # Set up a QueryParser with a stemmer and suitable prefixes
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(xapian.Stem("en"))
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    queryparser.add_prefix("title", "S")
    queryparser.add_prefix("description", "XD")

    # and add in range processors
    # Start of custom RP code
    class PopulationRangeProcessor(xapian.RangeProcessor):
        def __init__(self, slot, low, high):
            super(PopulationRangeProcessor, self).__init__()
            self.nrp = xapian.NumberRangeProcessor(slot)
            self.low = low
            self.high = high

        def _acceptable(self, bound):
            # An empty bound means that end of the range is open.
            if len(bound) == 0:
                return True
            # Catch only conversion/comparison failures; a bare except
            # would also swallow KeyboardInterrupt and SystemExit.
            try:
                return self.low <= int(bound) <= self.high
            except (TypeError, ValueError):
                return False

        def __call__(self, begin, end):
            # Decline the range unless both bounds are empty or in limits.
            if not (self._acceptable(begin) and self._acceptable(end)):
                return xapian.Query(xapian.Query.OP_INVALID)
            return self.nrp(begin, end)

    queryparser.add_rangeprocessor(
        PopulationRangeProcessor(3, 500000, 50000000)
    )
    # End of custom RP code
    # Start of date example code
    queryparser.add_rangeprocessor(
        xapian.DateRangeProcessor(2, xapian.RP_DATE_PREFER_MDY, 1860)
    )
    queryparser.add_rangeprocessor(
        xapian.NumberRangeProcessor(1)
    )
    # End of date example code

    # And parse the query
    query = queryparser.parse_query(querystring)

    # Use an Enquire object on the database to run the query
    enquire = xapian.Enquire(db)
    enquire.set_query(query)

    # And print out something about each match
    matches = []
    for match in enquire.get_mset(offset, pagesize):
        fields = json.loads(match.document.get_data().decode('utf8'))
        population = support.format_numeral(int(fields.get('population', 0)))
        date = support.format_date(fields.get('admitted'))

        print(u"%(rank)i: #%(docid)3.3i %(name)s %(date)s\n Population %(pop)s" % {
            'rank': match.rank + 1,
            'docid': match.docid,
            'name': fields.get('name', u''),
            'date': date,
            'pop': population,
            'lat': fields.get('latitude', u''),
            'lon': fields.get('longitude', u''),
        })
        matches.append(match.docid)

    # Finally, make sure we log the query and displayed results
    support.log_matches(querystring, offset, pagesize, matches)
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search the database with population, date and numeric range support.

    Three range processors are registered, in this order: a custom one for
    value slot 3 that only accepts bounds between 500000 and 50000000, a
    date processor for slot 2 (month/day/year preferred, epoch year 1860),
    and a plain number processor for slot 1.  Each match is printed and the
    displayed ids are logged via ``support.log_matches``.

    Args:
        dbpath: Path of the Xapian database to search.
        querystring: Query text to parse; ``title:`` and ``description:``
            are recognised as field prefixes.
        offset: Starting point within the result set.
        pagesize: Number of records to retrieve.
    """
    # Open the database we're going to search.
    db = xapian.Database(dbpath)

    # Set up a QueryParser with a stemmer and suitable prefixes
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(xapian.Stem("en"))
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    queryparser.add_prefix("title", "S")
    queryparser.add_prefix("description", "XD")

    # and add in range processors
    # Start of custom RP code
    class PopulationRangeProcessor(xapian.RangeProcessor):
        def __init__(self, slot, low, high):
            super(PopulationRangeProcessor, self).__init__()
            self.nrp = xapian.NumberRangeProcessor(slot)
            self.low = low
            self.high = high

        def _within_limits(self, bound):
            # An empty bound means that end of the range is open.
            if len(bound) == 0:
                return True
            # Only conversion/comparison errors mean "not a population";
            # a bare except would also hide KeyboardInterrupt/SystemExit.
            try:
                number = int(bound)
            except (TypeError, ValueError):
                return False
            try:
                return not (number < self.low or number > self.high)
            except TypeError:
                return False

        def __call__(self, begin, end):
            # Validate begin first, then end; reject on the first failure.
            for bound in (begin, end):
                if not self._within_limits(bound):
                    return xapian.Query(xapian.Query.OP_INVALID)
            return self.nrp(begin, end)

    queryparser.add_rangeprocessor(
        PopulationRangeProcessor(3, 500000, 50000000)
    )
    # End of custom RP code
    # Start of date example code
    queryparser.add_rangeprocessor(
        xapian.DateRangeProcessor(2, xapian.RP_DATE_PREFER_MDY, 1860)
    )
    queryparser.add_rangeprocessor(
        xapian.NumberRangeProcessor(1)
    )
    # End of date example code

    # And parse the query
    query = queryparser.parse_query(querystring)

    # Use an Enquire object on the database to run the query
    enquire = xapian.Enquire(db)
    enquire.set_query(query)

    # And print out something about each match
    matches = []
    for match in enquire.get_mset(offset, pagesize):
        fields = json.loads(match.document.get_data())
        population = support.format_numeral(int(fields.get('population', 0)))
        date = support.format_date(fields.get('admitted'))

        print(u"%(rank)i: #%(docid)3.3i %(name)s %(date)s\n Population %(pop)s" % {
            'rank': match.rank + 1,
            'docid': match.docid,
            'name': fields.get('name', u''),
            'date': date,
            'pop': population,
            'lat': fields.get('latitude', u''),
            'lon': fields.get('longitude', u''),
        })
        matches.append(match.docid)

    # Finally, make sure we log the query and displayed results
    support.log_matches(querystring, offset, pagesize, matches)
def search(dbpath, querystring, offset=0, pagesize=10):
    """Search the database using the ValueRangeProcessor-style range API.

    Three value range processors are registered, in this order: a custom
    one for value slot 3 that only accepts bounds between 500000 and
    50000000, a date processor for slot 2 (month/day/year preferred, epoch
    year 1860), and a number processor for slot 1.  Each match is printed
    and the displayed ids are logged via ``support.log_matches``.

    Args:
        dbpath: Path of the Xapian database to search.
        querystring: Query text to parse; ``title:`` and ``description:``
            are recognised as field prefixes.
        offset: Starting point within the result set.
        pagesize: Number of records to retrieve.
    """
    # Open the database we're going to search.
    db = xapian.Database(dbpath)

    # Set up a QueryParser with a stemmer and suitable prefixes
    queryparser = xapian.QueryParser()
    queryparser.set_stemmer(xapian.Stem("en"))
    # Set the strategy explicitly: Xapian releases before 1.3.1 default to
    # STEM_NONE, which would leave the stemmer above unused.
    queryparser.set_stemming_strategy(queryparser.STEM_SOME)
    queryparser.add_prefix("title", "S")
    queryparser.add_prefix("description", "XD")

    # and add in value range processors
    # Start of custom VRP code
    class PopulationValueRangeProcessor(xapian.ValueRangeProcessor):
        def __init__(self, value, low, high):
            super(PopulationValueRangeProcessor, self).__init__()
            self.nvrp = xapian.NumberValueRangeProcessor(value)
            self.low = low
            self.high = high

        def _acceptable(self, bound):
            # An empty bound means that end of the range is open.
            if bound == u"":
                return True
            # Catch only conversion/comparison failures; a bare except
            # would also swallow KeyboardInterrupt and SystemExit.
            try:
                return self.low <= int(bound) <= self.high
            except (TypeError, ValueError):
                return False

        def __call__(self, begin, end):
            # BAD_VALUENO tells the parser this processor declines the range.
            if not (self._acceptable(begin) and self._acceptable(end)):
                return (xapian.BAD_VALUENO, begin, end)
            return self.nvrp(begin, end)

    queryparser.add_valuerangeprocessor(
        PopulationValueRangeProcessor(3, 500000, 50000000)
    )
    # End of custom VRP code
    # Start of date example code
    queryparser.add_valuerangeprocessor(
        xapian.DateValueRangeProcessor(2, True, 1860)
    )
    queryparser.add_valuerangeprocessor(
        xapian.NumberValueRangeProcessor(1, '')
    )
    # End of date example code

    # And parse the query
    query = queryparser.parse_query(querystring)

    # Use an Enquire object on the database to run the query
    enquire = xapian.Enquire(db)
    enquire.set_query(query)

    # And print out something about each match
    matches = []
    for match in enquire.get_mset(offset, pagesize):
        fields = json.loads(match.document.get_data())
        # Parenthesised single-argument print behaves the same on Python 2
        # and also parses on Python 3.
        print(u"%(rank)i: #%(docid)3.3i %(name)s %(date)s\n Population %(pop)s" % {
            'rank': match.rank + 1,
            'docid': match.docid,
            'name': fields.get('name', u''),
            'date': fields.get('admitted', u''),
            'pop': fields.get('population', u''),
            'lat': fields.get('latitude', u''),
            'lon': fields.get('longitude', u''),
        })
        matches.append(match.docid)

    # Finally, make sure we log the query and displayed results
    support.log_matches(querystring, offset, pagesize, matches)