def count(self, query):
    """Return total number of matching documents in index.

    :param query: search text; it is coerced to unicode and parsed against
        the "title" field using the index schema.
    :returns: ``len()`` of the results object returned by the searcher.
    """
    query = unicode(query)  # Must be unicode
    ix = whoosh_open_dir_32_or_64(self.index_dir)
    try:
        with ix.searcher() as searcher:
            parsed_query = QueryParser("title", ix.schema).parse(query)
            return len(searcher.search(parsed_query))
    finally:
        # Close the index even when parsing or searching raises, so a bad
        # query does not leak the opened index.
        ix.close()
def search(humanReadableId):
    """Render the search page for the ZIM index named by humanReadableId.

    Reads the ``q`` (keywords) and ``page`` request arguments. When keywords
    are given, the index under the configured ``wikipedia_index_dir`` is
    searched on the "title" and "content" fields; otherwise an error message
    is flashed and the template is rendered without results.

    :param humanReadableId: name of the index sub-directory to search.
    :returns: the rendered ``zim/search.html`` template.
    """
    query = request.args.get('q', '').strip()
    pagination = None
    # Must be bound before the template call: without a query the search
    # branch never runs and `suggestion` would otherwise be undefined.
    suggestion = []
    if query:
        index_base_dir = config().get_path("ZIM", "wikipedia_index_dir")
        # NOTE(review): humanReadableId is joined into a filesystem path
        # unchecked -- confirm the route restricts its value.
        index_dir = os.path.join(index_base_dir, humanReadableId)
        # type=int falls back to the default instead of raising ValueError
        # when the page argument is not a number.
        page = request.args.get('page', 1, type=int)

        # Load index so we can query it for which fields exist
        ix = whoosh_open_dir_32_or_64(index_dir)

        # Set a higher value for the title field so it is weighted more
        weighting = scoring.BM25F(title_B=1.0)

        def image_pages_last(searcher, docnum):
            """Sort key: 1 for titles starting with "Image:", else 0.

            Sorts pages with "Image:" in their title after regular articles.
            """
            fields = searcher.stored_fields(docnum)
            return 1 if fields['title'].startswith("Image:") else 0

        facets = [
            sorting.FunctionFacet(image_pages_last),
            sorting.ScoreFacet(),
        ]
        # Support older whoosh indexes that do not have a reverse_links field
        if 'reverse_links' in ix.schema.names():
            facets.append(sorting.FieldFacet("reverse_links", reverse=True))
        sortedby = sorting.MultiFacet(facets)

        (pagination, suggestion) = paginated_search(
            ix, ["title", "content"], query, page,
            weighting=weighting, sort_column=sortedby)
    else:
        flash(_('Please input keyword(s)'), 'error')

    return render_template(
        'zim/search.html',
        humanReadableId=humanReadableId,
        pagination=pagination,
        suggestion=suggestion,
        keywords=query,
        endpoint_desc=EndPointDescription(
            'zim_views.search', {'humanReadableId': humanReadableId}))
def search():
    """Render the Gutenberg search page for the current request.

    Reads the ``q`` (keywords) and ``page`` request arguments. When keywords
    are given, the configured Gutenberg index is searched over
    DEFAULT_SEARCH_COLUMNS sorted by 'creator'; otherwise an error message
    is flashed and the template is rendered without results.

    :returns: the rendered ``gutenberg/search.html`` template.
    """
    query = request.args.get('q', '').strip()
    pagination = None
    # Must be bound before the template call: without a query the search
    # branch never runs and `suggestion` would otherwise be undefined.
    suggestion = []
    if query:
        index_dir = config().get_path('GUTENBERG', 'index_dir')
        # type=int falls back to the default instead of raising ValueError
        # when the page argument is not a number.
        page = request.args.get('page', 1, type=int)
        ix = whoosh_open_dir_32_or_64(index_dir)
        (pagination, suggestion) = paginated_search(
            ix, DEFAULT_SEARCH_COLUMNS, query, page, sort_column='creator')
    else:
        flash(_('Please input keyword(s)'), 'error')

    return render_template(
        'gutenberg/search.html',
        pagination=pagination,
        keywords=query,
        suggestion=suggestion,
        fn_author_to_query=author_to_query,
        endpoint_desc=EndPointDescription('gutenberg.search', None),
        files_exist=files_exist)
def search(self, query, page=1, pagelen=20): """Return a sorted list of results. pagelen specifies the number of hits per page. page specifies the page of results to return (first page is 1) Set pagelen = None or 0 to retrieve all results. """ query = unicode(query) # Must be unicode ix = whoosh_open_dir_32_or_64(self.index_dir) with ix.searcher() as searcher: query = QueryParser("title", ix.schema).parse(query) if pagelen is not None and pagelen != 0: try: results = searcher.search_page(query, page, pagelen=pagelen, sortedby="score", reverse=True) except ValueError, e: # Invalid page number results = [] else:
def search(self, query, page=1, pagelen=20): """Return a sorted list of results. pagelen specifies the number of hits per page. page specifies the page of results to return (first page is 1) Set pagelen = None or 0 to retrieve all results. """ query = unicode(query) # Must be unicode population_sort_facet = sorting.FieldFacet("population", reverse=True) ix = whoosh_open_dir_32_or_64(self.index_dir) with ix.searcher() as searcher: # query = QueryParser("ngram_name", ix.schema).parse(query) mparser = MultifieldParser(["ngram_name", "admin1_code", "country_code"], schema=ix.schema) query = mparser.parse(query) if pagelen is not None and pagelen != 0: try: results = searcher.search_page(query, page, pagelen=pagelen) except ValueError, e: # Invalid page number results = [] else:
def search(humanReadableId):
    """Render the search page for the ZIM index named by humanReadableId.

    Reads the ``q`` (keywords) and ``page`` request arguments. When keywords
    are given, the index under the configured ``wikipedia_index_dir`` is
    searched on the "title" and "content" fields; otherwise an error message
    is flashed and the template is rendered without results.

    :param humanReadableId: name of the index sub-directory to search.
    :returns: the rendered ``zim/search.html`` template.
    """
    query = request.args.get('q', '').strip()
    pagination = None
    # Must be bound before the template call: without a query the search
    # branch never runs and `suggestion` would otherwise be undefined.
    suggestion = []
    if query:
        index_base_dir = config().get_path("ZIM", "wikipedia_index_dir")
        # NOTE(review): humanReadableId is joined into a filesystem path
        # unchecked -- confirm the route restricts its value.
        index_dir = os.path.join(index_base_dir, humanReadableId)
        # type=int falls back to the default instead of raising ValueError
        # when the page argument is not a number.
        page = request.args.get('page', 1, type=int)

        # Load index so we can query it for which fields exist
        ix = whoosh_open_dir_32_or_64(index_dir)

        # Set a higher value for the title field so it is weighted more
        weighting = scoring.BM25F(title_B=1.0)

        def image_pages_last(searcher, docnum):
            """Sort key: 1 for titles starting with "Image:", else 0.

            Sorts pages with "Image:" in their title after regular articles.
            """
            fields = searcher.stored_fields(docnum)
            return 1 if fields['title'].startswith("Image:") else 0

        facets = [
            sorting.FunctionFacet(image_pages_last),
            sorting.ScoreFacet(),
        ]
        # Support older whoosh indexes that do not have a reverse_links field
        if 'reverse_links' in ix.schema.names():
            facets.append(sorting.FieldFacet("reverse_links", reverse=True))
        sortedby = sorting.MultiFacet(facets)

        (pagination, suggestion) = paginated_search(
            ix, ["title", "content"], query, page,
            weighting=weighting, sort_column=sortedby)
    else:
        flash(_('Please input keyword(s)'), 'error')

    return render_template(
        'zim/search.html',
        humanReadableId=humanReadableId,
        pagination=pagination,
        suggestion=suggestion,
        keywords=query,
        endpoint_desc=EndPointDescription(
            'zim_views.search', {'humanReadableId': humanReadableId}))
def paginated_search(index_dir, search_columns, query_text, page=1, pagelen=20,
                     sort_column=None, weighting=scoring.BM25F):
    """
    Return a tuple consisting of an object that emulates an SQLAlchemy
    pagination object and corrected query suggestion

    :param index_dir: passed to whoosh_open_dir_32_or_64 to open the index.
    :param search_columns: field names handed to MultifieldParser.
    :param query_text: search text; coerced to unicode before parsing.
    :param page: page of results to return (first page is 1).
    :param pagelen: number of hits per page.
    :param sort_column: forwarded as ``sortedby`` to ``search_page``.
    :param weighting: forwarded to ``ix.searcher``.
    :returns: ``(pagination, suggestions)`` where ``suggestions`` is the list
        of ``.string`` values of the de-duplicated query corrections.
    """
    query_text = unicode(query_text)  # Must be unicode
    ix = whoosh_open_dir_32_or_64(index_dir)
    try:
        with ix.searcher(weighting=weighting) as searcher:
            query = MultifieldParser(search_columns, ix.schema).parse(query_text)
            try:
                # search_page returns whoosh.searching.ResultsPage
                results = searcher.search_page(query, page, pagelen=pagelen,
                                               sortedby=sort_column)
                total = results.total
            except ValueError:  # Invalid page number
                results = []
                total = 0
            # Copy each hit into a plain dict while the searcher is still
            # open, so the returned items do not depend on it afterwards.
            items = [dict(hit.items()) for hit in results]
            paginate = pagination_helper.Pagination(page, pagelen, total, items)
            # list of Corrector objects
            corrections = deduplicate_corrections(
                get_query_corrections(searcher, query, query_text))
            suggestions = [c.string for c in corrections]
    finally:
        # Release the index on every path, including failed parses/searches.
        ix.close()
    return (paginate, suggestions)
def search(self, query, page=1, pagelen=20): """Return a sorted list of results. pagelen specifies the number of hits per page. page specifies the page of results to return (first page is 1) Set pagelen = None or 0 to retrieve all results. """ query = unicode(query) # Must be unicode population_sort_facet = sorting.FieldFacet("population", reverse=True) ix = whoosh_open_dir_32_or_64(self.index_dir) with ix.searcher() as searcher: # query = QueryParser("ngram_name", ix.schema).parse(query) mparser = MultifieldParser( ["ngram_name", "admin1_code", "country_code"], schema=ix.schema) query = mparser.parse(query) if pagelen is not None and pagelen != 0: try: results = searcher.search_page(query, page, pagelen=pagelen) except ValueError, e: # Invalid page number results = [] else:
def search():
    """Render the Gutenberg search page for the current request.

    Reads the ``q`` (keywords) and ``page`` request arguments. When keywords
    are given, the configured Gutenberg index is searched over
    DEFAULT_SEARCH_COLUMNS sorted by 'creator'; otherwise an error message
    is flashed and the template is rendered without results.

    :returns: the rendered ``gutenberg/search.html`` template.
    """
    query = request.args.get('q', '').strip()
    pagination = None
    # Must be bound before the template call: without a query the search
    # branch never runs and `suggestion` would otherwise be undefined.
    suggestion = []
    if query:
        index_dir = config().get_path('GUTENBERG', 'index_dir')
        # type=int falls back to the default instead of raising ValueError
        # when the page argument is not a number.
        page = request.args.get('page', 1, type=int)
        ix = whoosh_open_dir_32_or_64(index_dir)
        (pagination, suggestion) = paginated_search(
            ix, DEFAULT_SEARCH_COLUMNS, query, page, sort_column='creator')
    else:
        flash(_('Please input keyword(s)'), 'error')

    return render_template(
        'gutenberg/search.html',
        pagination=pagination,
        keywords=query,
        suggestion=suggestion,
        fn_author_to_query=author_to_query,
        endpoint_desc=EndPointDescription(
            'gutenberg.search', None),
        files_exist=files_exist)