示例#1
0
 def count(self, query):
     """Return total number of matching documents in index.

     The query text is parsed against the ``title`` field of the index
     opened from ``self.index_dir``.

     :param query: query text; coerced to ``unicode`` before parsing.
     :returns: ``len()`` of the search results for the parsed query.
     """
     query = unicode(query)  # Must be unicode
     ix = whoosh_open_dir_32_or_64(self.index_dir)
     try:
         with ix.searcher() as searcher:
             parsed_query = QueryParser("title", ix.schema).parse(query)
             # Take the length while the searcher is still open
             return len(searcher.search(parsed_query))
     finally:
         # Close the index even if parsing or searching raised
         ix.close()
 def count(self, query):
     """Return total number of matching documents in index.

     The query text is parsed against the ``title`` field of the index
     opened from ``self.index_dir``.

     :param query: query text; coerced to ``unicode`` before parsing.
     :returns: ``len()`` of the search results for the parsed query.
     """
     query = unicode(query)  # Must be unicode
     ix = whoosh_open_dir_32_or_64(self.index_dir)
     try:
         with ix.searcher() as searcher:
             parsed_query = QueryParser("title", ix.schema).parse(query)
             # Take the length while the searcher is still open
             return len(searcher.search(parsed_query))
     finally:
         # Close the index even if parsing or searching raised
         ix.close()
示例#3
0
def search(humanReadableId):
    """Render the search page for the index named by ``humanReadableId``.

    Reads the ``q`` (keywords) and ``page`` request arguments.  When
    keywords are given, the index found under the configured
    ``wikipedia_index_dir`` is searched over the ``title`` and ``content``
    fields; otherwise an error message is flashed and the template is
    rendered without results.

    :param humanReadableId: name of the index sub-directory to search.
    :returns: the rendered ``zim/search.html`` template.
    """
    query = request.args.get('q', '').strip()
    # Both are read unconditionally by render_template below, so they must
    # be bound even when no keywords were supplied.
    pagination = None
    suggestion = None
    if query:
        index_base_dir = config().get_path("ZIM", "wikipedia_index_dir")
        index_dir = os.path.join(index_base_dir, humanReadableId)
        try:
            page = int(request.args.get('page', 1))
        except ValueError:
            # Non-numeric page argument: fall back to the first page
            page = 1

        # Load index so we can query it for which fields exist
        ix = whoosh_open_dir_32_or_64(index_dir)

        # Per-field BM25F override for the title field (the original
        # author's intent: weight the title more heavily)
        weighting = scoring.BM25F(title_B=1.0)

        # Sort pages with "Image:" in their title after
        # regular articles
        def image_pages_last(searcher, docnum):
            fields = searcher.stored_fields(docnum)
            return 1 if fields['title'].startswith("Image:") else 0

        facets = [
            sorting.FunctionFacet(image_pages_last),
            sorting.ScoreFacet(),
        ]
        # Support older whoosh indexes that do not have a reverse_links field
        if 'reverse_links' in ix.schema.names():
            facets.append(sorting.FieldFacet("reverse_links", reverse=True))
        sortedby = sorting.MultiFacet(facets)

        (pagination, suggestion) = paginated_search(ix, ["title", "content"],
                                                    query,
                                                    page,
                                                    weighting=weighting,
                                                    sort_column=sortedby)
    else:
        flash(_('Please input keyword(s)'), 'error')

    return render_template('zim/search.html',
                           humanReadableId=humanReadableId,
                           pagination=pagination,
                           suggestion=suggestion,
                           keywords=query,
                           endpoint_desc=EndPointDescription(
                               'zim_views.search',
                               {'humanReadableId': humanReadableId}))
示例#4
0
def search():
    """Render the Gutenberg search page.

    Reads the ``q`` (keywords) and ``page`` request arguments.  When
    keywords are given, the configured Gutenberg index is searched over
    ``DEFAULT_SEARCH_COLUMNS`` with ``sort_column='creator'``; otherwise
    an error message is flashed and the template is rendered without
    results.

    :returns: the rendered ``gutenberg/search.html`` template.
    """
    query = request.args.get('q', '').strip()
    # Both are read unconditionally by render_template below, so they must
    # be bound even when no keywords were supplied.
    pagination = None
    suggestion = None
    if query:
        index_dir = config().get_path('GUTENBERG', 'index_dir')
        try:
            page = int(request.args.get('page', 1))
        except ValueError:
            # Non-numeric page argument: fall back to the first page
            page = 1
        ix = whoosh_open_dir_32_or_64(index_dir)
        (pagination, suggestion) = paginated_search(ix,
                                                    DEFAULT_SEARCH_COLUMNS,
                                                    query,
                                                    page,
                                                    sort_column='creator')
    else:
        flash(_('Please input keyword(s)'), 'error')
    return render_template('gutenberg/search.html',
                           pagination=pagination,
                           keywords=query,
                           suggestion=suggestion,
                           fn_author_to_query=author_to_query,
                           endpoint_desc=EndPointDescription(
                               'gutenberg.search', None),
                           files_exist=files_exist)
 def search(self, query, page=1, pagelen=20):
     """Return a sorted list of results.
     pagelen specifies the number of hits per page.
     page specifies the page of results to return (first page is 1)
     Set pagelen = None or 0 to retrieve all results.
     """
     query = unicode(query)  # Must be unicode
     ix = whoosh_open_dir_32_or_64(self.index_dir)
     with ix.searcher() as searcher:
         query = QueryParser("title", ix.schema).parse(query)
         if pagelen is not None and pagelen != 0:
             try:
                 results = searcher.search_page(query, page, pagelen=pagelen,
                                                sortedby="score", reverse=True)
             except ValueError, e:  # Invalid page number
                 results = []
         else:
示例#6
0
 def search(self, query, page=1, pagelen=20):
     """Return a sorted list of results.
     pagelen specifies the number of hits per page.
     page specifies the page of results to return (first page is 1)
     Set pagelen = None or 0 to retrieve all results.
     """
     query = unicode(query)  # Must be unicode
     population_sort_facet = sorting.FieldFacet("population", reverse=True)
     ix = whoosh_open_dir_32_or_64(self.index_dir)
     with ix.searcher() as searcher:
         # query = QueryParser("ngram_name", ix.schema).parse(query)
         mparser = MultifieldParser(["ngram_name", "admin1_code", "country_code"], schema=ix.schema)
         query = mparser.parse(query)
         if pagelen is not None and pagelen != 0:
             try:
                 results = searcher.search_page(query, page, pagelen=pagelen)
             except ValueError, e:  # Invalid page number
                 results = []
         else:
 def search(self, query, page=1, pagelen=20):
     """Return a sorted list of results.
     pagelen specifies the number of hits per page.
     page specifies the page of results to return (first page is 1)
     Set pagelen = None or 0 to retrieve all results.
     """
     query = unicode(query)  # Must be unicode
     ix = whoosh_open_dir_32_or_64(self.index_dir)
     with ix.searcher() as searcher:
         query = QueryParser("title", ix.schema).parse(query)
         if pagelen is not None and pagelen != 0:
             try:
                 results = searcher.search_page(query,
                                                page,
                                                pagelen=pagelen,
                                                sortedby="score",
                                                reverse=True)
             except ValueError, e:  # Invalid page number
                 results = []
         else:
示例#8
0
def search(humanReadableId):
    """Render the search page for the index named by ``humanReadableId``.

    Reads the ``q`` (keywords) and ``page`` request arguments.  When
    keywords are given, the index found under the configured
    ``wikipedia_index_dir`` is searched over the ``title`` and ``content``
    fields; otherwise an error message is flashed and the template is
    rendered without results.

    :param humanReadableId: name of the index sub-directory to search.
    :returns: the rendered ``zim/search.html`` template.
    """
    query = request.args.get('q', '').strip()
    # Both are read unconditionally by render_template below, so they must
    # be bound even when no keywords were supplied.
    pagination = None
    suggestion = None
    if query:
        index_base_dir = config().get_path("ZIM", "wikipedia_index_dir")
        index_dir = os.path.join(index_base_dir, humanReadableId)
        try:
            page = int(request.args.get('page', 1))
        except ValueError:
            # Non-numeric page argument: fall back to the first page
            page = 1

        # Load index so we can query it for which fields exist
        ix = whoosh_open_dir_32_or_64(index_dir)

        # Per-field BM25F override for the title field (the original
        # author's intent: weight the title more heavily)
        weighting = scoring.BM25F(title_B=1.0)

        # Sort pages with "Image:" in their title after
        # regular articles
        def image_pages_last(searcher, docnum):
            fields = searcher.stored_fields(docnum)
            return 1 if fields['title'].startswith("Image:") else 0

        facets = [
            sorting.FunctionFacet(image_pages_last),
            sorting.ScoreFacet(),
        ]
        # Support older whoosh indexes that do not have a reverse_links field
        if 'reverse_links' in ix.schema.names():
            facets.append(sorting.FieldFacet("reverse_links", reverse=True))
        sortedby = sorting.MultiFacet(facets)

        (pagination, suggestion) = paginated_search(ix, ["title", "content"],
                                                    query,
                                                    page,
                                                    weighting=weighting,
                                                    sort_column=sortedby)
    else:
        flash(_('Please input keyword(s)'), 'error')

    return render_template('zim/search.html',
                           humanReadableId=humanReadableId,
                           pagination=pagination,
                           suggestion=suggestion,
                           keywords=query,
                           endpoint_desc=EndPointDescription(
                               'zim_views.search',
                               {'humanReadableId': humanReadableId}))
def paginated_search(index_dir, search_columns, query_text, page=1, pagelen=20, sort_column=None, weighting=scoring.BM25F):
    """
    Return a tuple consisting of an object that emulates an SQLAlchemy pagination object and corrected query suggestion

    :param index_dir: value handed to ``whoosh_open_dir_32_or_64`` to open the index.
    :param search_columns: field names the query text is parsed against.
    :param query_text: query text; coerced to ``unicode`` before parsing.
    :param page: page of results to return (first page is 1).
    :param pagelen: number of hits per page.
    :param sort_column: passed through as ``sortedby`` to ``search_page``.
    :param weighting: passed through to ``ix.searcher(weighting=...)``.
    :returns: ``(pagination, suggestions)`` where ``suggestions`` is a list
        of corrected query strings.
    """
    query_text = unicode(query_text)  # Must be unicode
    ix = whoosh_open_dir_32_or_64(index_dir)
    try:
        with ix.searcher(weighting=weighting) as searcher:
            query = MultifieldParser(search_columns, ix.schema).parse(query_text)
            try:
                # search_page returns whoosh.searching.ResultsPage
                results = searcher.search_page(query, page, pagelen=pagelen, sortedby=sort_column)
                total = results.total
            except ValueError:  # Invalid page number
                results = []
                total = 0
            # Copy each hit into a plain dict while the searcher is still open
            items = [dict(hit.items()) for hit in results]
            paginate = pagination_helper.Pagination(page, pagelen, total, items)
            # list of Corrector objects
            corrections = deduplicate_corrections(get_query_corrections(searcher, query, query_text))
            return (paginate, [c.string for c in corrections])
    finally:
        # Release the index opened above, on success and on error alike
        ix.close()
示例#10
0
 def search(self, query, page=1, pagelen=20):
     """Return a sorted list of results.
     pagelen specifies the number of hits per page.
     page specifies the page of results to return (first page is 1)
     Set pagelen = None or 0 to retrieve all results.
     """
     query = unicode(query)  # Must be unicode
     population_sort_facet = sorting.FieldFacet("population", reverse=True)
     ix = whoosh_open_dir_32_or_64(self.index_dir)
     with ix.searcher() as searcher:
         # query = QueryParser("ngram_name", ix.schema).parse(query)
         mparser = MultifieldParser(
             ["ngram_name", "admin1_code", "country_code"],
             schema=ix.schema)
         query = mparser.parse(query)
         if pagelen is not None and pagelen != 0:
             try:
                 results = searcher.search_page(query,
                                                page,
                                                pagelen=pagelen)
             except ValueError, e:  # Invalid page number
                 results = []
         else:
示例#11
0
def search():
    """Render the Gutenberg search page.

    Reads the ``q`` (keywords) and ``page`` request arguments.  When
    keywords are given, the configured Gutenberg index is searched over
    ``DEFAULT_SEARCH_COLUMNS`` with ``sort_column='creator'``; otherwise
    an error message is flashed and the template is rendered without
    results.

    :returns: the rendered ``gutenberg/search.html`` template.
    """
    query = request.args.get('q', '').strip()
    # Both are read unconditionally by render_template below, so they must
    # be bound even when no keywords were supplied.
    pagination = None
    suggestion = None
    if query:
        index_dir = config().get_path('GUTENBERG', 'index_dir')
        try:
            page = int(request.args.get('page', 1))
        except ValueError:
            # Non-numeric page argument: fall back to the first page
            page = 1
        ix = whoosh_open_dir_32_or_64(index_dir)
        (pagination, suggestion) = paginated_search(ix,
                                                    DEFAULT_SEARCH_COLUMNS,
                                                    query,
                                                    page,
                                                    sort_column='creator')
    else:
        flash(_('Please input keyword(s)'), 'error')
    return render_template('gutenberg/search.html',
                           pagination=pagination,
                           keywords=query,
                           suggestion=suggestion,
                           fn_author_to_query=author_to_query,
                           endpoint_desc=EndPointDescription(
                               'gutenberg.search', None),
                           files_exist=files_exist)