예제 #1
0
def search(humanReadableId):
    query = request.args.get('q', '').strip()
    pagination = None
    if query:
        index_base_dir = config().get_path("ZIM", "wikipedia_index_dir")
        index_dir = os.path.join(index_base_dir, humanReadableId)
        page = int(request.args.get('page', 1))

        # Load index so we can query it for which fields exist
        ix = whoosh_open_dir_32_or_64(index_dir)

        # Set a higher value for the title field so it is weighted more
        weighting = scoring.BM25F(title_B=1.0)

        # Sort pages with "Image:" in their title after
        # regular articles
        def image_pages_last(searcher, docnum):
            fields = searcher.stored_fields(docnum)
            if fields['title'].find("Image:") == 0:
                return 1
            else:
                return 0

        # Support older whoosh indexes that do not have a reverse_links field
        if 'reverse_links' in ix.schema.names():
            sortedby = sorting.MultiFacet([
                sorting.FunctionFacet(image_pages_last),
                sorting.ScoreFacet(),
                sorting.FieldFacet("reverse_links", reverse=True),
            ])
        else:
            sortedby = sorting.MultiFacet([
                sorting.FunctionFacet(image_pages_last),
                sorting.ScoreFacet(),
            ])

        (pagination, suggestion) = paginated_search(ix, ["title", "content"],
                                                    query,
                                                    page,
                                                    weighting=weighting,
                                                    sort_column=sortedby)
    else:
        flash(_('Please input keyword(s)'), 'error')

    return render_template('zim/search.html',
                           humanReadableId=humanReadableId,
                           pagination=pagination,
                           suggestion=suggestion,
                           keywords=query,
                           endpoint_desc=EndPointDescription(
                               'zim_views.search',
                               {'humanReadableId': humanReadableId}))
예제 #2
0
def test_sorting_function():
    schema = fields.Schema(id=fields.STORED,
                           text=fields.TEXT(stored=True, vector=True))
    ix = RamStorage().create_index(schema)
    w = ix.writer()
    domain = ("alfa", "bravo", "charlie")
    count = 1
    for w1 in domain:
        for w2 in domain:
            for w3 in domain:
                for w4 in domain:
                    w.add_document(id=count,
                                   text=u(" ").join((w1, w2, w3, w4)))
                    count += 1
    w.commit()

    def fn(searcher, docnum):
        v = dict(searcher.vector_as("frequency", docnum, "text"))
        # Sort documents that have equal number of "alfa"
        # and "bravo" first
        return 0 - 1.0 / (abs(v.get("alfa", 0) - v.get("bravo", 0)) + 1.0)
    fnfacet = sorting.FunctionFacet(fn)

    with ix.searcher() as s:
        q = query.And([query.Term("text", u("alfa")),
                       query.Term("text", u("bravo"))])
        results = s.search(q, sortedby=fnfacet)
        r = [hit["text"] for hit in results]
        for t in r[:10]:
            tks = t.split()
            assert tks.count("alfa") == tks.count("bravo")