Пример #1
0
def otherTest(index_dir="/home/luoxing/windows/indexdir",
              xmlfile="/home/luoxing/windows/enwiki.xml"):
    """Build a Whoosh index from the pages of an XML dump.

    A new index with stored ``title`` and ``content`` text fields is
    created in *index_dir*, then every ``(title, content)`` pair produced
    by ``extract.getPages(xmlfile)`` that has non-empty content is added
    to it. A progress bar sized by ``entitydb.getTotalPage()`` is advanced
    once per page, including pages that are skipped.

    Args:
        index_dir: Directory passed to ``whoosh.index.create_in``.
        xmlfile: Path of the XML dump handed to ``extract.getPages``.

    Raises:
        Any exception raised while reading pages, updating the progress
        bar, or writing documents is propagated after the pending index
        changes have been cancelled.
    """
    from whoosh.index import create_in
    from whoosh.fields import Schema, TEXT

    schema = Schema(title=TEXT(stored=True), content=TEXT(stored=True))
    ix = create_in(index_dir, schema)

    # Used as a context manager, the writer commits when the block exits
    # normally and cancels (releasing the index lock) if anything raises.
    with ix.writer() as writer:
        # NOTE(review): some ProgressBar implementations require an explicit
        # start() before the first update() -- confirm against the class
        # imported by this module.
        progress = ProgressBar(maxval=entitydb.getTotalPage())
        for number, (title, content) in enumerate(
                extract.getPages(xmlfile), 1):
            progress.update(number)
            # Pages with empty content are counted but not indexed.
            if content:
                writer.add_document(title=title, content=content)
        # Was misspelled "finsh", which raised AttributeError before the
        # commit and so discarded the whole indexing run.
        progress.finish()