Exemplo n.º 1
0
def setUp():
    """Module-level fixture: build a DumpDB from the packaged sample dump.

    Resolves the sample dump shipped under ``test_data/``, wraps it in a
    ``WikiDumpReader``, builds a database into a fresh temporary file and
    opens it. The results are published through the module globals
    ``dump_db`` (the opened database) and ``dump_db_file`` (the temporary
    file object backing it).
    """
    global dump_db, dump_db_file

    sample_path = pkg_resources.resource_filename(
        __name__,
        'test_data/enwiki-pages-articles-sample.xml.bz2',
    )
    reader = WikiDumpReader(sample_path)

    # Bound to a module global so the temporary file object stays referenced
    # after this function returns.
    dump_db_file = NamedTemporaryFile()
    db_path = dump_db_file.name

    # The two trailing positional arguments are forwarded unchanged; their
    # meaning is defined by DumpDB.build.
    DumpDB.build(reader, db_path, 1, 1)
    dump_db = DumpDB(db_path)
Exemplo n.º 2
0
 def setUp(self):
     """Open the packaged sample dump and read all of its items up front.

     Stores the ``WikiDumpReader`` on ``self.dump_reader`` and the list
     obtained by iterating it on ``self.pages``.
     """
     dump_path = pkg_resources.resource_filename(
         __name__,
         '../test_data/enwiki-pages-articles-sample.xml.bz2',
     )

     self.dump_reader = WikiDumpReader(dump_path)
     # Exhaust the reader once so the items are available as a plain list.
     self.pages = list(self.dump_reader)
Exemplo n.º 3
0
def build_dump_db(dump_file, out_file, **kwargs):
    """Build a DumpDB at ``out_file`` from the dump at ``dump_file``.

    The dump is read through ``WikiDumpReader`` and ``normalize_text`` is
    always supplied as ``preprocess_func``. Any extra keyword arguments are
    forwarded to ``DumpDB.build`` unchanged.

    Note: because ``preprocess_func`` is passed explicitly, also supplying it
    in ``kwargs`` raises ``TypeError`` (duplicate keyword argument).
    """
    reader = WikiDumpReader(dump_file)
    DumpDB.build(reader, out_file, preprocess_func=normalize_text, **kwargs)
Exemplo n.º 4
0
def build_dump_db(dump_file: str, out_file: str, **kwargs):
    """Build a DumpDB at ``out_file`` from the dump at ``dump_file``.

    The dump is read through ``WikiDumpReader``; all extra keyword arguments
    are forwarded to ``DumpDB.build`` unchanged.
    """
    reader = WikiDumpReader(dump_file)
    DumpDB.build(
        reader,
        out_file,
        **kwargs,
    )