示例#1
0
def main():
    """Fill an EntityDB from command-line input files and dump it.

    Command-line layout (``sys.argv``):
        1      file handed to ``add_unambig_freebase``
        2-4    read into locals but not used by any active call
        5, 6   files handed to ``add_wikt``
        7      keep-list file, only read when ``sys.argv`` has more than
               10 entries
        -3     output opened in text mode ("w"), first argument to ``dump``
        -2     output opened in binary mode, second argument to ``dump``
        -1     output opened in binary mode, third argument to ``dump``
    """
    db = EntityDB()
    argv = sys.argv

    freebase_types_path = argv[1]
    # Positions 2-4 are still read so that a too-short argument list fails
    # at the same point; nothing below consumes these values.
    dbpedia_en_path = argv[2]
    dbpedia_de_path = argv[3]
    geonames_path = argv[4]
    wikt_defined_path = argv[5]
    wikt_undefined_path = argv[6]

    # The three outputs are addressed from the end so that the optional
    # keep-list argument can sit in front of them.
    entities_path, dawg_path, prefix_dawg_path = argv[-3:]

    if len(argv) > 10:
        with open(argv[7]) as keep_file:
            # Each line is stripped, decoded as UTF-8 and lowercased.
            # NOTE(review): .decode() on a line read from a text-mode file
            # presumably means this targets Python 2 -- confirm.
            keep_entries = []
            for raw_line in keep_file:
                keep_entries.append(raw_line.strip().decode("utf-8").lower())
            db.add_to_keep_list(keep_entries)

    # NOTE: the add_freebase, add_dbpedia and add_geonames loaders are not
    # invoked here; only the two sources below feed the database.
    add_unambig_freebase(freebase_types_path, db)
    add_wikt(wikt_defined_path, wikt_undefined_path, db)

    with open(dawg_path, 'wb') as dawg_out, \
            open(entities_path, "w") as entities_out, \
            open(prefix_dawg_path, "wb") as prefix_out:
        db.dump(entities_out, dawg_out, prefix_out)
示例#2
0
def main():
    """Load an EntityDB, run ``unidecode_entities`` and dump the result.

    ``sys.argv[1]`` is passed to ``EntityDB.load`` and ``sys.argv[2]`` to
    ``dump`` on a freshly created ``EntityDB``. A ``ModifyEBD`` built from
    the loaded and the fresh database performs the conversion.
    """
    # NOTE(review): the original names (dir_old / dir_new) suggest both
    # arguments are directories -- confirm against EntityDB.load / dump.
    source_location, target_location = sys.argv[1], sys.argv[2]

    target_db = EntityDB()
    source_db = EntityDB.load(source_location)

    modifier = ModifyEBD(source_db, target_db)
    modifier.unidecode_entities()

    target_db.dump(target_location)