def main():
    """Build the entity database from command-line inputs and dump it.

    Expected ``sys.argv`` layout::

         1  Freebase notable-types file (passed to add_unambig_freebase)
         2  DBpedia "en" file   -- expected positionally, currently not read
         3  DBpedia "de" file   -- expected positionally, currently not read
         4  geo file            -- expected positionally, currently not read
         5  gerword_def file    (first input of add_wikt)
         6  gerword_undef file  (second input of add_wikt)
         7  optional keep-list file, used only when more than 10 argv
            entries are present
        -3  output: entities file (opened in text mode "w")
        -2  output: DAWG file (opened "wb")
        -1  output: prefix DAWG file (opened "wb")

    Exits with a usage message when fewer than 10 argv entries are given.
    """
    # Outputs are addressed from the end and inputs from the start. With
    # fewer than 10 entries (argv[0] + 6 inputs + 3 outputs) the two ranges
    # overlap, and an input path would be opened for writing and truncated.
    if len(sys.argv) < 10:
        sys.exit(
            "usage: %s FREEBASE_NOTABLE_TYPES DBPEDIA_EN DBPEDIA_DE GEO "
            "GERWORD_DEF GERWORD_UNDEF [KEEP_LIST] ENTITIES_OUT DAWG_OUT "
            "PREFIX_DAWG_OUT" % sys.argv[0])

    freebase_notable_types_f = sys.argv[1]
    gerword_def_f = sys.argv[5]
    gerword_undef_f = sys.argv[6]
    entities_fn, dawg_fn, prefix_dawg_fn = sys.argv[-3:]

    entity_db = EntityDB()

    if len(sys.argv) > 10:
        # Read raw bytes and decode explicitly: byte strings have .decode()
        # on both Python 2 and Python 3, whereas text-mode lines do not on 3.
        with open(sys.argv[7], "rb") as keep_f:
            entity_db.add_to_keep_list(
                [line.strip().decode("utf-8").lower() for line in keep_f])

    add_unambig_freebase(freebase_notable_types_f, entity_db)
    # Disabled sources, kept for reference. Their positional arguments
    # (sys.argv[2], sys.argv[3], sys.argv[4]) are still expected on the
    # command line but are not read.
    # add_freebase(freebase_dump_gz_f, entity_db)
    # add_dbpedia(dbpedia_en_f, dbpedia_de_f, entity_db)
    # add_geonames(geo_f, entity_db)
    add_wikt(gerword_def_f, gerword_undef_f, entity_db)

    # Files are opened in the same order as before: DAWG, entities, prefix.
    with open(dawg_fn, 'wb') as dawg_fb, \
            open(entities_fn, "w") as pickle_f, \
            open(prefix_dawg_fn, "wb") as pd_fb:
        entity_db.dump(pickle_f, dawg_fb, pd_fb)
def main():
    """Convert an existing entity database and write the result elsewhere.

    Reads two positional arguments: ``sys.argv[1]`` is the location the
    existing database is loaded from via ``EntityDB.load`` and
    ``sys.argv[2]`` is the location the new database is dumped to.

    A fresh ``EntityDB`` is handed to ``ModifyEBD`` together with the loaded
    one, ``unidecode_entities()`` is invoked, and the fresh database is then
    dumped.
    """
    # Both arguments are read up front, before any database work starts.
    source_dir, target_dir = sys.argv[1], sys.argv[2]

    target_db = EntityDB()
    # NOTE(review): presumably unidecode_entities() fills target_db from the
    # loaded database -- confirm against ModifyEBD.
    ModifyEBD(EntityDB.load(source_dir), target_db).unidecode_entities()
    target_db.dump(target_dir)