def load_nordlys_config(file_name):
    """Loads a Nordlys config file. If a local file is provided, the global one is ignored."""
    config_path = os.sep.join([BASE_DIR, "config"])
    local_config = os.sep.join([config_path, "local", file_name])
    if os.path.exists(local_config):
        return FileUtils.load_config(local_config)
    else:
        return FileUtils.load_config(os.sep.join([config_path, file_name]))

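# Usage sketch for load_nordlys_config (the file name below is hypothetical;
# assumes the standard <BASE_DIR>/config layout with an optional
# config/local/ override directory):
#
#   config = load_nordlys_config("elastic.json")
#   # Loads <BASE_DIR>/config/local/elastic.json if it exists,
#   # otherwise falls back to <BASE_DIR>/config/elastic.json.
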
def main(args):
    config = FileUtils.load_config(args.config)
    er = ER(config, ElasticCache(DBPEDIA_INDEX))
    if args.query:
        res = er.retrieve(args.query)
        pprint(res)
    else:
        er.batch_retrieval()

def main(args):
    s_t = time.time()  # start time
    config = FileUtils.load_config(args.config) if args.config != "" else get_config()
    r = Retrieval(config)
    r.batch_retrieval()
    e_t = time.time()  # end time
    print("Execution time (min):\t" + str((e_t - s_t) / 60) + "\n")

def main(args):
    config = FileUtils.load_config(args.config)
    el = EL(config, Entity())
    if args.query:
        res = el.link(args.query)
        pprint(res)
    else:
        el.batch_linking()

def main(args):
    config = FileUtils.load_config(args.config)
    tti = TTI(config)
    if args.query:
        res = tti.identify(args.query)
        pprint(res)
    else:
        tti.batch_identification()

def main(args):
    config = FileUtils.load_config(args.config)
    er = ER(config)
    if args.query:
        res = er.retrieve(args.query)
        pprint(res)
    else:
        er.batch_retrieval()

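# The main(args) entry points above and below all expect an argparse-style
# namespace with a .config path and, for the interactive services, a .query
# string. A minimal sketch of such a wrapper (the flag names and defaults are
# assumptions, not necessarily the exact ones the original scripts define):
import argparse


def arg_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config", help="path to config file", type=str, default="")
    parser.add_argument("-q", "--query", help="query to process; if omitted, batch mode is used",
                        type=str, default=None)
    return parser.parse_args()


if __name__ == "__main__":
    main(arg_parser())
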
def main(args):
    config = FileUtils.load_config(args.config)
    type2entity_file = os.path.expanduser(config.get("type2entity_file", ""))
    entity_abstracts_file = os.path.expanduser(config.get("entity_abstracts_file", ""))
    if not os.path.isfile(type2entity_file) or not os.path.isfile(entity_abstracts_file):
        exit(1)

    indexer = IndexerDBpediaTypes(config)
    indexer.build_index(force=True)
    PLOGGER.info("Index build: <{}>".format(indexer.name))

def main(args):
    conf = FileUtils.load_config(args.config)
    el = EL(conf, Entity(), ElasticCache(DBPEDIA_INDEX), FeatureCache())
    if conf.get("gen_model", False):
        LTR.train(conf)
    elif args.query:
        res = el.link(args.query)
        pprint(res)
    else:
        el.batch_linking()

def main(args):
    config = FileUtils.load_config(args.config)
    dbpedia_path = config.get("dbpedia_files_path", "")

    # Check DBpedia files
    PLOGGER.info("Checking needed DBpedia files under {}".format(dbpedia_path))
    for fname in [ENTITY_ABSTRACTS_FILE] + ENTITY_TYPES_FILES:
        if os.path.isfile(os.sep.join([dbpedia_path, fname])):
            PLOGGER.info(" - {}: OK".format(fname))
        else:
            PLOGGER.error(" - {}: Missing".format(fname))
            exit(1)

    indexer = IndexerDBpediaTypes(config)
    indexer.build_index(force=True)

def main(args):
    config = FileUtils.load_config(args.config)
    if "_uri" not in config["index_name"]:
        # Sanity check: URI index names are expected to contain "_uri"
        print("Index name might not be correct, please check again!")
        exit(1)

    indexer = IndexerDBpediaURI(config)
    fields_file = config.get("fields_file", "output/field_counts.json")
    if "fields_file" not in config:
        field_counts2json(fields_file)
    indexer.build()
    print("Index build: " + config["index_name"])

def main(args):
    config = FileUtils.load_config(args.config)
    if "_uri" not in config["index_name"]:
        # Sanity check: URI index names are expected to contain "_uri"
        PLOGGER.error("Index name might not be correct, please check again!")
        exit(1)

    if "fields_file" not in config:
        fields_count = compute_field_counts()
    else:
        # json.load needs a file object, not a path string
        with open(config["fields_file"]) as f:
            fields_count = json.load(f)

    indexer = IndexerDBpediaURI(config, fields_count)
    indexer.build()
    PLOGGER.info("Index build: " + config["index_name"])

def main(args):
    config = FileUtils.load_config(args.config)
    elastic_term = ElasticCache(config["text_index"])
    lambdas = config.get("lambdas", [0.9, 0.1])
    queries = json.load(open(config["query_file"], "r"))
    mappings = json.load(open(config["mapping_file"], "r"))
    annots = load_annot(config["annot_file"])
    run = load_run(config["run_file"])
    instances = Instances()

    # Gets the results
    out_file = open(config["output_file"], "w")
    qid_int = 0
    for qid, query in sorted(queries.items()):
        print("Scoring ", qid, "...")
        results, libsvm_str = {}, ""
        query_len = len(elastic_term.analyze_query(query).split())
        scorer = ScorerELR(ElasticCache(config["uri_index"]), annots[qid], query_len, lambdas)
        for doc_id, p_T_d in sorted(run[qid].items()):
            query_mappings = get_mapping_query(annots[qid], mappings)
            p_E_d = scorer.score_doc(doc_id, query_mappings)
            properties = {'doc_id': doc_id, 'query': query, 'qid': qid, 'qid_int': qid_int}
            features = {'p_T_d': p_T_d, 'p_E_d': p_E_d}
            ins = Instance(qid + "_" + doc_id, features=features, properties=properties)
            instances.add_instance(ins)
            # libsvm_str += ins.to_libsvm(qid_prop="qid_int")
            results[doc_id] = (lambdas[0] * p_T_d) + (lambdas[1] * p_E_d)
        qid_int += 1

        # Write TREC format
        out_str = trec_format(results, qid, "elr")
        out_file.write(out_str)
    out_file.close()
    print("Output file:", config["output_file"])
    instances.to_json(config["json_file"])
    print("Output file:", config["json_file"])

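# Interpolation check for the scorer above (illustrative numbers only): with
# the default lambdas [0.9, 0.1], a document with term-based score
# p_T_d = -12.0 and entity-based score p_E_d = -8.0 receives
# 0.9 * -12.0 + 0.1 * -8.0 = -10.8 - 0.8 = -11.6.
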
def main(args):
    config = FileUtils.load_config(args.config)

    type2entity_file = os.path.expanduser(config.get("type2entity_file", ""))
    if not os.path.isfile(type2entity_file):
        print("invalid path to type-to-entity source file: ", type2entity_file)
        exit(1)

    entity_abstracts_file = os.path.expanduser(config.get("entity_abstracts_file", ""))
    if not os.path.isfile(entity_abstracts_file):
        print("invalid path to entity abstracts source file: ", entity_abstracts_file)
        exit(1)

    indexer = IndexerDBpediaTypes(config, type2entity_file, entity_abstracts_file)
    indexer.build_index(force=True)
    print("Index build: <{}>".format(indexer.name))

def main(args):
    example_config = {
        "index_name": "toy_index",
        # "query_file": "data/queries/test_queries.json",
        "first_pass": {
            "num_docs": 1000,
            "field": "content",
            # "model": "LMJelinekMercer",
            # "model_params": {"lambda": 0.1}
        },
        "second_pass": {
            "field": "content",
            "model": "lm",
            "smoothing_method": "jm",
            "smoothing_param": 0.1
        },
        "output_file": "output/test_retrieval.txt"
    }
    config = FileUtils.load_config(args.config) if args.config != "" else example_config
    r = Retrieval(config)
    r.batch_retrieval()

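# The example_config above drives two-pass retrieval against "toy_index":
# the first pass fetches the top 1000 documents on the "content" field
# (optionally using the commented-out LMJelinekMercer model), and the second
# pass re-scores them with a language model ("lm") using Jelinek-Mercer
# smoothing ("jm") with smoothing parameter 0.1, writing the results to
# output/test_retrieval.txt.
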
def main(args):
    config = FileUtils.load_config(args.config)
    fb2dbp2mongo = Freebase2DBpedia2Mongo(config)
    mappings = fb2dbp2mongo.load_fb2dbp_mapping()
    fb2dbp2mongo.build_collection(mappings)

def main(args):
    config = FileUtils.load_config(args.config)
    dbm = DBpedia2Mongo(config)
    dbm.build_dbpedia()

def main(config):
    ml = ML(FileUtils.load_config(config))
    inss = ml.run()

def main(args):
    config = FileUtils.load_config(args.config)
    indexer = IndexerDBpedia(config)
    indexer.build()
    print("Index build: " + config["index_name"])

def main(args):
    config = FileUtils.load_config(args.config)
    dbp_sf2mongo = DBpediaSurfaceforms2Mongo(config)
    dbp_sf2mongo.build_collection()

def main(args):
    config = FileUtils.load_config(args.config)
    ml = ML(config)
    ml.run()

def main(args):
    config = FileUtils.load_config(args.config)
    sfm = FACCToMongo(config)
    sfm.build()

def main(args):
    config = FileUtils.load_config(args.config)
    w2v_to_mongo = Word2VecToMongo(config)
    w2v_to_mongo.build()