Exemplo n.º 1
0
def load_nordlys_config(file_name):
    """Load a nordlys config file, preferring a local override.

    The file is first looked up under ``<BASE_DIR>/config/local/``; when no
    such path exists, the one directly under ``<BASE_DIR>/config/`` is used.

    :param file_name: name of the config file.
    :return: the result of ``FileUtils.load_config`` for the chosen path.
    """
    config_dir = os.sep.join([BASE_DIR, "config"])
    chosen = os.sep.join([config_dir, "local", file_name])
    if not os.path.exists(chosen):
        # No local override present: fall back to the global config.
        chosen = os.sep.join([config_dir, file_name])
    return FileUtils.load_config(chosen)
Exemplo n.º 2
0
Arquivo: er.py Projeto: zxlzr/nordlys
def main(args):
    """Run ER on a single query, or in batch mode when no query is given.

    :param args: object exposing ``config`` (config file path) and ``query``
        attributes.
    """
    settings = FileUtils.load_config(args.config)
    retriever = ER(settings, ElasticCache(DBPEDIA_INDEX))

    if not args.query:
        retriever.batch_retrieval()
        return

    # A query was supplied: retrieve for it and pretty-print the result.
    pprint(retriever.retrieve(args.query))
Exemplo n.º 3
0
def main(args):
    """Run batch retrieval and print the elapsed time in minutes.

    :param args: object exposing a ``config`` attribute; an empty string
        selects the config returned by ``get_config()``.
    """
    started = time.time()

    if args.config != "":
        config = FileUtils.load_config(args.config)
    else:
        config = get_config()
    Retrieval(config).batch_retrieval()

    elapsed_minutes = (time.time() - started) / 60
    print("Execution time(min):\t" + str(elapsed_minutes) + "\n")
Exemplo n.º 4
0
def main(args):
    """Run EL on a single query, or in batch mode when no query is given.

    :param args: object exposing ``config`` (config file path) and ``query``
        attributes.
    """
    settings = FileUtils.load_config(args.config)
    linker = EL(settings, Entity())

    if not args.query:
        linker.batch_linking()
        return

    # A query was supplied: link it and pretty-print the result.
    pprint(linker.link(args.query))
Exemplo n.º 5
0
def main(args):
    """Run TTI on a single query, or in batch mode when no query is given.

    :param args: object exposing ``config`` (config file path) and ``query``
        attributes.
    """
    settings = FileUtils.load_config(args.config)
    identifier = TTI(settings)

    if not args.query:
        identifier.batch_identification()
        return

    # A query was supplied: identify for it and pretty-print the result.
    pprint(identifier.identify(args.query))
Exemplo n.º 6
0
def main(args):
    """Run ER on a single query, or in batch mode when no query is given.

    :param args: object exposing ``config`` (config file path) and ``query``
        attributes.
    """
    settings = FileUtils.load_config(args.config)
    retriever = ER(settings)

    if not args.query:
        retriever.batch_retrieval()
        return

    # A query was supplied: retrieve for it and pretty-print the result.
    pprint(retriever.retrieve(args.query))
Exemplo n.º 7
0
def main(args):
    """Build the DBpedia types index after validating its source files.

    The config entries ``type2entity_file`` and ``entity_abstracts_file``
    must both point to existing files (``~`` is expanded). Every missing
    file is logged as an error and the process then exits with status 1;
    otherwise the index is (re)built with ``force=True``.

    :param args: object exposing a ``config`` attribute (config file path).
    """
    config = FileUtils.load_config(args.config)

    # Check both inputs and report each missing one instead of exiting
    # silently, so the user knows which config entry to fix.
    all_present = True
    for key in ("type2entity_file", "entity_abstracts_file"):
        path = os.path.expanduser(config.get(key, ""))
        if not os.path.isfile(path):
            PLOGGER.error("Invalid path for <{}>: <{}>".format(key, path))
            all_present = False
    if not all_present:
        exit(1)

    indexer = IndexerDBpediaTypes(config)
    indexer.build_index(force=True)
    PLOGGER.info("Index build: <{}>".format(indexer.name))
Exemplo n.º 8
0
def main(args):
    """Train the LTR model, link a single query, or link in batch mode.

    The mode is picked in this order: a truthy ``gen_model`` config entry
    triggers ``LTR.train``; otherwise a given query is linked and
    pretty-printed; otherwise batch linking runs.

    :param args: object exposing ``config`` (config file path) and ``query``
        attributes.
    """
    settings = FileUtils.load_config(args.config)
    linker = EL(settings, Entity(), ElasticCache(DBPEDIA_INDEX), FeatureCache())

    if settings.get("gen_model", False):
        LTR.train(settings)
        return
    if args.query:
        pprint(linker.link(args.query))
        return
    linker.batch_linking()
Exemplo n.º 9
0
def main(args):
    """Verify the needed DBpedia files exist, then build the types index.

    Each file is logged as OK or Missing; the process exits with status 1
    at the first missing file.

    :param args: object exposing a ``config`` attribute (config file path).
    """
    config = FileUtils.load_config(args.config)
    files_dir = config.get("dbpedia_files_path", "")

    PLOGGER.info("Checking needed DBpedia files under {}".format(files_dir))
    needed_files = [ENTITY_ABSTRACTS_FILE] + ENTITY_TYPES_FILES
    for needed in needed_files:
        if not os.path.isfile(os.sep.join([files_dir, needed])):
            PLOGGER.error("  - {}: Missing".format(needed))
            exit(1)
        PLOGGER.info("  - {}: OK".format(needed))

    builder = IndexerDBpediaTypes(config)
    builder.build_index(force=True)
Exemplo n.º 10
0
def main(args):
    """Build the DBpedia URI index described by the config.

    Exits (status 0) with a warning when the configured index name does not
    contain ``_uri``. When the config has no ``fields_file`` entry,
    ``field_counts2json`` is called with the default path
    ``output/field_counts.json`` before the index is built.

    :param args: object exposing a ``config`` attribute (config file path).
    """
    config = FileUtils.load_config(args.config)
    looks_like_uri_index = "_uri" in config["index_name"]
    if not looks_like_uri_index:
        print("index name might not be correct, please check again!")
        exit(0)

    builder = IndexerDBpediaURI(config)

    if "fields_file" not in config:
        # No fields file configured: generate one at the default location.
        field_counts2json(config.get("fields_file", "output/field_counts.json"))

    builder.build()
    print("Index build: " + config["index_name"])
Exemplo n.º 11
0
def main(args):
    """Build the DBpedia URI index described by the config.

    Exits (status 0) after logging an error when the configured index name
    does not contain ``_uri``. Field counts are read from the JSON file
    named by the ``fields_file`` config entry when present, and computed
    with ``compute_field_counts()`` otherwise.

    :param args: object exposing a ``config`` attribute (config file path).
    """
    config = FileUtils.load_config(args.config)
    if "_uri" not in config["index_name"]:
        PLOGGER.error("index name might not be correct, please check again!")
        exit(0)

    if "fields_file" not in config:
        fields_count = compute_field_counts()
    else:
        # json.load expects a file object, not a path string, so the file
        # has to be opened first (and is closed again by the with-block).
        with open(config["fields_file"], "r") as fields_fp:
            fields_count = json.load(fields_fp)

    indexer = IndexerDBpediaURI(config, fields_count)

    indexer.build()
    PLOGGER.info("Index build: " + config["index_name"])
Exemplo n.º 12
0
def main(args):
    """Re-score the documents of a run with ELR and write the results.

    For every query (in sorted qid order), each document of the run is
    scored as ``lambdas[0] * p_T_d + lambdas[1] * p_E_d``, where ``p_T_d``
    is the value read from the run file and ``p_E_d`` is returned by
    ``ScorerELR.score_doc``. The per-query results are formatted with
    ``trec_format`` and written to ``config["output_file"]``; one
    ``Instance`` per (query, document) pair is collected and dumped to
    ``config["json_file"]``.

    :param args: object exposing a ``config`` attribute (config file path).
    """
    config = FileUtils.load_config(args.config)
    elastic_term = ElasticCache(config["text_index"])
    lambdas = config.get("lambdas", [0.9, 0.1])

    # Context managers make sure the input files are closed after parsing.
    with open(config["query_file"], "r") as query_fp:
        queries = json.load(query_fp)
    with open(config["mapping_file"], "r") as mapping_fp:
        mappings = json.load(mapping_fp)
    annots = load_annot(config["annot_file"])
    run = load_run(config["run_file"])

    instances = Instances()
    # The output file is closed even if scoring raises part-way through.
    with open(config["output_file"], "w") as out_file:
        # qid_int is the 0-based position of the query in sorted order.
        for qid_int, (qid, query) in enumerate(sorted(queries.items())):
            print("Scoring ", qid, "...")
            results = {}
            query_len = len(elastic_term.analyze_query(query).split())
            scorer = ScorerELR(ElasticCache(config["uri_index"]), annots[qid],
                               query_len, lambdas)
            for doc_id, p_T_d in sorted(run[qid].items()):
                query_mappings = get_mapping_query(annots[qid], mappings)
                p_E_d = scorer.score_doc(doc_id, query_mappings)
                properties = {
                    'doc_id': doc_id,
                    'query': query,
                    'qid': qid,
                    'qid_int': qid_int
                }
                features = {'p_T_d': p_T_d, 'p_E_d': p_E_d}
                ins = Instance(qid + "_" + doc_id,
                               features=features,
                               properties=properties)
                instances.add_instance(ins)
                # Linear interpolation of the two scores.
                results[doc_id] = (lambdas[0] * p_T_d) + (lambdas[1] * p_E_d)

            # Write this query's results in the format produced by trec_format.
            out_file.write(trec_format(results, qid, "elr"))

    print("Output file:", config["output_file"])
    instances.to_json(config["json_file"])
    print("Output file:", config["json_file"])
Exemplo n.º 13
0
def main(args):
    """Build the DBpedia types index from two validated source files.

    The ``type2entity_file`` and ``entity_abstracts_file`` config entries
    are checked in that order (``~`` is expanded); the first one that is
    not an existing file is reported and the process exits with status 1.

    :param args: object exposing a ``config`` attribute (config file path).
    """
    config = FileUtils.load_config(args.config)

    def _existing_file(key, complaint):
        # Resolve the config entry to a path; report and exit if it is not a file.
        resolved = os.path.expanduser(config.get(key, ""))
        if os.path.isfile(resolved):
            return resolved
        print(complaint, resolved)
        exit(1)

    type2entity_file = _existing_file(
        "type2entity_file", "invalid path to type-to-entity source file: ")
    entity_abstracts_file = _existing_file(
        "entity_abstracts_file", "invalid path to entity abstracts source file: ")

    builder = IndexerDBpediaTypes(config, type2entity_file,
                                  entity_abstracts_file)
    builder.build_index(force=True)
    print("Index build: <{}>".format(builder.name))
Exemplo n.º 14
0
def main(args):
    """Run batch retrieval with the given config or a built-in example.

    :param args: object exposing a ``config`` attribute; an empty string
        selects the example config defined below.
    """
    if args.config != "":
        config = FileUtils.load_config(args.config)
    else:
        # Example two-pass setup used when no config file is supplied.
        first_pass = {
            "num_docs": 1000,
            "field": "content",
            # "model": "LMJelinekMercer",
            # "model_params": {"lambda": 0.1}
        }
        second_pass = {
            "field": "content",
            "model": "lm",
            "smoothing_method": "jm",
            "smoothing_param": 0.1
        }
        config = {
            "index_name": "toy_index",
            # "query_file": "data/queries/test_queries.json",
            "first_pass": first_pass,
            "second_pass": second_pass,
            "output_file": "output/test_retrieval.txt"
        }
    Retrieval(config).batch_retrieval()
Exemplo n.º 15
0
def main(args):
    """Load the fb2dbp mapping and build the collection from it.

    :param args: object exposing a ``config`` attribute (config file path).
    """
    settings = FileUtils.load_config(args.config)
    builder = Freebase2DBpedia2Mongo(settings)
    # The loaded mapping is handed straight to the collection builder.
    builder.build_collection(builder.load_fb2dbp_mapping())
Exemplo n.º 16
0
def main(args):
    """Run ``DBpedia2Mongo.build_dbpedia`` with the loaded config.

    :param args: object exposing a ``config`` attribute (config file path).
    """
    settings = FileUtils.load_config(args.config)
    DBpedia2Mongo(settings).build_dbpedia()
Exemplo n.º 17
0
def main(config):
    """Load the given config and run ML on it.

    :param config: value passed to ``FileUtils.load_config``.
    """
    learner = ML(FileUtils.load_config(config))
    # The value returned by run() is not used by this entry point.
    learner.run()
Exemplo n.º 18
0
def main(args):
    """Build the DBpedia index and print the configured index name.

    :param args: object exposing a ``config`` attribute (config file path).
    """
    settings = FileUtils.load_config(args.config)
    IndexerDBpedia(settings).build()
    print("Index build: " + settings["index_name"])
Exemplo n.º 19
0
def main(args):
    """Run ``DBpediaSurfaceforms2Mongo.build_collection`` with the loaded config.

    :param args: object exposing a ``config`` attribute (config file path).
    """
    settings = FileUtils.load_config(args.config)
    DBpediaSurfaceforms2Mongo(settings).build_collection()
Exemplo n.º 20
0
Arquivo: ml.py Projeto: zxlzr/nordlys
def main(args):
    """Load the config and run ML on it.

    :param args: object exposing a ``config`` attribute (config file path).
    """
    settings = FileUtils.load_config(args.config)
    ML(settings).run()
Exemplo n.º 21
0
def main(args):
    """Run ``FACCToMongo.build`` with the loaded config.

    :param args: object exposing a ``config`` attribute (config file path).
    """
    settings = FileUtils.load_config(args.config)
    FACCToMongo(settings).build()
Exemplo n.º 22
0
def main(args):
    """Run ``Word2VecToMongo.build`` with the loaded config.

    :param args: object exposing a ``config`` attribute (config file path).
    """
    settings = FileUtils.load_config(args.config)
    Word2VecToMongo(settings).build()