Пример #1
0
def test_nested_ner():
    """Run the query-NER dataset generation on the ACE 2005 dev sample.

    The source and target paths are both resolved under ``ROOT_PATH`` in
    ``data_preprocess/test``; the generator is invoked with the "nested"
    entity setting, the "en_ace2005" dataset name and the "default" query set.
    """
    ace_dev_input = os.path.join(ROOT_PATH, "data_preprocess/test/dev_ace05.json")
    ace_dev_output = os.path.join(ROOT_PATH, "data_preprocess/test/mrc-dev_ace05.json")

    # Keyword options forwarded unchanged to the generator.
    options = {
        "entity_sign": "nested",
        "dataset_name": "en_ace2005",
        "query_sign": "default",
    }
    generate_query_ner_dataset(ace_dev_input, ace_dev_output, **options)
Пример #2
0
def main():
    """Generate a query-NER dataset from the collected arguments.

    ``collect_arguments`` supplies the source path, the target path and the
    three generator options (``entity_sign``, ``dataset_name``,
    ``query_sign``), which are forwarded to ``generate_query_ner_dataset``.
    """
    args = collect_arguments()

    source_path = args.path_to_source_data_file
    target_path = args.path_to_save_mrc_data_file
    generate_query_ner_dataset(
        source_path,
        target_path,
        entity_sign=args.entity_sign,
        dataset_name=args.dataset_name,
        query_sign=args.query_sign,
    )
Пример #3
0
def test_flat_ner():
    """Run the query-NER dataset generation on the MSRA data repository.

    Paths are resolved inside the hard-coded ``d_repo`` directory and the
    generator is invoked with the "flat" entity setting, the "zh_msra"
    dataset name and the "default" query set.
    """
    d_repo = "/data/nfsdata2/xiaoya/data_repo/msra_ner"
    source_file_path = os.path.join(d_repo, "bmes.test")
    # NOTE(review): the source is the "test" split but the target is named
    # "mrc-ner.dev" -- confirm this pairing is intentional.
    target_file_path = os.path.join(d_repo, "mrc-ner.dev")
    entity_sign = "flat"
    dataset_name = "zh_msra"
    query_sign = "default"
    generate_query_ner_dataset(
        source_file_path,
        target_file_path,
        entity_sign=entity_sign,
        dataset_name=dataset_name,
        query_sign=query_sign,
    )
def test_nested_ner():
    """Run the query-NER dataset generation on a local ``./mrc_test.txt`` file.

    The generator is called with the "flat" entity setting, the "mrc_train"
    dataset name and the "default" query set; the target path is
    ``./mrc_test.json``.
    """
    # An earlier ACE 2005 configuration (dev_ace05.json, entity_sign "nested",
    # dataset "en_ace2005") was disabled in favour of the local files below.
    # NOTE(review): the function name says "nested" but entity_sign is "flat"
    # -- confirm which one is intended.
    options = {
        "entity_sign": "flat",
        "dataset_name": "mrc_train",
        "query_sign": "default",
    }
    generate_query_ner_dataset("./mrc_test.txt", "./mrc_test.json", **options)
def test_nested_ner():
    """Run the query-NER dataset generation for the GENIA_Std train/dev/test files.

    Each source file in ``d_repo`` is paired with its target file name and
    handed to ``generate_query_ner_dataset`` in train, dev, test order, using
    the "nested" entity setting, the "en_jnlpba" dataset name and the
    "default" query set.
    """
    d_repo = "../../../sec-ner/data/GENIA_Std"

    # The dataset name was previously "en_ace2005" (with ACE 2005 dev paths
    # under ROOT_PATH); that configuration is no longer used here.
    options = {
        "entity_sign": "nested",
        "dataset_name": "en_jnlpba",
        "query_sign": "default",
    }

    # (source file name, target file name), processed in this order.
    file_pairs = (
        ("mrc_train.json", "mrc-ner.train"),
        ("mrc_dev.json", "mrc-ner.dev"),
        ("mrc_test.json", "mrc-ner.test"),
    )
    for source_name, target_name in file_pairs:
        generate_query_ner_dataset(
            os.path.join(d_repo, source_name),
            os.path.join(d_repo, target_name),
            **options,
        )
Пример #6
0
def test_flat_ner():
    """Run the query-NER dataset generation on the ACE 2005 ``train1.bsme`` example.

    Source and target paths are resolved under ``ROOT_PATH`` in
    ``data_preprocess/example/ace05``; the generator is invoked with the
    "flat" entity setting, the "en_ace2005" dataset name and the "default"
    query set.
    """
    # Earlier configurations for the MSRA data ("zh_msra") and the OntoNotes
    # dev example were disabled in favour of the ACE 2005 example below.
    example_dir = "data_preprocess/example/ace05"
    bsme_input = os.path.join(ROOT_PATH, example_dir + "/train1.bsme")
    json_output = os.path.join(ROOT_PATH, example_dir + "/train1.bsme.json")

    options = {
        "entity_sign": "flat",
        "dataset_name": "en_ace2005",
        "query_sign": "default",
    }
    generate_query_ner_dataset(bsme_input, json_output, **options)
def test_flat_ner():
    """Run the query-NER dataset generation for the GENIA_term_3.02 TSV files.

    Each ``*.tsv`` file in ``d_repo`` is paired with its target file name and
    handed to ``generate_query_ner_dataset`` in train, dev, test order, using
    the "flat" entity setting, the "en_jnlpba" dataset name and the "default"
    query set.
    """
    # Disabled alternatives: the "../../../sec-ner/data/JNLPBA" directory with
    # "jnlpba_{train,dev,test}.tsv" sources, and the "en_fine_genia" dataset
    # name.
    d_repo = "../../../sec-ner/data/GENIA_term_3.02"

    options = {
        "entity_sign": "flat",
        "dataset_name": "en_jnlpba",
        "query_sign": "default",
    }

    # (source file name, target file name), processed in this order.
    file_pairs = (
        ("train.tsv", "mrc-ner.train"),
        ("dev.tsv", "mrc-ner.dev"),
        ("test.tsv", "mrc-ner.test"),
    )
    for source_name, target_name in file_pairs:
        generate_query_ner_dataset(
            os.path.join(d_repo, source_name),
            os.path.join(d_repo, target_name),
            **options,
        )