示例#1
0
def preprocess_data_to_merge(input_standoff_folder_gold,
                             output_conll_folder_gold, output_conll_file_gold,
                             input_standoff_folder_pred,
                             output_conll_folder_pred, output_conll_file_pred):
    """Convert the gold and predicted standoff folders into single CoNLL files.

    The steps run in this order:

    1. ``anntoconll_wlp.convert_standoff_conll_single_file`` is called with
       the gold standoff folder, gold CoNLL folder and gold CoNLL file.
    2. Every entry returned by ``Read_Files_in_Input_Folder`` for the gold
       CoNLL folder is passed to ``conll2standoff.process`` together with the
       gold standoff folder.
    3. ``copy_text_files`` is called with the gold and predicted standoff
       folders (in that argument order).
    4. ``anntoconll_wlp.convert_standoff_conll_single_file`` is called with
       the predicted standoff folder, predicted CoNLL folder and predicted
       CoNLL file.

    Args:
        input_standoff_folder_gold: Gold standoff folder; first argument of
            the gold conversion and second argument of
            ``conll2standoff.process``.
        output_conll_folder_gold: Folder passed to the gold conversion and
            then listed with ``Read_Files_in_Input_Folder``.
        output_conll_file_gold: Third argument of the gold conversion.
        input_standoff_folder_pred: Predicted standoff folder; second
            argument of ``copy_text_files`` and first argument of the
            predicted conversion.
        output_conll_folder_pred: Second argument of the predicted
            conversion.
        output_conll_file_pred: Third argument of the predicted conversion.

    Returns:
        None.
    """
    # NOTE: this function does not create or clear output_conll_folder_gold
    # itself; it hands the path straight to the converter.
    anntoconll_wlp.convert_standoff_conll_single_file(
        input_standoff_folder_gold, output_conll_folder_gold,
        output_conll_file_gold)

    # Feed each file found in the gold CoNLL folder back through
    # conll2standoff, targeting the gold standoff folder.
    # NOTE(review): presumably this regenerates standoff annotations that
    # line up with the CoNLL tokenisation -- confirm against conll2standoff.
    for conll_path in Read_Files_in_Input_Folder(output_conll_folder_gold):
        conll2standoff.process(conll_path, input_standoff_folder_gold)

    # NOTE(review): argument order is (gold, pred); presumably this copies
    # the text files from the gold folder into the predicted folder so the
    # predicted annotations can be converted -- verify in copy_text_files.
    copy_text_files(input_standoff_folder_gold, input_standoff_folder_pred)

    anntoconll_wlp.convert_standoff_conll_single_file(
        input_standoff_folder_pred, output_conll_folder_pred,
        output_conll_file_pred)
示例#2
0
def preprocess_data(input_standoff_folder_train, output_conll_folder_train,
                    output_conll_file_train, input_standoff_folder_test,
                    output_conll_folder_test, output_conll_file_test):
    """Run the standoff-to-CoNLL single-file conversion for train, then test.

    ``anntoconll_wlp.convert_standoff_conll_single_file`` is invoked twice:
    first with the three ``*_train`` arguments, then with the three
    ``*_test`` arguments, each time in the order (standoff folder, CoNLL
    folder, CoNLL file).

    Args:
        input_standoff_folder_train: First argument of the train conversion.
        output_conll_folder_train: Second argument of the train conversion.
        output_conll_file_train: Third argument of the train conversion.
        input_standoff_folder_test: First argument of the test conversion.
        output_conll_folder_test: Second argument of the test conversion.
        output_conll_file_test: Third argument of the test conversion.

    Returns:
        None.
    """
    # Ordered so the train split is always converted before the test split.
    conversion_jobs = (
        (input_standoff_folder_train, output_conll_folder_train,
         output_conll_file_train),
        (input_standoff_folder_test, output_conll_folder_test,
         output_conll_file_test),
    )
    for standoff_dir, conll_dir, conll_file in conversion_jobs:
        anntoconll_wlp.convert_standoff_conll_single_file(
            standoff_dir, conll_dir, conll_file)