def preprocess_data_to_merge(
    input_standoff_folder_gold,
    output_conll_folder_gold,
    output_conll_file_gold,
    input_standoff_folder_pred,
    output_conll_folder_pred,
    output_conll_file_pred,
):
    """Prepare gold and predicted annotation folders for merging.

    Steps, in order:

    1. Convert the gold standoff folder to CoNLL with
       ``anntoconll_wlp.convert_standoff_conll_single_file``.
    2. For every file listed by ``Read_Files_in_Input_Folder`` in the gold
       CoNLL folder, run ``conll2standoff.process`` against the gold
       standoff folder.
    3. Copy text files from the gold standoff folder to the predicted
       standoff folder via ``copy_text_files``.
    4. Convert the predicted standoff folder to CoNLL.

    Args:
        input_standoff_folder_gold: Gold standoff folder passed to the
            converter, to ``conll2standoff.process`` and used as the source
            for ``copy_text_files``.
        output_conll_folder_gold: Folder passed to the converter as the gold
            CoNLL output location; it is listed afterwards to drive step 2.
        output_conll_file_gold: Third argument of the gold conversion
            (presumably the combined CoNLL file path -- confirm against
            ``anntoconll_wlp``).
        input_standoff_folder_pred: Predicted standoff folder; destination
            for ``copy_text_files`` and input of the second conversion.
        output_conll_folder_pred: Folder passed to the converter as the
            predicted CoNLL output location.
        output_conll_file_pred: Third argument of the predicted conversion
            (presumably the combined CoNLL file path -- confirm).
    """
    # NOTE(review): this function does not create or clear
    # output_conll_folder_gold itself; presumably the converter takes care
    # of that -- verify before relying on a clean output folder.
    anntoconll_wlp.convert_standoff_conll_single_file(
        input_standoff_folder_gold,
        output_conll_folder_gold,
        output_conll_file_gold,
    )

    # Feed every CoNLL file just produced back through conll2standoff.
    for conll_path in Read_Files_in_Input_Folder(output_conll_folder_gold):
        conll2standoff.process(conll_path, input_standoff_folder_gold)

    # The text files must be present in the prediction folder before it is
    # converted, so the copy happens ahead of the second conversion.
    copy_text_files(input_standoff_folder_gold, input_standoff_folder_pred)
    anntoconll_wlp.convert_standoff_conll_single_file(
        input_standoff_folder_pred,
        output_conll_folder_pred,
        output_conll_file_pred,
    )
def preprocess_data(
    input_standoff_folder_train,
    output_conll_folder_train,
    output_conll_file_train,
    input_standoff_folder_test,
    output_conll_folder_test,
    output_conll_file_test,
):
    """Convert the train and test standoff folders to CoNLL.

    Calls ``anntoconll_wlp.convert_standoff_conll_single_file`` once for the
    train arguments and then once for the test arguments, in that order.

    Args:
        input_standoff_folder_train: Standoff folder for the train split.
        output_conll_folder_train: CoNLL output folder for the train split.
        output_conll_file_train: Third converter argument for the train
            split (presumably the combined CoNLL file -- confirm).
        input_standoff_folder_test: Standoff folder for the test split.
        output_conll_folder_test: CoNLL output folder for the test split.
        output_conll_file_test: Third converter argument for the test split
            (presumably the combined CoNLL file -- confirm).
    """
    conversion_jobs = (
        (
            input_standoff_folder_train,
            output_conll_folder_train,
            output_conll_file_train,
        ),
        (
            input_standoff_folder_test,
            output_conll_folder_test,
            output_conll_file_test,
        ),
    )
    # Train is converted first, then test -- same order as before.
    for standoff_dir, conll_dir, conll_file in conversion_jobs:
        anntoconll_wlp.convert_standoff_conll_single_file(
            standoff_dir, conll_dir, conll_file
        )