""" This is the integration test of all important methods in this package Note: Please run this script from root folder """ from lib.data_preprocessor import prepare_crf_data from lib.model_generator import generate_model from lib.xml_stream_parser import XMLStreamParser from lib.word_templaterizer import TemplateGenerator from lib.cross_validation import * if __name__ == "__main__": """ Integration test for parsing You need: 1. A tempate file 2. Training data folder path 3. Testing data folder path """ print("Integration test for parsing") # Data Preprocessing from original data folder prepare_crf_data("test/data/note_texts/", "crf_files/note_train_features") # Generate target model generate_model("crf_files/final_template", "crf_files/train_features", "crf_files/final_model") parser = XMLStreamParser("test/data/fake_notes.xml", "crf_files/final_model", "NOTE_TEXT") parser.parse_and_write_to("test/data/fake_notes_parsed.xml")
from lib.data_preprocessor import prepare_crf_data
from lib.model_generator import generate_model
from lib.batch_processor import multi_processing


def _run_batch_parsing_test():
    """Run the batch-parsing integration test end to end.

    You need:
        1. A template file
        2. Training data folder path
        3. XML files folder path
        4. Parsed XML files folder path
        5. Number of processes
    """
    print("Integration test for batch parsing")

    features_path = "crf_files/train_features"

    # Data preprocessing from the original data folder.
    prepare_crf_data("test/data/train_test_data/", features_path)

    # Generate the target model.
    generate_model("crf_files/final_template", features_path, "crf_files/final_model")

    # Folder holding the original data, and folder receiving the parsed files.
    source_dir = "/home/groups/pearl/notes_revised/"
    output_dir = "/home/groups/pearl/notes_revised_parsed/"

    # Process the XML files with multiple processes; the first argument is
    # presumably the process count (item 5 above) -- confirm against
    # multi_processing.
    process_count = 11
    multi_processing(process_count, source_dir, output_dir)


if __name__ == '__main__':
    _run_batch_parsing_test()