def load():
    """Build the clinical-notes NER model and load its pickled CRF weights.

    Constructs an ``N2C2Pipeline`` over the fixed n2c2 medication-entity set,
    wraps it in a ``Model``, and loads the ``n2c2_2020_jan_22.pkl`` weights
    shipped inside the ``medacy_model_clinical_notes`` package.

    :return: a ready-to-predict ``Model`` instance
    """
    target_entities = [
        'Drug', 'Form', 'Route', 'ADE', 'Reason',
        'Frequency', 'Duration', 'Dosage', 'Strength',
    ]
    loaded_model = Model(N2C2Pipeline(entities=target_entities))
    # The pickle lives under the package's bundled `model` resource directory.
    weights_dir = resource_filename('medacy_model_clinical_notes', 'model')
    loaded_model.load(os.path.join(weights_dir, 'n2c2_2020_jan_22.pkl'))
    return loaded_model
def load():
    """Build the BERT-based clinical-notes NER model and load its torch weights.

    Constructs a ``BertPipeline`` (Bio_ClinicalBERT backbone with a CRF layer,
    plus the module-level ``PIPELINE_ARGS``) over the fixed n2c2
    medication-entity set, wraps it in a ``Model``, and loads the weights from
    the package's bundled ``model/torch`` directory.

    :return: a ready-to-predict ``Model`` instance
    """
    target_entities = [
        'Drug', 'Form', 'Route', 'ADE', 'Reason',
        'Frequency', 'Duration', 'Dosage', 'Strength',
    ]
    bert_pipeline = BertPipeline(
        entities=target_entities,
        using_crf=True,
        pretrained_model='emilyalsentzer/Bio_ClinicalBERT',
        **PIPELINE_ARGS,
    )
    loaded_model = Model(bert_pipeline)
    # Torch weights are stored as a directory, not a single pickle file.
    weights_dir = os.path.join(
        resource_filename('medacy_bert_model_clinical_notes', 'model'), 'torch'
    )
    loaded_model.load(weights_dir)
    return loaded_model
def _activate_model(model_path, pipeline_class, args, kwargs):
    """Instantiate a pipeline and return a Model loaded with pickled weights.

    :param model_path: path to the model pickle file
    :param pipeline_class: the pipeline class matching the pickled model
    :param args: positional arguments forwarded to the pipeline constructor
    :param kwargs: keyword arguments forwarded to the pipeline constructor
    :return: a usable Model instance
    """
    # Build the pipeline first; the Model wraps it before weights are applied.
    activated = Model(pipeline_class(*args, **kwargs))
    activated.load(model_path)
    return activated
def test_fit_predict_dump_load(self):
    """Fits a model, tests that it predicts correctly, dumps and loads it, then tests that it still predicts"""
    trained = Model(self.pipeline)

    # Predicting before fit() must raise.
    with self.assertRaises(RuntimeError):
        trained.predict('Lorem ipsum dolor sit amet.')

    trained.fit(self.dataset, groundtruth_directory=self.groundtruth_2_directory)

    # Fitting must populate the training feature/label caches.
    self.assertTrue(trained.X_data)
    self.assertTrue(trained.y_data)

    # A raw-string prediction returns a non-empty Annotations.
    single_ann = trained.predict(
        'To exclude the possibility that alterations in PSSD might be a consequence of changes in the volume of reference, we used a subset of the vibratome sections'
    )
    self.assertIsInstance(single_ann, Annotations)
    self.assertTrue(single_ann)

    # Predicting over a directory yields a Dataset of equal size.
    predicted = trained.predict(
        self.dataset.data_directory, prediction_directory=self.prediction_directory)
    self.assertIsInstance(predicted, Dataset)
    self.assertEqual(len(self.dataset), len(predicted))

    # Groundtruth files are written, one per source document.
    written_groundtruth = Dataset(self.groundtruth_2_directory)
    self.assertListEqual(
        [d.file_name for d in self.dataset],
        [d.file_name for d in written_groundtruth],
    )

    # Every written groundtruth ann file has content.
    for ann in written_groundtruth.generate_annotations():
        self.assertTrue(ann)

    # Round-trip the model through a pickle dump/load.
    pickle_path = os.path.join(self.prediction_directory, 'test.pkl')
    trained.dump(pickle_path)
    reloaded = Model(self.pipeline)
    reloaded.load(pickle_path)

    # The reloaded model still produces a non-empty prediction.
    reloaded_ann = reloaded.predict(
        'To exclude the possibility that alterations in PSSD might be a consequence of changes in the volume of reference, we used a subset of the vibratome sections'
    )
    self.assertIsInstance(reloaded_ann, Annotations)
    self.assertTrue(reloaded_ann)
def test_predict(self):
    """
    predict() has different functionality depending on what is passed to it;
    therefore this test ensures that each type of input is handled correctly
    """
    # Load a pre-trained sample model for the fixture entities.
    sample_pipeline = TestingPipeline(entities=self.entities)
    loaded = Model(sample_pipeline)
    loaded.load(os.path.join(test_dir, 'sample_models', 'sample_test_pipe.pkl'))

    # A Dataset input returns a Dataset of the same size.
    from_dataset = loaded.predict(self.dataset)
    self.assertIsInstance(from_dataset, Dataset)
    self.assertEqual(len(from_dataset), len(self.dataset))

    # A directory-path input also returns a Dataset of the same size.
    from_directory = loaded.predict(self.dataset.data_directory)
    self.assertIsInstance(from_directory, Dataset)
    self.assertEqual(len(from_directory), len(self.dataset))

    # A plain string returns an Annotations object.
    from_string = loaded.predict('This is a sample string.')
    self.assertIsInstance(from_string, Annotations)

    # With no prediction_directory given, output lands in <data>/predictions.
    default_output_dir = os.path.join(self.dataset.data_directory, 'predictions')
    self.assertTrue(os.path.isdir(default_output_dir))
    shutil.rmtree(default_output_dir)

    # An explicit prediction_directory receives the predicted files.
    loaded.predict(self.dataset.data_directory,
                   prediction_directory=self.prediction_directory_2)
    self.assertEqual(6, len(os.listdir(self.prediction_directory_2)))