def test_prediction_with_testing_pipeline(self):
    """Tests that a model created with the BiLSTM+CRF can be fitted and used to predict"""
    pipeline = LstmSystematicReviewPipeline(
        entities=self.entities,
        word_embeddings=os.path.join(test_dir, 'test_word_embeddings.txt'),
        cuda_device=-1
    )

    model = Model(pipeline)
    model.fit(self.dataset)
    resulting_dataset = model.predict(self.dataset, prediction_directory=self.prediction_directory)
    self.assertIsInstance(resulting_dataset, Dataset)
def test_prediction_with_testing_pipeline(self):
    """Tests that a model created with the BERT pipeline (with and without a CRF layer) can be fitted and used to predict"""
    pipeline = BertPipeline(entities=self.entities, cuda_device=-1)
    pipeline_crf = BertPipeline(entities=self.entities, cuda_device=-1, using_crf=True)

    for pipe in [pipeline, pipeline_crf]:
        model = Model(pipe)
        model.fit(self.dataset)
        resulting_dataset = model.predict(
            self.dataset, prediction_directory=self.prediction_directory)
        self.assertIsInstance(resulting_dataset, Dataset)
def drug_extraction(img):
    """Extracts drug mentions from the text of an image using the pretrained clinical notes model."""
    model = Model.load_external('medacy_model_clinical_notes')

    # OCR the image and join the extracted lines into one string before predicting
    all_text_as_list = text_from_image(img)
    all_text = " ".join(all_text_as_list)

    annotation = model.predict(all_text)

    # Each entry in annotation.annotations is a (label, start, end, text) tuple;
    # keep only entities whose text appears in the known drugs list, deleting in
    # reverse order so earlier indices stay valid.
    keys_del = []
    for key in range(len(annotation.annotations)):
        if annotation.annotations[key][3] not in drugs_dict["drugs"]:
            keys_del.append(key)
    for key in reversed(keys_del):
        del annotation.annotations[key]

    return annotation
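# Usage sketch (hypothetical file name; text_from_image and drugs_dict are assumed to be
# the OCR helper and drug lookup defined elsewhere in this module):
#   drug_annotations = drug_extraction('prescription_scan.png')
#   for label, start, end, text in drug_annotations.annotations:
#       print(label, text)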
def test_fit_with_clinical_pipeline(self):
    """
    Loads in training data and uses it to fit a model using the Clinical Pipeline
    :return:
    """
    train_loader = DataLoader(self.train_dir)
    metamap = MetaMap(
        metamap_path="/home/share/programs/metamap/2016/public_mm/bin/metamap",
        cache_output=False)
    train_loader.metamap(metamap)

    pipeline = ClinicalPipeline(metamap, entities=['Strength'])

    model = Model(pipeline)
    model.fit(train_loader)

    self.assertIsInstance(model, Model)
    self.assertIsNot(model.model, None)
def test_prediction_with_testing_pipeline(self):
    """
    Constructs a model that memorizes an entity, predicts it on the same file, and writes the predictions to an ann file
    :return:
    """
    pipeline = TestingPipeline(entities=['tradename'])

    # train on Abelcet.ann
    model = Model(pipeline, n_jobs=1)
    model.fit(self.train_dataset)

    # predict on both files
    model.predict(self.test_dataset, prediction_directory=self.prediction_directory)

    second_ann_file = "%s.ann" % self.test_dataset.get_data_files()[1].file_name
    annotations = Annotations(
        os.path.join(self.prediction_directory, second_ann_file),
        annotation_type='ann')
    self.assertIsInstance(annotations, Annotations)
def test_prediction_with_clinical_pipeline(self):
    """
    Constructs a model that memorizes an entity, predicts it on the same file, and writes the predictions to an ann file
    :return:
    """
    train_loader = DataLoader(self.train_dir)
    test_loader = DataLoader(self.test_dir)
    metamap = MetaMap(
        metamap_path="/home/share/programs/metamap/2016/public_mm/bin/metamap",
        cache_output=False)
    train_loader.metamap(metamap)
    test_loader.metamap(metamap)

    pipeline = ClinicalPipeline(metamap, entities=['Strength'])

    model = Model(pipeline)
    model.fit(train_loader)
    model.predict(test_loader)

    with open(self.test_dir + "/predictions/" + "predict_test.ann") as f:
        self.assertEqual(f.read(), "T1\tStrength 7 11\t5 mg\n")
logging.basicConfig(
    filename=model_directory + '/build_%s.log' % current_time,
    level=logging.DEBUG)  # set level=logging.DEBUG for more information

# Initialize everything needed for the model

# MetaMap the dataset, if it is not already, and store the metamapped files for access in train_dataset.
# See the Dataset API for details.
metamap = MetaMap(
    metamap_path="/home/share/programs/metamap/2016/public_mm/bin/metamap",
    convert_ascii=False)
train_dataset.metamap(metamap, n_jobs=30)

# Select the pre-processing pipeline this model should be trained with respect to.
pipeline = SystematicReviewPipeline(metamap=metamap, entities=entities)

# n_jobs is the number of cores to utilize during feature extraction when training the model.
# Note: this is done by forking, not threading, and hence utilizes a large amount of memory.
model = Model(pipeline, n_jobs=1)

# Write information about the model before training
with open(model_directory + "/model_information.txt", 'w') as model_info:
    model_info.write("Entities: [%s]\n" % ", ".join(entities))
    model_info.write("Training Files: %i\n" % len(train_dataset.get_data_files()))
    model_info.write(model_notes + "\n")
    model_info.write(str(model))

model.fit(train_dataset)

# dump fitted model
current_time = datetime.datetime.fromtimestamp(time.time()).strftime('%Y_%m_%d_%H.%M.%S')
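# A possible next step (sketch; the pickle file name here is assumed for illustration):
# persist the fitted model with the timestamp computed above, mirroring the Model.dump
# call used in the SMM4H training script below.
model.dump(model_directory + '/model_%s.pickle' % current_time)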
    level=logging.DEBUG)  # set level=logging.DEBUG for more information

# entity types
entities = ['ADR', 'Indication', 'Drug']

# training_dataset, evaluation_dataset, meta_data = Dataset.load_external('medacy_dataset_smm4h_2019')
training_dataset = Dataset('/home/mahendrand/VE/SMM4H/data_smmh4h/task2/training/dataset')
# path = '../data_smmh4h/task2/training/dataset_1'

# set MetaMap path
metamap = MetaMap(
    metamap_path="/home/share/programs/metamap/2016/public_mm/bin/metamap",
    convert_ascii=True)
training_dataset.metamap(metamap)

# pipeline = SystematicReviewPipeline(metamap=None, entities=meta_data['entities'])
pipeline = SystematicReviewPipeline(metamap=metamap, entities=entities)

model = Model(pipeline, n_jobs=1)  # number of processes to use during training and prediction

model.fit(training_dataset)
model.cross_validate(num_folds=5, dataset=training_dataset, write_predictions=True)

# location to store the clinical model
model.dump('/home/mahendrand/VE/SMM4H/medaCy/medacy/clinical_model.pickle')

# location to store the predictions
# model.predict(training_dataset, prediction_directory='/home/mahendrand/VE/SMM4H/data_smmh4h/task2/training/dataset/metamap_predictions')
from medacy.model import Model

model = Model.load_external('medacy_model_clinical_notes')


def tagMedical(text):
    """Runs the pretrained clinical notes model over raw text and returns its entities as dicts."""
    annotation = model.predict(text)
    return formatResponse(annotation)


def formatResponse(annotation):
    """Converts a medaCy Annotations object into a list of {text, start, end, label} dicts."""
    entities_dict = annotation.get_entity_annotations(return_dictionary=True)
    # each value is a (label, start, end, text) tuple
    entities = [{'text': entity[3], 'start': entity[1], 'end': entity[2], 'label': entity[0]}
                for entity in entities_dict.values()]
    return entities
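# A minimal usage sketch; the sentence below is an invented example, not project data.
if __name__ == '__main__':
    print(tagMedical("The patient was prescribed 5 mg of Abelcet."))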