def load():
    """Build the clinical pipeline and load the packaged n2c2 2018 model."""
    entities = ['Drug', 'Form', 'Route', 'ADE', 'Reason',
                'Frequency', 'Duration', 'Dosage', 'Strength']
    pipeline = ClinicalPipeline(entities=entities)
    model = Model(pipeline, n_jobs=1)
    model_directory = resource_filename('medacy_model_clinical_notes', 'model')
    model.load(os.path.join(model_directory, 'n2c2_2018_no_metamap_2018_12_22_16.49.17.pkl'))
    return model
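# A minimal usage sketch for load() above (assumes the medacy_model_clinical_notes
# package and its pickled model are installed; the note text is illustrative only):
if __name__ == '__main__':
    clinical_model = load()
    sample_note = "Patient reports nausea after starting lisinopril 10 mg daily."
    print(clinical_model.predict(sample_note))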
def drug_extraction(img):
    """Run the clinical NER model over OCR'd text from an image and keep only known drugs."""
    model = Model.load_external('medacy_model_clinical_notes')

    # join the OCR output into a single string and predict once over the full text
    all_text_as_list = text_from_image(img)
    all_text = " ".join(all_text_as_list)
    annotation = model.predict(all_text)

    # drop any predicted entity whose text is not in the known-drugs dictionary
    entities = annotation.annotations['entities']
    keys_del = [key for key, entity in entities.items()
                if entity[3] not in drugs_dict["drugs"]]
    for key in keys_del:
        del entities[key]

    return annotation
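# A usage sketch for drug_extraction() above (assumes text_from_image() and
# drugs_dict are defined elsewhere in this module; the image path is illustrative):
if __name__ == '__main__':
    filtered = drug_extraction('sample_prescription.png')
    for entity in filtered.annotations['entities'].values():
        print(entity)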
def main():
    parser = create_parser()
    opts, args = parser.parse_args()
    if len(args) != 1:
        parser.error("invalid number of arguments")

    model = Model.load_external('medacy_model_clinical_notes')

    sqlite_file = args[0]
    conn = sqlite3.connect(sqlite_file)
    cursor = conn.cursor()
    cursor.execute('SELECT ED_ENC_NUM, NOTE_TEXT FROM Documents')

    for i, (num, raw) in enumerate(cursor):
        annotation = model.predict(raw)
        # print(json.dumps(annotation.annotations))
        # entities = annotation.entities
        conn.execute('UPDATE Documents SET Result=? WHERE ED_ENC_NUM = ?',
                     (json.dumps(annotation.annotations), num))
        if i % 20 == 0:
            conn.commit()

    conn.commit()
    conn.close()
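# The script above assumes a SQLite database whose Documents table looks roughly like
# the sketch below (column types are assumptions inferred from the queries, and the
# script name in the invocation is illustrative):
#
#   CREATE TABLE Documents (
#       ED_ENC_NUM INTEGER PRIMARY KEY,
#       NOTE_TEXT  TEXT,
#       Result     TEXT
#   );
#
#   python annotate_documents.py encounters.sqlite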
def test_prediction_with_testing_pipeline(self):
    """
    Constructs a model that memorizes an entity, predicts it on the same file, writes to ann.
    """
    pipeline = TestingPipeline(entities=['tradename'])

    # train on Abelcet.ann
    model = Model(pipeline, n_jobs=1)
    model.fit(self.train_dataset)

    # predict on both
    model.predict(self.test_dataset, prediction_directory=self.prediction_directory)

    second_ann_file = "%s.ann" % self.test_dataset.get_data_files()[1].file_name
    annotations = Annotations(os.path.join(self.prediction_directory, second_ann_file),
                              annotation_type='ann')
    self.assertIsInstance(annotations, Annotations)
# entity types
entities = ['Reason', 'ADE', 'Drug']
# entities = ['Symptom', 'Form', 'Route', 'Frequency', 'Duration', 'Dosage', 'Strength', 'Drug']

# dirPred = '/home/mahendrand/VE/Predictions/CV/N2C2'
training_dataset = Dataset('/home/mahendrand/VE/Data/N2C2/data')

# set metamap path
metamap = MetaMap(metamap_path="/home/share/programs/metamap/2016/public_mm/bin/metamap",
                  convert_ascii=True)
training_dataset.metamap(metamap)

# pipeline = ClinicalPipeline(metamap=metamap, entities=entities)
pipeline = SystematicReviewPipeline(metamap=metamap, entities=entities)
model = Model(pipeline, n_jobs=1)  # n_jobs sets how many processes documents are distributed between during training and prediction

model.fit(training_dataset)

# cross validation
# model.cross_validate(num_folds=5, training_dataset=training_dataset, prediction_directory=dirPred, groundtruth_directory=True)

# location to store the clinical model
# model.dump('/home/mahendrand/VE/SMM4H/medaCy/medacy/clinical_model.pickle')

# run on a separate testing dataset
testing_dataset_END = Dataset('/home/mahendrand/VE/Data/END/drug')

# location to store the predictions
model.predict(
from medacy.ner.pipelines import SystematicReviewPipeline
from medacy.ner.model import Model
from medacy.pipeline_components import MetaMap
import logging, sys

# print logs
# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)  # set level=logging.DEBUG for more information

# entity types
entities = ['CellLine', 'Dose', 'DoseDuration', 'DoseDurationUnits', 'DoseFrequency',
            'DoseRoute', 'DoseUnits', 'Endpoint', 'EndpointUnitOfMeasure', 'GroupName',
            'GroupSize', 'SampleSize', 'Sex', 'Species', 'Strain', 'TestArticle',
            'TestArticlePurity', 'TestArticleVerification', 'TimeAtDose', 'TimeAtFirstDose',
            'TimeAtLastDose', 'TimeEndpointAssessed', 'TimeUnits', 'Vehicle']

# training_dataset, evaluation_dataset, meta_data = Dataset.load_external('medacy_dataset_smm4h_2019')
training_dataset = Dataset('/home/mahendrand/VE/TAC/data_TAC')

# set metamap path
metamap = MetaMap(metamap_path="/home/share/programs/metamap/2016/public_mm/bin/metamap",
                  convert_ascii=True)
training_dataset.metamap(metamap)

# pipeline = SystematicReviewPipeline(metamap=None, entities=meta_data['entities'])
pipeline = SystematicReviewPipeline(metamap=metamap, entities=entities)
model = Model(pipeline, n_jobs=1)  # n_jobs sets how many processes documents are distributed between during training and prediction

model.fit(training_dataset)
model.cross_validate(num_folds=5, dataset=training_dataset, write_predictions=True)

# location to store the clinical model
model.dump('/home/mahendrand/VE/SMM4H/medaCy/medacy/clinical_model.pickle')

# location to store the predictions
# model.predict(training_dataset, prediction_directory='/home/mahendrand/VE/SMM4H/data_smmh4h/task2/training/dataset/metamap_predictions')
# # training_dataset_1.metamap(metamap)
#
# model_1 = Model(pipeline, n_jobs=1)
# model_1.fit(training_dataset_1)
#
# # run on a separate testing dataset
# testing_dataset_1 = Dataset('/home/mahendrand/VE/Data/CADEC_END/1/test')
#
# # location to store the predictions
# model_1.predict(testing_dataset_1, prediction_directory='/home/mahendrand/VE/Data/preds/5 fold/CADEC_END')


# fold 2
training_dataset_2 = Dataset('/home/mahendrand/VE/Data/CADEC_END/2/train')
# training_dataset_2.metamap(metamap)

model_2 = Model(pipeline, n_jobs=1)
model_2.fit(training_dataset_2)

# run on a separate testing dataset
testing_dataset_2 = Dataset('/home/mahendrand/VE/Data/CADEC_END/2/test')

# location to store the predictions
model_2.predict(testing_dataset_2,
                prediction_directory='/home/mahendrand/VE/Data/preds/5 fold/CADEC_END')


# fold 3
# training_dataset_3 = Dataset('/home/mahendrand/VE/Data/CADEC_END/3/train')
# training_dataset_3.metamap(metamap)
#
# model_3 = Model(pipeline, n_jobs=1)
# logging.basicConfig(filename=model_directory+'/build_%cd .log' % current_time, level=logging.DEBUG)  # set level=logging.DEBUG for more information
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)  # set level=logging.DEBUG for more information

# entities = ['Form', 'Route', 'Frequency', 'Reason', 'Duration', 'Dosage', 'ADE', 'Strength', 'Drug']
entities = ['Symptom', 'Drug']

# training_dataset, evaluation_dataset, meta_data = Dataset.load_external('medacy_dataset_smm4h_2019')
training_dataset = Dataset('/home/mahendrand/VE/Data/N2C2/symptom')
# training_dataset.set_data_limit(10)

# pipeline = SystematicReviewPipeline(metamap=None, entities=meta_data['entities'])
pipeline = ClinicalPipeline(metamap=None, entities=entities)
model = Model(pipeline, n_jobs=1)  # n_jobs sets how many processes documents are distributed between during training and prediction

# model.fit(training_dataset)
model.cross_validate(num_folds=5,
                     training_dataset=training_dataset,
                     prediction_directory=True,
                     groundtruth_directory=True)

# model.dump('/home/mahendrand/VE/SMM4H/medaCy/medacy/clinical_model.pickle')
# model.predict(training_dataset, prediction_directory='/home/mahendrand/VE/data_smmh4h/task2/training/metamap_predictions')
# model.predict(training_dataset)

# train_dataset, evaluation_dataset, meta_data = Dataset.load_external('medacy_dataset_smm4h_2019')
def __init__(self, model='medacy_model_clinical_notes'):
    super().__init__()
    self.add_detector(self.annotate)
    self.model = Model.load_external(model)
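# A usage sketch, assuming this __init__ belongs to a detector-style class that wraps
# the loaded medaCy model (the class name below is hypothetical):
#
#   detector = MedacyNERDetector()
#   annotation = detector.model.predict("Patient was started on metformin 500 mg.")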
create_directory(dirTrain)
print("Fold :", i)

# copy every annotation and text file from both datasets into the training directory
for item in ann_files_1:
    shutil.copy(dataset1 + '/' + item, dirTrain)
for item in ann_files_2:
    shutil.copy(dataset2 + '/' + item, dirTrain)
for item in txt_files_1:
    shutil.copy(dataset1 + '/' + item, dirTrain)
for item in txt_files_2:
    shutil.copy(dataset2 + '/' + item, dirTrain)

# move this fold's slice of dataset1 out of the training directory and into the test directory
for item in ann_files_1[i * num_files:(i + 1) * num_files]:
    shutil.copy(dataset1 + '/' + item, dirTest)
    os.remove(dirTrain + '/' + item)
for item in txt_files_1[i * num_files:(i + 1) * num_files]:
    shutil.copy(dataset1 + '/' + item, dirTest)
    os.remove(dirTrain + '/' + item)

training_dataset = Dataset(dirTrain)
training_dataset.metamap(metamap)

model = Model(pipeline, n_jobs=1)
model.fit(training_dataset)

# run on a separate testing dataset
testing_dataset = Dataset(dirTest)

# location to store the predictions
model.predict(testing_dataset, prediction_directory=dirPrediction)
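# A sketch of how the per-fold file lists used above might be prepared before the fold
# loop (directory handling and the five-fold slice size are assumptions, not taken from
# the original script):
#
#   ann_files_1 = sorted(f for f in os.listdir(dataset1) if f.endswith('.ann'))
#   txt_files_1 = sorted(f for f in os.listdir(dataset1) if f.endswith('.txt'))
#   ann_files_2 = sorted(f for f in os.listdir(dataset2) if f.endswith('.ann'))
#   txt_files_2 = sorted(f for f in os.listdir(dataset2) if f.endswith('.txt'))
#   num_files = len(ann_files_1) // 5   # held-out slice of dataset1 per fold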