示例#1
0
    def test_prediction_with_testing_pipeline(self):
        """Tests that a model built on the LSTM systematic review pipeline can be fitted and used to predict"""
        embeddings_path = os.path.join(test_dir, 'test_word_embeddings.txt')
        lstm_pipeline = LstmSystematicReviewPipeline(
            entities=self.entities,
            word_embeddings=embeddings_path,
            cuda_device=-1,
        )

        # Fit and predict on the same dataset; only the type of the result is checked.
        lstm_model = Model(lstm_pipeline)
        lstm_model.fit(self.dataset)
        predicted = lstm_model.predict(
            self.dataset,
            prediction_directory=self.prediction_directory,
        )

        self.assertIsInstance(predicted, Dataset)
示例#2
0
    def test_prediction_with_testing_pipeline(self):
        """Tests that a model created with the BertPipeline can be fitted and used to predict

        Two pipeline variants are exercised: the default one and one built
        with ``using_crf=True``. Each variant runs in its own sub-test so a
        failure report names the variant that broke.
        """
        # Both pipelines are constructed before any fitting starts.
        variants = [
            (False, BertPipeline(entities=self.entities, cuda_device=-1)),
            (True, BertPipeline(entities=self.entities,
                                cuda_device=-1,
                                using_crf=True)),
        ]

        for using_crf, pipe in variants:
            with self.subTest(using_crf=using_crf):
                model = Model(pipe)
                model.fit(self.dataset)
                resulting_dataset = model.predict(
                    self.dataset,
                    prediction_directory=self.prediction_directory)
                self.assertIsInstance(resulting_dataset, Dataset)
def drug_extraction(img):
    """Extract the text of an image and annotate it with the clinical-notes model.

    :param img: image handed unchanged to ``text_from_image``
    :return: the object returned by ``model.predict`` for the full extracted text
    """
    # NOTE(review): the model is loaded on every call; consider hoisting it
    # if this function is called repeatedly.
    model = Model.load_external('medacy_model_clinical_notes')
    all_text_as_list = text_from_image(img)
    print(all_text_as_list)

    # Each extracted line is followed by one space, including the last one.
    # The text is predicted once, on the complete string; the previous
    # per-line predictions ran on a partial string (first on an empty one)
    # and their results were discarded.
    all_text = "".join(line + " " for line in all_text_as_list)
    print(all_text)

    annotation = model.predict(all_text)
    print(annotation)

    # NOTE(review): annotations are returned unfiltered. An earlier draft
    # collected indexes of annotations whose text was not in
    # drugs_dict["drugs"], but the deletion step was disabled, so that
    # bookkeeping had no effect and has been removed.
    return annotation
示例#4
0
    def test_fit_with_clinical_pipeline(self):
        """
        Fits a model built on the Clinical Pipeline to the metamapped training data
        and checks that the fitted model object is populated
        :return:
        """
        metamap_binary = "/home/share/programs/metamap/2016/public_mm/bin/metamap"

        loader = DataLoader(self.train_dir)
        mm = MetaMap(metamap_path=metamap_binary, cache_output=False)
        loader.metamap(mm)

        clinical_model = Model(ClinicalPipeline(mm, entities=['Strength']))
        clinical_model.fit(loader)

        self.assertIsInstance(clinical_model, Model)
        self.assertIsNot(clinical_model.model, None)
    def test_prediction_with_testing_pipeline(self):
        """
        Fits a TestingPipeline model on the training dataset, predicts over the test
        dataset, and loads the second written .ann file back as Annotations
        :return:
        """
        memorizing_model = Model(TestingPipeline(entities=['tradename']), n_jobs=1)
        memorizing_model.fit(self.train_dataset)

        # Predictions for every test file are written into the prediction directory.
        memorizing_model.predict(
            self.test_dataset,
            prediction_directory=self.prediction_directory,
        )

        # Read back the prediction written for the second data file.
        second_file = self.test_dataset.get_data_files()[1]
        ann_name = "%s.ann" % second_file.file_name
        ann_path = os.path.join(self.prediction_directory, ann_name)
        annotations = Annotations(ann_path, annotation_type='ann')

        print(annotations)
        self.assertIsInstance(annotations, Annotations)
示例#6
0
    def test_prediction_with_clinical_pipeline(self):
        """
        Fits a Clinical Pipeline model on the training directory, predicts over the
        test directory, and compares the written predict_test.ann with the expected line
        :return:
        """
        metamap_binary = "/home/share/programs/metamap/2016/public_mm/bin/metamap"

        fit_loader = DataLoader(self.train_dir)
        predict_loader = DataLoader(self.test_dir)
        mm = MetaMap(metamap_path=metamap_binary, cache_output=False)

        # Both loaders are metamapped before the pipeline is built.
        fit_loader.metamap(mm)
        predict_loader.metamap(mm)

        clinical_model = Model(ClinicalPipeline(mm, entities=['Strength']))
        clinical_model.fit(fit_loader)
        clinical_model.predict(predict_loader)

        prediction_path = self.test_dir + "/predictions/" + "predict_test.ann"
        with open(prediction_path) as prediction_file:
            written = prediction_file.read()
        self.assertEqual(written, "T1\tStrength 7 11\t5 mg\n")
示例#7
0
# Send log output to a per-run file inside the model directory.
logging.basicConfig(
    filename=model_directory + '/build_%s.log' % current_time,
    level=logging.DEBUG)  # DEBUG is already the most verbose standard level

# Initialize everything needed for the model.

# Metamaps the dataset, if it is not already metamapped, and stores the metamapped files for access in train_dataset. See the Dataset API for details.
metamap = MetaMap(
    metamap_path="/home/share/programs/metamap/2016/public_mm/bin/metamap",
    convert_ascii=False)
train_dataset.metamap(metamap, n_jobs=30)

# Selects the pre-processing pipeline this model should be trained with respect to.
# n_jobs: number of cores to utilize during feature extraction when training the model.
# Note: this is done by forking, not threading, hence it utilizes a large amount of memory.
pipeline = SystematicReviewPipeline(metamap=metamap, entities=entities)
model = Model(
    pipeline, n_jobs=1
)

# Write information about the model before training.
with open(model_directory + "/model_information.txt", 'w') as model_info:
    model_info.write("Entities: [%s]\n" % ", ".join(entities))
    model_info.write("Training Files: %i\n" %
                     len(train_dataset.get_data_files()))
    model_info.write(model_notes + "\n")
    model_info.write(str(model))

model.fit(train_dataset)

# Refresh the timestamp now that training has finished.
# NOTE(review): presumably used to name the dumped model; the dump itself is not visible in this excerpt.
current_time = datetime.datetime.fromtimestamp(
    time.time()).strftime('%Y_%m_%d_%H.%M.%S')
示例#8
0
    level=logging.DEBUG)  #set level=logging.DEBUG for more information

# Entity types the model is trained to recognise.
entities = ['ADR', 'Indication', 'Drug']

# training_dataset, evaluation_dataset, meta_data = Dataset.load_external('medacy_dataset_smm4h_2019')
training_dataset = Dataset(
    '/home/mahendrand/VE/SMM4H/data_smmh4h/task2/training/dataset')
#path = '../data_smmh4h/task2/training/dataset_1'
# Set the MetaMap path and metamap the training dataset.
metamap = MetaMap(
    metamap_path="/home/share/programs/metamap/2016/public_mm/bin/metamap",
    convert_ascii=True)
training_dataset.metamap(metamap)

# pipeline = SystematicReviewPipeline(metamap=None, entities=meta_data['entities'])
# NOTE(review): an earlier comment here mentioned 30 processes, but n_jobs is 1.
pipeline = SystematicReviewPipeline(metamap=metamap, entities=entities)
model = Model(
    pipeline, n_jobs=1
)

# Fit on the full training dataset, then run 5-fold cross validation over the
# same dataset with write_predictions enabled.
model.fit(training_dataset)
model.cross_validate(num_folds=5,
                     dataset=training_dataset,
                     write_predictions=True)

# Location to store the clinical model.
model.dump('/home/mahendrand/VE/SMM4H/medaCy/medacy/clinical_model.pickle')

# Location to store the predictions.
#model.predict(training_dataset, prediction_directory='/home/mahendrand/VE/SMM4H/data_smmh4h/task2/training/dataset/metamap_predictions')
from medacy.model import Model

# Loaded once at module level; tagMedical below reuses this instance on every call.
model = Model.load_external('medacy_model_clinical_notes')

def tagMedical(text):
    """Run the module-level model over ``text`` and return the formatted entities.

    :param text: value passed unchanged to ``model.predict``
    :return: whatever ``formatResponse`` builds from the prediction
    """
    return formatResponse(model.predict(text))

def formatResponse(annotation):
    """Convert an annotation object into a list of plain entity dictionaries.

    :param annotation: object exposing ``get_entity_annotations(return_dictionary=True)``,
        whose values are indexable as (label, start, end, text)
    :return: list of dicts with keys 'text', 'start', 'end' and 'label',
        in the iteration order of the returned dictionary
    """
    by_id = annotation.get_entity_annotations(return_dictionary=True)

    formatted = []
    for record in by_id.values():
        formatted.append({
            'text': record[3],
            'start': record[1],
            'end': record[2],
            'label': record[0],
        })
    return formatted