Example #1
import datetime
import os
import time

# Write information about the model before training
with open(model_directory + "/model_information.txt", 'w') as model_info:
    model_info.write("Entities: [%s]\n" % ", ".join(entities))
    model_info.write("Training Files: %i\n" %
                     len(train_dataset.get_data_files()))
    model_info.write(model_notes + "\n")
    model_info.write(str(model))

model.fit(train_dataset)

# Dump the fitted model, tagging the filename with a timestamp
current_time = datetime.datetime.fromtimestamp(
    time.time()).strftime('%Y_%m_%d_%H.%M.%S')
model.dump(model_directory + "/tac_2018_%s_%s.pkl" %
           (model_name, current_time))

# If no evaluation data is available, skip the prediction steps below and only cross-validate

# MetaMap the evaluation data before predicting
evaluation_dataset.metamap(
    metamap, n_jobs=30)  # automatically skipped if the dataset is pre-metamapped

# Predict over every document in evaluation_dataset using the model trained above,
# storing the predictions in the given output directory
model.predict(evaluation_dataset,
              prediction_directory=os.path.join(model_directory,
                                                'predictions/'))

# Perform sequence-stratified cross validation with the trained pipeline.
# Note that all extracted features are held in memory while this runs.
model.cross_validate(num_folds=10)
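
This snippet is a fragment: it assumes that model_directory, model_name, model_notes, entities, train_dataset, evaluation_dataset, metamap, and model were built earlier in the script. A minimal setup sketch follows; the paths are hypothetical placeholders and the medaCy import locations may differ between versions, so adjust both to your installation.

# Setup sketch for the fragment above (paths and import locations are assumptions)
from medacy.data import Dataset                      # import paths may vary by medaCy version
from medacy.ner import Model
from medacy.ner.pipelines import SystematicReviewPipeline
from medacy.pipeline_components import MetaMap

model_directory = "/path/to/model_output"            # hypothetical output directory
model_name = "demo"                                  # hypothetical model name
model_notes = "baseline run"                         # free-text notes written to model_information.txt
entities = ['ADR', 'Indication', 'Drug']

train_dataset = Dataset("/path/to/training/data")    # hypothetical data locations
evaluation_dataset = Dataset("/path/to/evaluation/data")

metamap = MetaMap(metamap_path="/path/to/public_mm/bin/metamap")
train_dataset.metamap(metamap, n_jobs=30)

pipeline = SystematicReviewPipeline(metamap=metamap, entities=entities)
model = Model(pipeline, n_jobs=30)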
Example #2
import logging

# Dataset, MetaMap, SystematicReviewPipeline, and Model come from medaCy;
# the exact import paths vary by medaCy version.
logging.basicConfig(
    level=logging.DEBUG)  # DEBUG level logs detailed progress information

# Entity types to extract
entities = ['ADR', 'Indication', 'Drug']

# training_dataset, evaluation_dataset, meta_data = Dataset.load_external('medacy_dataset_smm4h_2019')
training_dataset = Dataset(
    '/home/mahendrand/VE/SMM4H/data_smmh4h/task2/training/dataset')
#path = '../data_smmh4h/task2/training/dataset_1'
# Set the MetaMap installation path
metamap = MetaMap(
    metamap_path="/home/share/programs/metamap/2016/public_mm/bin/metamap",
    convert_ascii=True)
training_dataset.metamap(metamap)

# pipeline = SystematicReviewPipeline(metamap=None, entities=meta_data['entities'])
pipeline = SystematicReviewPipeline(metamap=metamap, entities=entities)
model = Model(
    pipeline, n_jobs=1
)  # n_jobs controls how many processes documents are distributed across during training and prediction

model.fit(training_dataset)
model.cross_validate(num_folds=5,
                     dataset=training_dataset,
                     write_predictions=True)

# Location to store the trained clinical model
model.dump('/home/mahendrand/VE/SMM4H/medaCy/medacy/clinical_model.pickle')

# Optional: predict over a dataset, storing predictions in the given directory
#model.predict(training_dataset, prediction_directory='/home/mahendrand/VE/SMM4H/data_smmh4h/task2/training/dataset/metamap_predictions')
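
Once the model has been pickled with dump(), it can be reloaded later into a Model built with the same pipeline and used for prediction. A sketch of that round trip, assuming model.load() mirrors model.dump() in your medaCy version; predictions_dir is a hypothetical output directory:

# Reload the dumped model and predict with it (load()/dump() pairing is an assumption)
pipeline = SystematicReviewPipeline(metamap=metamap, entities=entities)
trained_model = Model(pipeline, n_jobs=1)
trained_model.load('/home/mahendrand/VE/SMM4H/medaCy/medacy/clinical_model.pickle')

predictions_dir = '/path/to/predictions'  # hypothetical output directory
trained_model.predict(training_dataset, prediction_directory=predictions_dir)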