def train_whole_model(self):
    """
    Train the model on the target data and save the result.

    If ``self.load_model`` is set, the model is read from
    ``self.path_pretrained_model`` and only the callbacks are taken from
    ``choose_model``; otherwise model and callbacks come from
    ``self.build_model()``. Depending on ``self.stage`` the fit is delegated
    to ``train_model_training`` or ``train_model_evaluation``.

    Side effects: sets ``self.training_history`` and ``self.model`` and
    saves the fitted model to ``self.path_resulting_model``.
    """
    # --- obtain model and callbacks ---
    if not self.load_model:
        network, fit_callbacks = self.build_model()
    else:
        # time the loading of the stored model
        self.timestamps['model_start'] = datetime.datetime.now()
        network = load_model(self.path_pretrained_model)
        self.timestamps['model_end'] = datetime.datetime.now()

        # only the callbacks are needed here, the first return value is dropped
        _, fit_callbacks = choose_model(params=self.params,
                                        do_compile=True,
                                        no_model=True)

    oversample = self.params.plx.get('apply_oversampling')

    # --- pick the fit routine that belongs to the current stage ---
    if self.stage == "training":
        # training stage: separate training and validation data
        run_fit = self.train_model_training
    else:
        # evaluation stage: use the whole training data
        run_fit = self.train_model_evaluation

    network, fit_history = run_fit(model=network,
                                   callbacks=fit_callbacks,
                                   apply_oversampling=oversample)
    print("Training done.")

    # --- keep the results for later evaluation and persist the model ---
    self.training_history = fit_history
    self.model = network
    network.save(self.path_resulting_model)
def finetuning_cnn(self):
    """
    Finetune a pretrained model on the target data and save the result.

    The pretrained model is loaded (via ``self.load_multi_gpu_model()`` when
    ``self.winslow`` is set, otherwise from ``self.path_pretrained_model``),
    its last four layers are marked as not trainable, and it is recompiled
    with the optimizer returned by ``self.get_optimizer()``. Depending on
    ``self.stage`` the fit is delegated to ``train_model_training`` or
    ``train_model_evaluation``.

    Side effects: prints the trainable flag of every layer, sets
    ``self.training_history`` and ``self.model`` and saves the fitted model
    to ``self.path_resulting_model``.
    """
    # --- load the pretrained model ---
    network = (self.load_multi_gpu_model()
               if self.winslow
               else load_model(self.path_pretrained_model))

    # --- freeze the last four layers, everything before stays trainable ---
    for frozen_layer in network.layers[-4:]:
        frozen_layer.trainable = False

    # report the trainable status of all layers
    for any_layer in network.layers:
        print(any_layer, any_layer.trainable)

    # only the callbacks are needed here, the first return value is dropped
    _, fit_callbacks = choose_model(params=self.params,
                                    do_compile=True,
                                    no_model=True)
    oversample = self.params.plx.get('apply_oversampling')

    # --- recompile so that the changed trainable flags and the
    # --- finetuning optimizer are applied
    finetune_optimizer = self.get_optimizer()
    network.compile(loss='categorical_crossentropy',
                    optimizer=finetune_optimizer,
                    metrics=['accuracy'])

    # --- pick the fit routine that belongs to the current stage ---
    if self.stage == "training":
        # training stage: separate training and validation data
        run_fit = self.train_model_training
    else:
        # evaluation stage: use the whole training data
        run_fit = self.train_model_evaluation

    network, fit_history = run_fit(model=network,
                                   callbacks=fit_callbacks,
                                   apply_oversampling=oversample)
    print("Finetuning done.")

    # --- keep the results for later evaluation and persist the model ---
    self.training_history = fit_history
    self.model = network
    network.save(self.path_resulting_model)
def main():
    """
    Main workflow of the ml-algorithm: configure, load data, train, evaluate.

    Steps:
      1. collect the parameters (``Params`` plus the ``pp3`` parameters),
      2. build the model and its callbacks,
      3. process the data or recover the already processed subjects,
      4. fit the model with a training and a validation generator,
      5. run the evaluation.
    """
    # ---- parameters ----
    params = Params()
    extra_parameters = pp3.get_parameters()  # additional parameters for iitnet
    params.plx.update(extra_parameters)
    params.plx['subject_batch'] = 1  # !
    # param common_frequency has to be set
    params.plx['apply_downsampling'] = True
    # NOTE: mdl_architecture has to be set to 'iitnet_cnn_bilstm'

    # adjust winslow parameters
    if 'WINSLOW_PIPELINE_NAME' in os.environ:
        winslow_params(params)

    # ---- model ----
    model, callbacks = choose_model(params)

    # ---- data ----
    data_int = DataInt(
        save_path=params.plx["save_path"],
        perform_save_raw=params.plx["save_raw_data"],
        key_labels=params.plx["key_labels"],
        uuid=params.plx["experiment_uuid"],
    )

    subject_total = params.plx.get('train_count') + params.plx.get('val_count')
    if params.plx.get("data_already_processed"):
        # recover the list of subject names from the processed data folder;
        # the experiment pickle lives in the same folder and is skipped
        experiment_pickle = params.plx["experiment_uuid"] + ".pckl"
        processed_dir = params.plx["save_path"] + params.plx["experiment_uuid"]
        found_subjects = []
        for entry in os.listdir(processed_dir):
            if entry != experiment_pickle:
                found_subjects.append(entry)
        relevant_subjects = found_subjects[:subject_total]
        data_int.experiment.recover_data_objectlist(relevant_subjects)
        print("Data already processed. Recover", str(len(relevant_subjects)),
              "Subjects from", processed_dir)
    else:
        process_data(params, data_int, params.plx["data_count"])

    # ---- training ----
    print("####\n\n\nTraining###\n\n")
    epoch_count = params.plx.get('epochs')
    oversample = params.plx.get('apply_oversampling')  # !only on training data

    training_batches = InterIntraEpochGenerator(
        data_int,
        params,
        params.plx.get('train_count'),
        shuffle=True,
        oversampling=oversample,
    )
    # the validation subjects start right behind the training subjects
    validation_batches = InterIntraEpochGenerator(
        data_int,
        params,
        params.plx.get('val_count'),
        start_val=params.plx['train_count'],
    )

    model.fit_generator(
        generator=training_batches,
        epochs=epoch_count,
        callbacks=callbacks,
        workers=0,
        validation_data=validation_batches,
        use_multiprocessing=False,
    )

    # ---- evaluation ----
    print("####\n\n\nEvaluation###\n\n")
    evaluator = Eval()
    evaluator.evaluate(params=params, data_int=data_int, model=model)
def train_iitnet_allsubjects():
    """
    !NOT USED ANYMORE!
    !RUN ON WINSLOW WITH MORE RAM TO USE >200 SUBJECTS!

    Train the iitnet on all training subjects in several consecutive runs.

    Problem: the model training cant handle more than 200 subjects at a time
    (less with a bigger model). The training subjects are therefore split
    into ``train_count // 200 + 1`` equally sized runs. In every run the
    model is reloaded from disk, fitted for the configured number of epochs
    on that run's subjects, and saved again. The validation histories of all
    runs are passed to ``record_performance`` at the end.
    """
    # ---- bookkeeping of the relevant stages ----
    timestamps = {'processstart': datetime.datetime.now()}

    # print used devices
    print("Using GPU:", K.tensorflow_backend._get_available_gpus())

    # ---- parameters ----
    params = Params()
    extra_parameters = pp3.get_parameters()  # additional parameters for iitnet
    params.plx.update(extra_parameters)

    # adjust winslow parameters
    if 'WINSLOW_PIPELINE_NAME' in os.environ:
        winslow_params(params)

    params.plx['subject_batch'] = 1  # !
    # NOTE: mdl_architecture has to be set to 'iitnet_cnn_bilstm'

    # number of training / validation subjects
    train_count = params.plx.get('train_count')
    val_count = params.plx.get('val_count')
    total_count = train_count + val_count

    # ---- data ----
    data_int = DataInt(
        save_path=params.plx["save_path"],
        perform_save_raw=params.plx["save_raw_data"],
        key_labels=params.plx["key_labels"],
        uuid=params.plx["experiment_uuid"],
    )

    if params.plx.get("data_already_processed"):
        # recover the list of subject names from the processed data folder,
        # e.g. "D:/PhysioNet/processed/sa6pr7/"; the experiment pickle lives
        # in the same folder and is skipped
        experiment_pickle = params.plx["experiment_uuid"] + ".pckl"
        processed_dir = params.plx["save_path"] + params.plx["experiment_uuid"]
        found_subjects = []
        for entry in os.listdir(processed_dir):
            if entry != experiment_pickle:
                found_subjects.append(entry)
        relevant_subjects = found_subjects[:total_count]
        data_int.experiment.recover_data_objectlist(relevant_subjects)
        print("Data already processed. Recover", str(len(relevant_subjects)),
              "Subjects from", processed_dir)
    else:
        process_data(params, data_int, params.plx["data_count"])

    epoch_count = params.plx.get('epochs')
    oversample = params.plx.get('apply_oversampling')  # !only on training data

    # ---- build, compile and store the untrained model ----
    timestamps['modelstart'] = datetime.datetime.now()
    model, callbacks = choose_model(params, do_compile=False)
    timestamps['modelend'] = datetime.datetime.now()

    model = compile_model_iitnet(params=params, model=model)
    print("Save untrained model ... ", end=" ")
    checkpoint_path = params.file_path_raw_mdl
    model.save(checkpoint_path)
    print("done")

    run_starts = []
    run_ends = []
    all_val_accs = []
    all_val_loss = []

    timestamps['crossval_start'] = datetime.datetime.now()

    # ---- split the subjects over the runs ----
    run_total = int((train_count // 200) + 1)
    train_per_run = int(train_count // run_total)
    validation_per_run = int(val_count // run_total)

    for run_index in range(run_total):
        # train on max. 200 subjects, validate on validation_per_run subjects
        # continue from the state the previous run stored on disk
        print("Load model ... ", end=" ")
        model = load_model(checkpoint_path)
        print("done.")

        # subject windows of this run
        first_train_subject = run_index * train_per_run
        # NOTE(review): the validation window begins directly behind this
        # run's training window, which presumably overlaps the training
        # window of the next run - confirm that this is intended.
        first_val_subject = first_train_subject + train_per_run

        training_batches = InterIntraEpochGenerator(
            data_int,
            params,
            train_per_run,
            start_val=first_train_subject,
            shuffle=True,
            oversampling=oversample,
        )
        validation_batches = InterIntraEpochGenerator(
            data_int,
            params,
            validation_per_run,
            start_val=first_val_subject,
        )

        # model training
        print("####\n\n\nTraining###\n\n")
        run_starts.append(datetime.datetime.now())
        history = model.fit_generator(
            generator=training_batches,
            epochs=epoch_count,
            callbacks=callbacks,
            workers=0,
            validation_data=validation_batches,
            use_multiprocessing=False,
        )
        run_ends.append(datetime.datetime.now())

        print("Saving model ... ", end=" ")
        model.save(checkpoint_path)
        print('done.')

        print("Model Training done. Save Performance to Log ... ", end=" ")
        # log the performance of this run
        # val_accuracy for Winslow, val_acc local
        all_val_accs.append(history.history['val_accuracy'])
        all_val_loss.append(history.history['val_loss'])
        print("done.")

    print("=======> Logging Performance Evaluation <=======")
    timestamps['crossval_end'] = datetime.datetime.now()
    timestamps['trainstarts'] = run_starts
    timestamps['trainends'] = run_ends
    record_performance(all_val_accs, all_val_loss, params, timestamps)
def build_model(self):
    """
    Create a compiled model together with its callbacks.

    The moments directly before and after the ``choose_model`` call are
    stored in ``self.timestamps`` under 'model_start' and 'model_end'.

    Returns the ``(model, callbacks)`` pair delivered by ``choose_model``.
    """
    self.timestamps['model_start'] = datetime.datetime.now()
    built = choose_model(self.params, do_compile=True)
    self.timestamps['model_end'] = datetime.datetime.now()

    # unpack so that a result that is not a pair still fails here
    model, callbacks = built
    return model, callbacks
def train_iitnet_crossvalid():
    """
    Train the iitnet using cross validation.

    Using only training and validation data for parameter tuning. Best model
    will then be evaluated in a separate program.

    Workflow:
      1. collect the parameters (``Params`` plus the ``pp3`` parameters),
      2. process the data or recover the already processed subjects,
      3. build the model without compiling it; for ``k > 1`` the untrained
         model is saved to ``params.file_path_raw_mdl`` so that every fold
         can start from the same untrained state,
      4. for each of the ``k`` folds: (re)load and compile the model, fit it
         on the fold's training subjects and collect the validation
         accuracy / loss histories,
      5. hand all histories, the timestamps and the number of trainable
         parameters to ``record_performance``.

    With ``k == 1`` no cross validation is performed: the first 80 % of the
    subjects are used for training and the rest for validation.
    """
    # log timestamps of relevant stages
    start_processing = datetime.datetime.now()
    timestamps = {'processstart': start_processing}

    # print used devices
    print("Using GPU:", K.tensorflow_backend._get_available_gpus())

    # get parameters
    params = Params()

    # get additional parameters for iitnet
    plx: dict = pp3.get_parameters()
    params.plx.update(plx)

    # adjust winslow parameters
    if 'WINSLOW_PIPELINE_NAME' in os.environ:
        winslow_params(params)

    params.plx['subject_batch'] = 1  # !
    # NOTE: mdl_architecture has to be set to 'iitnet_cnn_bilstm'

    # set local parameters for the cross validation
    k = params.plx.get('k_crossval')
    train_total = params.plx.get('train_count') + params.plx.get('val_count')
    count_per_fold = train_total // k

    data_int = DataInt(save_path=params.plx["save_path"],
                       perform_save_raw=params.plx["save_raw_data"],
                       key_labels=params.plx["key_labels"],
                       uuid=params.plx["experiment_uuid"])

    # Process data, if not already processed
    if not params.plx.get("data_already_processed"):
        process_data(params, data_int, params.plx["data_count"])
    else:
        # recover the list of subject names from the processed data folder,
        # e.g. "D:/PhysioNet/processed/sa6pr7/"
        preprocessed_data_path = (params.plx["save_path"]
                                  + params.plx["experiment_uuid"])
        # the experiment pickle lives in the same folder and is no subject
        pickle_object = params.plx["experiment_uuid"] + ".pckl"
        subject_folders = [
            name for name in os.listdir(preprocessed_data_path)
            if name != pickle_object
        ]

        relevant_subjects = subject_folders[:train_total]
        data_int.experiment.recover_data_objectlist(relevant_subjects)
        print("Data already processed. Recover", str(len(relevant_subjects)),
              "Subjects from", preprocessed_data_path)

    num_epochs = params.plx.get('epochs')
    apply_oversampling = params.plx.get(
        'apply_oversampling')  # !only on training data

    # build model; it is compiled per fold further below.
    # The keyword is `do_compile`, matching every other choose_model call in
    # this file (it used to be passed as `compile`).
    timestamps['modelstart'] = datetime.datetime.now()
    model, callbacks = choose_model(params, do_compile=False)
    timestamps['modelend'] = datetime.datetime.now()

    # save untrained model so that each fold can restart from it
    if k > 1:
        print("Save untrained model ... ", end=" ")
        model.save(params.file_path_raw_mdl)
        print("done")

    timestamps_trainingstart = []
    timestamps_trainingend = []
    all_val_accs = []
    all_val_loss = []

    timestamps['crossval_start'] = datetime.datetime.now()

    for i in range(k):
        print("\n=============================================")
        print("=======> Cross Validation - Fold #", i + 1, "<=======")
        print("=============================================")

        # get raw model, discarding the weights learned in the previous fold
        if k > 1:
            print("Load untrained model ... ", end=" ")
            model = load_model(params.file_path_raw_mdl)
            print("done")

        # compile model
        model = compile_model_iitnet(params=params, model=model)

        # set indices for the data to be loaded in this fold
        if k == 1:
            # plain 80/20 split, no rotation
            train_start = 0
            train_end = int(train_total * 0.8)
            val_start = train_end
            train_count = train_end
            val_count = train_total - train_count
        else:
            # training window covers k - 1 folds starting at fold i and may
            # wrap around the end of the subject list
            train_start = i * count_per_fold
            train_end = train_start + (count_per_fold * (k - 1))
            if train_end >= train_total:
                train_end -= train_total
            # the validation fold follows directly behind the training window
            val_start = train_end
            if val_start >= train_total:
                val_start = 0

            train_count = train_total - count_per_fold
            val_count = count_per_fold

        # configure the data generators for training and validation
        train_generator = InterIntraEpochGenerator(
            data_int,
            params,
            train_count,
            start_val=train_start,
            shuffle=True,
            oversampling=apply_oversampling,
            crossval_samples=train_total)
        validation_generator = InterIntraEpochGenerator(
            data_int,
            params,
            val_count,
            start_val=val_start,
            crossval_samples=train_total)

        # model training
        print("####\n\n\nTraining###\n\n")
        timestamps_trainingstart.append(datetime.datetime.now())
        history = model.fit_generator(generator=train_generator,
                                      epochs=num_epochs,
                                      callbacks=callbacks,
                                      workers=0,
                                      validation_data=validation_generator,
                                      use_multiprocessing=False)
        timestamps_trainingend.append(datetime.datetime.now())

        print("Model Training done. Save Performance to Log ... ", end=" ")
        # log the performance of this fold
        # val_accuracy for Winslow, val_acc local
        val_acc_history = history.history['val_accuracy']
        val_loss_history = history.history['val_loss']
        all_val_accs.append(val_acc_history)
        all_val_loss.append(val_loss_history)
        print("done.")

    print("=======> Cross Validation - Performance Evaluation <=======")
    timestamps['crossval_end'] = datetime.datetime.now()
    timestamps['trainstarts'] = timestamps_trainingstart
    timestamps['trainends'] = timestamps_trainingend
    train_parameters = count_params(model.trainable_weights)
    record_performance(all_val_accs, all_val_loss, params, timestamps,
                       train_parameters)