def train_whole_model(self):
        """
        Train a model on the target data and persist the result.

        If ``self.load_model`` is truthy the network is read from
        ``self.path_pretrained_model`` and only the callbacks are requested
        from ``choose_model``; otherwise ``self.build_model()`` supplies both
        the model and the callbacks. The model is then fitted with the routine
        that matches ``self.stage``, stored on the instance together with the
        returned history, and saved to ``self.path_resulting_model``.
        """
        # MODEL BUILDING
        if not self.load_model:
            net, fit_callbacks = self.build_model()
        else:
            # time the loading of the stored model
            self.timestamps['model_start'] = datetime.datetime.now()
            net = load_model(self.path_pretrained_model)
            self.timestamps['model_end'] = datetime.datetime.now()
            # only the callbacks are used; the first returned element is ignored
            _, fit_callbacks = choose_model(params=self.params,
                                            do_compile=True,
                                            no_model=True)
        oversample = self.params.plx.get('apply_oversampling')

        # "training" stage: training and validation data are kept separate.
        # Any other stage: the model is trained with the whole training data.
        if self.stage == "training":
            fit_routine = self.train_model_training
        else:
            fit_routine = self.train_model_evaluation
        net, fit_history = fit_routine(model=net,
                                       callbacks=fit_callbacks,
                                       apply_oversampling=oversample)
        print("Training done.")

        # keep the history for later evaluation and store the trained model
        self.training_history = fit_history
        self.model = net
        net.save(self.path_resulting_model)
    def finetuning_cnn(self):
        """
        Fine-tune a pretrained model on the target dataset.

        The pretrained model is loaded, its last four layers are marked as
        non-trainable, and the model is recompiled with the optimizer returned
        by ``self.get_optimizer()``. It is then fitted with the routine that
        matches ``self.stage``; the model and its history are stored on the
        instance and the model is saved to ``self.path_resulting_model``.
        """
        # get pretrained model from directory
        net = (self.load_multi_gpu_model() if self.winslow
               else load_model(self.path_pretrained_model))

        # freeze the last four layers; the layers before them are left untouched
        frozen_tail = net.layers[-4:]
        for tail_layer in frozen_tail:
            tail_layer.trainable = False

        # print the trainable status of every layer
        for any_layer in net.layers:
            print(any_layer, any_layer.trainable)

        # only the callbacks are used; the first returned element is ignored
        _, fit_callbacks = choose_model(params=self.params,
                                        do_compile=True,
                                        no_model=True)
        oversample = self.params.plx.get('apply_oversampling')

        # recompile so the changed trainable flags and the optimizer take effect
        net.compile(loss='categorical_crossentropy',
                    optimizer=self.get_optimizer(),
                    metrics=['accuracy'])

        # train on the target dataset
        # "training" stage: training and validation data are kept separate.
        # Any other stage: the model is trained with the whole training data.
        if self.stage == "training":
            fit_routine = self.train_model_training
        else:
            fit_routine = self.train_model_evaluation
        net, fit_history = fit_routine(model=net,
                                       callbacks=fit_callbacks,
                                       apply_oversampling=oversample)
        print("Finetuning done.")

        # keep the history for later evaluation and store the trained model
        self.training_history = fit_history
        self.model = net
        net.save(self.path_resulting_model)
def main():
    """
    Run the main workflow of the ml-algorithm.

    Steps: collect the parameters, build the model, prepare the data
    (process it or recover already processed subjects), train the model with
    a training and a validation generator, and finally evaluate it.
    """

    # base parameters, extended by the additional iitnet parameters
    params = Params()
    iitnet_settings: dict = pp3.get_parameters()
    params.plx.update(iitnet_settings)
    # params.plx['batch_size'] = 250
    params.plx['subject_batch'] = 1  # !
    params.plx['apply_downsampling'] = True     # param common_frequency has to be set
    # NOTE: mdl_architecture has to be set to 'iitnet_cnn_bilstm'

    # adjust winslow parameters when running inside a Winslow pipeline
    if 'WINSLOW_PIPELINE_NAME' in os.environ:
        winslow_params(params)

    # Build model
    net, fit_callbacks = choose_model(params)

    # Data interface
    data_int = DataInt(
        save_path=params.plx["save_path"],
        perform_save_raw=params.plx["save_raw_data"],
        key_labels=params.plx["key_labels"],
        uuid=params.plx["experiment_uuid"],
    )

    # number of subjects used for training plus validation
    train_total = params.plx.get('train_count') + params.plx.get('val_count')

    if params.plx.get("data_already_processed"):
        # recover the list of subject names from the processed-data folder;
        # the experiment's pickle file in that folder is not a subject
        processed_dir = params.plx["save_path"] + params.plx["experiment_uuid"]
        pickle_name = params.plx["experiment_uuid"] + ".pckl"
        subject_entries = [entry for entry in os.listdir(processed_dir)
                           if entry != pickle_name]

        recovered = subject_entries[:train_total]
        data_int.experiment.recover_data_objectlist(recovered)

        print("Data already processed. Recover", len(recovered),
              "Subjects from", processed_dir)
    else:
        # data has not been processed yet
        process_data(params, data_int, params.plx["data_count"])

    # Model Training
    print("####\n\n\nTraining###\n\n")
    epoch_count = params.plx.get('epochs')
    oversample = params.plx.get('apply_oversampling')   # !only on training data

    training_batches = InterIntraEpochGenerator(
        data_int,
        params,
        params.plx.get('train_count'),
        shuffle=True,
        oversampling=oversample,
    )
    # validation subjects start right after the training subjects
    validation_batches = InterIntraEpochGenerator(
        data_int,
        params,
        params.plx.get('val_count'),
        start_val=params.plx['train_count'],
    )

    net.fit_generator(
        generator=training_batches,
        epochs=epoch_count,
        callbacks=fit_callbacks,
        workers=0,
        validation_data=validation_batches,
        use_multiprocessing=False,
    )

    # Model Evaluation
    print("####\n\n\nEvaluation###\n\n")
    evaluator = Eval()
    evaluator.evaluate(params=params, data_int=data_int, model=net)
# Example no. 4
# 0
def train_iitnet_allsubjects():
    """
    !NOT USED ANYMORE! !RUN ON WINSLOW WITH MORE RAM TO USE >200 SUBJECTS!

    Train the iitnet in several consecutive runs over slices of the training
    subjects.

    Background: the model training can't handle more than 200 subjects at a
    time (fewer with a bigger model). The training subjects are therefore
    split into runs; in every run the model is loaded from disk, trained on
    that run's slice and saved again.

    NOTE(review): only the history of the last run is passed to
    ``record_performance``, while the training timestamps are collected for
    every run - confirm that this is intended.
    """

    # log timestamps of relevant stages
    timestamps = {'processstart': datetime.datetime.now()}

    # print used devices
    print("Using GPU:", K.tensorflow_backend._get_available_gpus())

    # base parameters, extended by the additional iitnet parameters
    params = Params()
    iitnet_settings: dict = pp3.get_parameters()
    params.plx.update(iitnet_settings)

    # adjust winslow parameters when running inside a Winslow pipeline
    if 'WINSLOW_PIPELINE_NAME' in os.environ:
        winslow_params(params)

    params.plx['subject_batch'] = 1  # !
    # NOTE: mdl_architecture has to be set to 'iitnet_cnn_bilstm'

    # number of training / validation subjects
    train_count = params.plx.get('train_count')
    val_count = params.plx.get('val_count')
    total_count = train_count + val_count

    data_int = DataInt(
        save_path=params.plx["save_path"],
        perform_save_raw=params.plx["save_raw_data"],
        key_labels=params.plx["key_labels"],
        uuid=params.plx["experiment_uuid"],
    )

    if params.plx.get("data_already_processed"):
        # recover the list of subject names from the processed-data folder,
        # e.g. "D:/PhysioNet/processed/sa6pr7/"; the experiment's pickle file
        # in that folder is not a subject
        processed_dir = params.plx["save_path"] + params.plx["experiment_uuid"]
        pickle_name = params.plx["experiment_uuid"] + ".pckl"
        subject_entries = [entry for entry in os.listdir(processed_dir)
                           if entry != pickle_name]

        recovered = subject_entries[:total_count]
        data_int.experiment.recover_data_objectlist(recovered)

        print("Data already processed. Recover", len(recovered),
              "Subjects from", processed_dir)
    else:
        # data has not been processed yet
        process_data(params, data_int, params.plx["data_count"])

    epoch_count = params.plx.get('epochs')
    oversample = params.plx.get('apply_oversampling')  # !only on training data

    # build the (uncompiled) model and time it
    timestamps['modelstart'] = datetime.datetime.now()
    net, fit_callbacks = choose_model(params, do_compile=False)
    timestamps['modelend'] = datetime.datetime.now()

    # compile, then save the untrained model so that every run can load it
    net = compile_model_iitnet(params=params, model=net)
    print("Save untrained model ... ", end=" ")
    model_file = params.file_path_raw_mdl
    net.save(model_file)
    print("done")

    run_starts = []
    run_ends = []
    all_val_accs = []
    all_val_loss = []
    timestamps['crossval_start'] = datetime.datetime.now()

    # split the training data into runs
    total_training_runs = int((train_count // 200) + 1)
    train_per_run = int(train_count // total_training_runs)
    validation_per_run = int(val_count // total_training_runs)

    for run_index in range(total_training_runs):
        # train on max. 200 subjects, evaluate on validation_per_run subjects

        # continue from the model state saved last
        print("Load model ... ", end=" ")
        net = load_model(model_file)
        print("done.")

        # this run's training slice; validation starts directly behind it
        # NOTE(review): for all but the last run this position still lies
        # inside the training subjects - confirm that this is intended.
        slice_start = run_index * train_per_run
        val_start = slice_start + train_per_run

        training_batches = InterIntraEpochGenerator(
            data_int,
            params,
            train_per_run,
            start_val=slice_start,
            shuffle=True,
            oversampling=oversample,
        )
        validation_batches = InterIntraEpochGenerator(
            data_int,
            params,
            validation_per_run,
            start_val=val_start,
        )

        # model training
        print("####\n\n\nTraining###\n\n")
        run_starts.append(datetime.datetime.now())

        history = net.fit_generator(
            generator=training_batches,
            epochs=epoch_count,
            callbacks=fit_callbacks,
            workers=0,
            validation_data=validation_batches,
            use_multiprocessing=False,
        )

        run_ends.append(datetime.datetime.now())

        print("Saving model ... ", end=" ")
        net.save(model_file)
        print('done.')

    print("Model Training done. Save Performance to Log ... ", end=" ")

    # log the performance (history of the last run)
    # key is 'val_accuracy' on Winslow, 'val_acc' locally
    last_val_acc = history.history['val_accuracy']
    last_val_loss = history.history['val_loss']

    all_val_accs.append(last_val_acc)
    all_val_loss.append(last_val_loss)
    print("done.")

    print("=======> Logging Performance Evaluation <=======")
    timestamps['crossval_end'] = datetime.datetime.now()
    timestamps['trainstarts'] = run_starts
    timestamps['trainends'] = run_ends
    record_performance(all_val_accs, all_val_loss, params, timestamps)
    def build_model(self):
        """
        Build a compiled model via ``choose_model`` and time the construction.

        The start and end time are stored in ``self.timestamps`` under
        ``'model_start'`` and ``'model_end'``.

        :return: the ``(model, callbacks)`` pair returned by ``choose_model``
        """
        now = datetime.datetime.now

        self.timestamps['model_start'] = now()
        net, fit_callbacks = choose_model(self.params, do_compile=True)
        self.timestamps['model_end'] = now()

        return net, fit_callbacks
def train_iitnet_crossvalid():
    """
    Train the iitnet using k-fold cross validation.

    Only training and validation data are used (for parameter tuning); the
    best model is then evaluated in a separate program.

    Workflow:
      1. Collect the parameters and, if necessary, process the data or
         recover the already processed subjects.
      2. Build the uncompiled model; for ``k_crossval > 1`` it is saved to
         ``params.file_path_raw_mdl`` so that every fold starts from the same
         untrained model.
      3. For every fold: load (if ``k > 1``) and compile the model, create the
         training and validation generators, train, and collect the
         validation accuracy and loss histories.
      4. Pass all histories and timestamps to ``record_performance``.

    For ``k_crossval == 1`` no folds are rotated; a single split with the
    first 80% of the subjects for training and the rest for validation is used.
    """

    # log timestamps of relevant stages
    start_processing = datetime.datetime.now()
    timestamps = {'processstart': start_processing}

    # print used devices
    print("Using GPU:", K.tensorflow_backend._get_available_gpus())

    # get parameters
    params = Params()
    # get additional parameters for iitnet
    plx: dict = pp3.get_parameters()
    params.plx.update(plx)

    # adjust winslow parameters
    if 'WINSLOW_PIPELINE_NAME' in os.environ:
        winslow_params(params)

    params.plx['subject_batch'] = 1  # !
    # NOTE: mdl_architecture has to be set to 'iitnet_cnn_bilstm'

    # set local parameters for the cross validation
    k = params.plx.get('k_crossval')
    train_total = params.plx.get('train_count') + params.plx.get('val_count')
    count_per_fold = train_total // k

    data_int = DataInt(save_path=params.plx["save_path"],
                       perform_save_raw=params.plx["save_raw_data"],
                       key_labels=params.plx["key_labels"],
                       uuid=params.plx["experiment_uuid"])

    # Process data, if not already processed
    if not params.plx.get("data_already_processed"):
        # Process Data
        process_data(params, data_int, params.plx["data_count"])
    else:
        # recover the list of subject names from the processed-data folder,
        # e.g. "D:/PhysioNet/processed/sa6pr7/"; the experiment's pickle file
        # in that folder is not a subject
        preprocessed_data_path = params.plx["save_path"] + params.plx[
            "experiment_uuid"]
        pickle_object = params.plx["experiment_uuid"] + ".pckl"
        subject_folders = [
            name for name in os.listdir(preprocessed_data_path)
            if name != pickle_object
        ]

        relevant_subjects = subject_folders[:train_total]
        data_int.experiment.recover_data_objectlist(relevant_subjects)

        print("Data already processed. Recover", str(len(relevant_subjects)),
              "Subjects from", preprocessed_data_path)

    num_epochs = params.plx.get('epochs')
    apply_oversampling = params.plx.get(
        'apply_oversampling')  # !only on training data

    timestamps['modelstart'] = datetime.datetime.now()
    # build the uncompiled model; the keyword is `do_compile`, as at every
    # other choose_model call site in this file (it was `compile` here)
    model, callbacks = choose_model(params, do_compile=False)
    timestamps['modelend'] = datetime.datetime.now()
    # save untrained model so every fold can start from the same state
    if k > 1:
        print("Save untrained model ... ", end=" ")
        model.save(params.file_path_raw_mdl)
        print("done")

    timestamps_trainingstart = []
    timestamps_trainingend = []
    all_val_accs = []
    all_val_loss = []
    timestamps['crossval_start'] = datetime.datetime.now()

    for i in range(k):
        print("\n=============================================")
        print("=======> Cross Validation - Fold #", i + 1, "<=======")
        print("=============================================")

        # get raw model
        if k > 1:
            print("Load untrained model ... ", end=" ")
            model = load_model(params.file_path_raw_mdl)
            print("done")
        # compile model
        model = compile_model_iitnet(params=params, model=model)

        # set indices for the data to be loaded in this fold
        if k == 1:
            # single split: first 80% for training, remainder for validation
            train_start = 0
            train_end = int(train_total * 0.8)
            val_start = train_end
            train_count = train_end
            val_count = train_total - train_count
        else:
            # training starts at this fold's offset and spans k - 1 folds;
            # positions past the end wrap around to the beginning
            train_start = i * count_per_fold
            train_end = train_start + (count_per_fold * (k - 1))
            if train_end >= train_total:
                train_end -= train_total
            val_start = train_end
            if val_start >= train_total:
                val_start = 0

            # configure the data generators for training and validation
            # NOTE(review): if train_total is not a multiple of k, train_count
            # is larger than the count_per_fold * (k - 1) offset used for
            # val_start; depending on how InterIntraEpochGenerator indexes the
            # subjects, training and validation ranges may overlap - confirm.
            train_count = train_total - count_per_fold
            val_count = count_per_fold

        train_generator = InterIntraEpochGenerator(
            data_int,
            params,
            train_count,
            start_val=train_start,
            shuffle=True,
            oversampling=apply_oversampling,
            crossval_samples=train_total)
        validation_generator = InterIntraEpochGenerator(
            data_int,
            params,
            val_count,
            start_val=val_start,
            crossval_samples=train_total)

        # model training
        print("####\n\n\nTraining###\n\n")
        timestamps_trainingstart.append(datetime.datetime.now())

        history = model.fit_generator(generator=train_generator,
                                      epochs=num_epochs,
                                      callbacks=callbacks,
                                      workers=0,
                                      validation_data=validation_generator,
                                      use_multiprocessing=False)

        timestamps_trainingend.append(datetime.datetime.now())
        print("Model Training done. Save Performance to Log ... ", end=" ")

        # log the performance of this fold
        # key is 'val_accuracy' on Winslow, 'val_acc' locally
        val_acc_history = history.history['val_accuracy']
        val_loss_history = history.history['val_loss']

        all_val_accs.append(val_acc_history)
        all_val_loss.append(val_loss_history)
        print("done.")

    print("=======> Cross Validation - Performance Evaluation <=======")
    timestamps['crossval_end'] = datetime.datetime.now()
    timestamps['trainstarts'] = timestamps_trainingstart
    timestamps['trainends'] = timestamps_trainingend
    # number of trainable parameters of the model used in the last fold
    train_parameters = count_params(model.trainable_weights)
    record_performance(all_val_accs, all_val_loss, params, timestamps,
                       train_parameters)