示例#1
0
    def cross_validate(subj_results_dir, label_names=None):
        """
        Build the cross-validation figures and tables for one results dir.

        Figures are produced only when the learning type detected for
        ``subj_results_dir`` is ``'dl'``; tables are always produced.

        :param subj_results_dir: results directory the fold files, the
            output directories and the learning type are derived from
        :param label_names: forwarded unchanged to the tables manager
        """
        print_manager('RUNNING CROSS-VALIDATION', 'double-dashed')

        # collecting inputs: fold files, output directories, ml/dl flag
        fold_paths = CrossValidation.fold_file_paths(subj_results_dir)
        output_dirs = CrossValidation.get_figures_and_tables_dirs(
            subj_results_dir)
        figures_dir, tables_dir = output_dirs
        kind_of_learning = CrossValidation.get_learning_type(subj_results_dir)

        # figures exist only for deep learning results
        if kind_of_learning == 'dl':
            CrossValidation.figures_manager(fold_paths, figures_dir)

        # tables are built for both learning types
        CrossValidation.tables_manager(
            file_paths=fold_paths,
            tables_dir=tables_dir,
            label_names=label_names,
            learning_type=kind_of_learning,
        )

        print_manager('CROSS-VALIDATION ENDED', 'last', bottom_return=1)
示例#2
0
    def prepare_for_transfer_learning(self,
                                      cross_subj_dir_path,
                                      subject_id,
                                      train_anyway=False):
        """
        Load cross-subject weights into the model before transfer learning.

        The weights come from ``net_best_val_loss.h5`` stored in
        ``<cross_subj_dir_path>/subj_cross/<formatted subject_id>``.

        :param cross_subj_dir_path: root directory of the cross-subject run
        :param subject_id: subject whose cross-subject folder is used
        :param train_anyway: when exactly ``False`` the loaded net is saved
            to ``self.h5_model_path`` and a ``MetricsTracker`` is created
            whose ``best`` entry is seeded with the cross-subject test
            loss/accuracy (and ``idx`` 0); any other value only loads the
            weights
        """
        print_manager('PREPARING FOR TRANSFER LEARNING', 'double-dashed')

        # folder holding the cross-subject results of this subject
        source_dir = join(cross_subj_dir_path, 'subj_cross',
                          my_formatter(subject_id, 'fold'))

        # loading the cross-subject weights
        weights_path = join(source_dir, 'net_best_val_loss.h5')
        self.model.load_weights(weights_path)

        if train_anyway is False:
            # the loaded net is stored up-front as the current best one
            self.model.save(self.h5_model_path)

            # tracker that will be handed to the training routine
            self.metrics_tracker = MetricsTracker(
                dataset=self.dataset,
                epochs=self.epochs,
                n_classes=self.n_classes,
                batch_size=self.batch_size,
                h5_model_path=self.h5_model_path,
                fold_stats_path=self.fold_stats_path)

            # reading the statistics saved by the cross-subject run
            stats_path = join(source_dir, 'fold_stats.pickle')
            with open(stats_path, 'rb') as stats_file:
                cross_subj_results = load(stats_file)

            # seeding the tracker: the pre-trained net counts as best (idx 0)
            best = self.metrics_tracker.best
            best['loss'] = cross_subj_results['test']['loss']
            best['acc'] = cross_subj_results['test']['acc']
            best['idx'] = 0

        print_manager('DONE!!', print_style='last', bottom_return=1)
示例#3
0
def dl_loader(data_dir,
              name_to_start_codes,
              channel_names,
              subject_id=1,
              resampling_freq=None,
              clean_ival_ms=(0, 4000),
              epoch_ival_ms=(-500, 4000),
              train_test_split=True,
              clean_on_all_channels=True,
              standardize_mode=0):
    """
    Load one subject, epoch the continuous signal and drop masked trials.

    All arguments except ``epoch_ival_ms`` are forwarded to
    ``load_and_preprocess_data``; ``epoch_ival_ms`` is used for epoching.

    :return: the epoched signal, with ``X`` and ``y`` indexed by the clean
        trial mask returned by ``load_and_preprocess_data``
    """
    # continuous signal plus the mask of the trials to keep
    continuous, keep_mask = load_and_preprocess_data(
        data_dir=data_dir,
        name_to_start_codes=name_to_start_codes,
        channel_names=channel_names,
        subject_id=subject_id,
        resampling_freq=resampling_freq,
        clean_ival_ms=clean_ival_ms,
        train_test_split=train_test_split,
        clean_on_all_channels=clean_on_all_channels,
        standardize_mode=standardize_mode,
    )
    print_manager('EPOCHING AND CLEANING WITH MASK', 'double-dashed')

    # from continuous data to epoched data
    print_manager('Epoching...')
    epoched = create_signal_target_from_raw_mne(
        continuous, name_to_start_codes, epoch_ival_ms)
    print_manager('DONE!!', bottom_return=1)

    # keeping only the trials selected by the mask
    print_manager('cleaning with mask...')
    epoched.X = epoched.X[keep_mask]
    epoched.y = epoched.y[keep_mask]
    print_manager('DONE!!', 'last', bottom_return=1)

    return epoched
示例#4
0
    def freeze_layers(self, layers_to_freeze):
        """
        Freeze convolutional/dense layers and re-compile the model.

        Only layers whose name starts with ``'conv'`` or ``'dense'`` count
        towards the requested amount; other layers are skipped.

        :param layers_to_freeze: number of layers to freeze. A positive
            value walks the model from its first layer forward, a negative
            value walks it from its last layer backward, 0 does nothing
            (the model is not re-compiled in that case).
        :raises IndexError: if the model holds fewer conv/dense layers than
            requested (layers found before the error stay frozen)
        """
        print_manager('FREEZING LAYERS', 'double-dashed')
        if layers_to_freeze == 0:
            print('NOTHING TO FREEZE!!')
        else:
            print("I'm gonna gonna freeze {} layers.".format(layers_to_freeze))

            # picking the walking direction from the sign of the request
            layers = list(self.model.layers)
            if layers_to_freeze < 0:
                layers.reverse()
            remaining = abs(layers_to_freeze)

            # freezing layers
            for layer in layers:
                if remaining == 0:
                    break
                if layer.name.startswith(('conv', 'dense')):
                    layer.trainable = False
                    remaining -= 1

            # same exception type as running off the end of the layer list,
            # but with an explanation attached
            if remaining > 0:
                raise IndexError(
                    'cannot freeze {} layers: only {} conv/dense layers '
                    'found in the model.'.format(
                        abs(layers_to_freeze),
                        abs(layers_to_freeze) - remaining))

            # creating optimizer instance
            # NOTE: only Adam is implemented; every value of self.optimizer
            # ends up with an Adam instance
            opt = optimizers.Adam(lr=self.learning_rate)

            # compiling model (needed for the trainable flags to take effect)
            self.model.compile(loss=self.loss,
                               optimizer=opt,
                               metrics=['accuracy'])

            # printing model information
            self.model.summary()
        print_manager('DONE!!', print_style='last', bottom_return=1)
示例#5
0
    def on_train_end(self, logs=None):
        """
        Test the best saved net and store the statistics of the fold.

        The weights at ``self.h5_model_path`` are loaded, the model is
        evaluated on the test split of ``self.dataset``, a confusion matrix
        is computed and everything is dumped to ``self.fold_stats_path``.

        :param logs: unused
        """
        print_manager('TRAINING ENDED', 'last', bottom_return=1)
        print_manager('RUNNING TESTING', 'double-dashed')

        # restoring the best net
        best_epoch = self.best['idx'] + 1
        print('BEST NET found at epoch: {}'.format(best_epoch))
        print('Loading best net weights and testing.')
        self.model.load_weights(self.h5_model_path)

        # evaluating on the test split
        test_loss, test_acc = self.model.evaluate(
            self.dataset.X_test, self.dataset.y_test, verbose=1)
        print('Test loss:', test_loss)
        print('Test  acc:', test_acc)

        # labels and predictions, both taken back from categorical form
        true_labels = self.dataset.y_test.argmax(axis=1)
        predicted_labels = self.model.predict(
            self.dataset.X_test).argmax(axis=1)

        # confusion matrix
        conf_mtx = confusion_matrix(y_true=true_labels,
                                    y_pred=predicted_labels)
        print("\nConfusion matrix:\n", conf_mtx)

        # collecting the statistics, one section at a time
        train_stats = {'loss': self.train['loss'], 'acc': self.train['acc']}
        valid_stats = {'loss': self.valid['loss'], 'acc': self.valid['acc']}
        best_stats = {
            'loss': self.best['loss'],
            'acc': self.best['acc'],
            'idx': self.best['idx'],
        }
        test_stats = {
            'loss': test_loss,
            'acc': test_acc,
            'conf_mtx': conf_mtx.tolist(),
        }
        results = {
            'train': train_stats,
            'valid': valid_stats,
            'best': best_stats,
            'test': test_stats,
        }

        # saving to disk
        with open(self.fold_stats_path, 'wb') as stats_file:
            dump(results, stats_file)

        print_manager('TESTING ENDED', 'last', bottom_return=1)
示例#6
0
    def epo_to_dataset(self, leave_subj, parsing_type=0):
        """
        Turn the epoched signal into the dataset of one fold.

        The result of ``CrossValidation.create_dataset_static`` is stored in
        ``self.fold_data``.

        :param leave_subj: 1-based fold number; fold ``leave_subj - 1`` of
            ``self.folds`` is used
        :param parsing_type: 0 builds the dataset from ``self.fold_data``,
            1 builds it from ``self.data``
        :raises ValueError: for any other ``parsing_type``
        """
        print_manager('FOLD ALL BUT ' + str(leave_subj), 'double-dashed')
        print_manager('Creating current fold...')

        print_manager('DONE!!', bottom_return=1)
        print_manager('Parsing epoched signal to EEGDataset...')

        # comparing by value: identity checks on int literals only work by
        # accident of small-int caching and break for e.g. numpy integers
        if parsing_type == 0:
            source = self.fold_data
        elif parsing_type == 1:
            source = self.data
        else:
            raise ValueError(
                'parsing_type {} not supported.'.format(parsing_type))
        self.fold_data = CrossValidation.create_dataset_static(
            source, self.folds[leave_subj - 1])

        print_manager('DONE!!', bottom_return=1)
        print_manager('We obtained a ' + str(self.fold_data))
        print_manager('DATA READY!!', 'last', bottom_return=1)
示例#7
0
    def cnt_to_epo(self, parsing_type):
        """
        Epoch the continuous signal, clean it with the mask, create folds.

        Nothing is done when ``self.data`` is not a ``RawArray``.

        :param parsing_type: 0 is the "soft" routine: the epoched signal
            and the masked subject labels go to ``self.fold_data`` and
            ``self.fold_subject_labels``, ``self.data`` is left untouched.
            1 replaces ``self.data`` and ``self.subject_labels`` with the
            epoched/masked versions.
        :raises ValueError: for any other ``parsing_type``
        """
        # the routine only works on continuous (cnt) data
        if not isinstance(self.data, RawArray):
            return

        if parsing_type == 0:
            # soft parsing: results are stored in fold_data instead of the
            # main data property
            print_manager('Parsing cnt signal to epoched one...')
            self.fold_data = create_signal_target_from_raw_mne(
                self.data, self.name_to_start_codes, self.epoch_ival_ms)
            print_manager('DONE!!', bottom_return=1)

            # applying the mask to signal, labels and subject labels
            print_manager('Cleaning epoched signal with mask...')
            self.fold_data.X = self.fold_data.X[self.clean_trial_mask]
            self.fold_data.y = self.fold_data.y[self.clean_trial_mask]
            self.fold_subject_labels = self.subject_labels[
                self.clean_trial_mask]
            print_manager('DONE!!', bottom_return=1)
        elif parsing_type == 1:
            # the epoched signal replaces the original one in the data
            # property
            print_manager('Parsing cnt signal to epoched one...')
            self.data = create_signal_target_from_raw_mne(
                self.data, self.name_to_start_codes, self.epoch_ival_ms)
            print_manager('DONE!!', bottom_return=1)

            # applying the mask to signal, labels and subject labels
            print_manager('Cleaning epoched signal with mask...')
            self.data.X = self.data.X[self.clean_trial_mask]
            self.data.y = self.data.y[self.clean_trial_mask]
            self.subject_labels = self.subject_labels[self.clean_trial_mask]
            print_manager('DONE!!', bottom_return=1)
        else:
            raise ValueError(
                'parsing_type {} not supported.'.format(parsing_type))

        # with an epoched signal available, the folds for cross-subject
        # validation can be created right away
        self.create_balanced_folds()
示例#8
0
def load_and_preprocess_data(data_dir,
                             name_to_start_codes,
                             channel_names,
                             subject_id=1,
                             resampling_freq=None,
                             clean_ival_ms=(0, 4000),
                             train_test_split=True,
                             clean_on_all_channels=True,
                             standardize_mode=None):
    """
    Load the files of one subject and pre-process the continuous signal.

    Steps, in order: load and merge every file, compute the clean trial
    mask, pick the requested channels, optionally resample, optionally
    standardize. The mask is therefore computed before channel picking and
    resampling.

    :param data_dir: directory handed to ``get_data_files_paths``
    :param name_to_start_codes: marker definition used for the clean mask
    :param channel_names: channels to load and to keep
    :param subject_id: subject whose files are loaded
    :param resampling_freq: target frequency; ``None`` skips resampling
    :param clean_ival_ms: interval used to compute the clean trial mask
    :param train_test_split: forwarded to ``get_data_files_paths``
    :param clean_on_all_channels: forwarded to ``load_cnt``
    :param standardize_mode: forwarded to ``standardize_cnt``; ``None``
        skips standardization
    :return: tuple ``(cnt, clean_trial_mask)``
    :raises ValueError: if no file path is found for the subject
    """
    # TODO: create here another get_data_files_paths function if you have a
    #  different file configuration; in every case, file_paths must be a
    #  list of paths to valid BBCI standard files
    # getting data paths
    file_paths = get_data_files_paths(data_dir,
                                      subject_id=subject_id,
                                      train_test_split=train_test_split)

    # starting the loading routine
    print_manager('DATA LOADING ROUTINE FOR SUBJ ' + str(subject_id),
                  'double-dashed')
    print_manager('Loading continuous data...')

    # pre-allocating main cnt
    cnt = None

    # loading files and merging them
    for idx, current_path in enumerate(file_paths):
        current_cnt = load_cnt(file_path=current_path,
                               channel_names=channel_names,
                               clean_on_all_channels=clean_on_all_channels)
        # the first file becomes the main cnt (compared by value, not by
        # identity), the following ones are merged into it
        if idx == 0:
            cnt = deepcopy(current_cnt)
        else:
            cnt = concatenate_raws_with_events([cnt, current_cnt])

    # failing early and clearly instead of passing None down the pipeline
    if cnt is None:
        raise ValueError('No data files found for subject {} in {}.'.format(
            subject_id, data_dir))
    print_manager('DONE!!', bottom_return=1)

    # getting clean_trial_mask
    print_manager('Getting clean trial mask...')
    clean_trial_mask = get_clean_trial_mask(
        cnt=cnt,
        name_to_start_codes=name_to_start_codes,
        clean_ival_ms=clean_ival_ms)
    print_manager('DONE!!', bottom_return=1)

    # pick only right channels
    log.info('Picking only right channels...')
    cnt = pick_right_channels(cnt, channel_names)
    print_manager('DONE!!', bottom_return=1)

    # resample continuous data
    if resampling_freq is not None:
        log.info('Resampling continuous data...')
        cnt = resample_cnt(cnt, resampling_freq)
        print_manager('DONE!!', bottom_return=1)

    # standardize continuous data
    if standardize_mode is not None:
        log.info('Standardizing continuous data...')
        log.info('Standardize mode: {}'.format(standardize_mode))
        cnt = standardize_cnt(cnt=cnt, standardize_mode=standardize_mode)
        print_manager('DONE!!', 'last', bottom_return=1)

    return cnt, clean_trial_mask
示例#9
0
    def make_crops(self, crop_sample_size=None, crop_step=None):
        """
        Crop the train, validation and test splits in place.

        Each ``X``/``y`` pair is replaced by what ``self.crop_X_y`` returns
        for it.

        :param crop_sample_size: crop size handed to ``crop_X_y``; when
            ``None`` the method does nothing
        :param crop_step: crop step handed to ``crop_X_y``
        """
        # TODO: validating inputs
        if crop_sample_size is None:
            return

        print_manager('CROPPING ROUTINE', 'double-dashed')

        # train split
        print_manager('Cropping train...')
        cropped_train = self.crop_X_y(
            self.X_train, self.y_train, crop_sample_size, crop_step)
        self.X_train, self.y_train = cropped_train
        print_manager('DONE!!', bottom_return=1)

        # validation split
        print_manager('Cropping validation...')
        cropped_valid = self.crop_X_y(
            self.X_valid, self.y_valid, crop_sample_size, crop_step)
        self.X_valid, self.y_valid = cropped_valid
        print_manager('DONE!!', bottom_return=1)

        # test split
        print_manager('Cropping test...')
        cropped_test = self.crop_X_y(
            self.X_test, self.y_test, crop_sample_size, crop_step)
        self.X_test, self.y_test = cropped_test
        print_manager('DONE!!', 'last', bottom_return=1)
                         n_folds=n_folds,
                         fold_size=fold_size,
                         validation_frac=0.1,
                         random_state=random_state,
                         shuffle=True,
                         swap_train_test=swap_train_test)
    if n_folds is None:
        cv.balance_train_set(train_size=fold_size)

    # pre-allocating experiment
    exp = None

    # cycling on folds for cross validation
    for fold_idx, current_fold in enumerate(cv.folds):
        # clearing TF graph (https://github.com/keras-team/keras/issues/3579)
        print_manager('CLEARING KERAS BACKEND', print_style='double-dashed')
        K.clear_session()
        print_manager(print_style='last', bottom_return=1)

        # printing fold information
        print_manager('SUBJECT {}, FOLD {}'.format(subject_id, fold_idx + 1),
                      print_style='double-dashed')
        cv.print_fold_classes(fold_idx)
        print_manager(print_style='last', bottom_return=1)

        # creating EEGDataset for current fold
        dataset = cv.create_dataset(fold=current_fold)

        # creating experiment instance
        exp = DLExperiment(
            # non-default inputs
示例#11
0
    def train(self):
        """
        Train the model on the current fold.

        A model report and a CSV train report are always written. Depending
        on the instance settings a per-epoch checkpoint and an early
        stopping callback are added. Training runs through
        ``fit_generator`` when ``self.data_generator`` is ``True``,
        otherwise the dataset is cropped, reshaped and one-hot encoded in
        memory and ``fit`` is used with a ``MetricsTracker`` callback.
        """
        # saving a model picture
        # TODO: model_pic.png saving routine

        # saving a model report
        with open(self.model_report_path, 'w') as mr:
            self.model.summary(print_fn=lambda x: mr.write(x + '\n'))

        # pre-allocating callbacks list
        callbacks = []

        # saving a train report
        csv = CSVLogger(self.train_report_path)
        callbacks.append(csv)

        # saving model each epoch
        if self.save_model_at_each_epoch:
            mcp = ModelCheckpoint(self.h5_model_path)
            callbacks.append(mcp)
        # else:
        # mcp = ModelCheckpoint(self.h5_model_path,
        #                      monitor='val_loss',
        #                      save_best_only=True)
        # callbacks.append(mcp)

        # if early_stopping is True...
        if self.early_stopping is True:
            # putting epochs to a very large number, the callback decides
            # when to stop
            epochs = 1000

            # creating early stopping callback
            esc = EarlyStopping(monitor=self.monitor,
                                min_delta=self.min_delta,
                                patience=self.patience,
                                verbose=1)
            callbacks.append(esc)
        else:
            # getting user defined epochs value
            epochs = self.epochs

        # using fit_generator if a data generator is required
        if self.data_generator is True:
            training_generator = EEGDataGenerator(
                self.dataset.X_train, self.dataset.y_train, self.batch_size,
                self.n_classes, self.crop_sample_size, self.crop_step)
            # validation must run on the validation split, not on the
            # training data again
            validation_generator = EEGDataGenerator(
                self.dataset.X_valid, self.dataset.y_valid, self.batch_size,
                self.n_classes, self.crop_sample_size, self.crop_step)

            # training!
            print_manager(
                'RUNNING TRAINING ON FOLD {}'.format(self.fold_idx + 1),
                'double-dashed')
            self.model.fit_generator(generator=training_generator,
                                     validation_data=validation_generator,
                                     use_multiprocessing=True,
                                     workers=self.workers,
                                     epochs=epochs,
                                     verbose=1,
                                     callbacks=callbacks)
        else:
            # creating crops
            self.dataset.make_crops(self.crop_sample_size, self.crop_step)

            # forcing the x examples to have 4 dimensions
            self.dataset.add_axis()

            # parsing y to categorical
            self.dataset.to_categorical()

            # TODO: MetricsTracker for Data Generation routine
            # creating a MetricsTracker instance, unless one was prepared
            # beforehand (e.g. by the transfer learning routine)
            if self.metrics_tracker is None:
                callbacks.append(
                    MetricsTracker(dataset=self.dataset,
                                   epochs=self.epochs,
                                   n_classes=self.n_classes,
                                   batch_size=self.batch_size,
                                   h5_model_path=self.h5_model_path,
                                   fold_stats_path=self.fold_stats_path))
            else:
                callbacks.append(self.metrics_tracker)

            # training!
            print_manager(
                'RUNNING TRAINING ON FOLD {}'.format(self.fold_idx + 1),
                'double-dashed')
            self.model.fit(x=self.dataset.X_train,
                           y=self.dataset.y_train,
                           validation_data=(self.dataset.X_valid,
                                            self.dataset.y_valid),
                           batch_size=self.batch_size,
                           epochs=epochs,
                           verbose=1,
                           callbacks=callbacks,
                           shuffle=self.shuffle)
示例#12
0
    def __init__(
            self,
            # non-default inputs
            dataset,
            model_name,
            results_dir,
            subj_results_dir,
            name_to_start_codes,
            random_state,
            fold_idx,

            # hyperparameters
            dropout_rate=0.5,
            learning_rate=0.001,
            batch_size=128,
            epochs=10,
            early_stopping=False,
            monitor='val_acc',
            min_delta=0.0001,
            patience=5,
            loss='categorical_crossentropy',
            optimizer='Adam',
            shuffle='False',
            crop_sample_size=None,
            crop_step=None,

            # other parameters
            subject_id=1,
            data_generator=False,
            workers=cpu_count(),
            save_model_at_each_epoch=False):
        """
        Store the experiment settings, set up paths and compile the model.

        :param dataset: dataset of the current fold
        :param model_name: name of the model factory looked up in ``models``
        :param results_dir: root results directory
        :param subj_results_dir: results directory of the subject; its
            parent becomes ``datetime_results_dir``
        :param name_to_start_codes: marker definition, stored as is
        :param random_state: stored as is
        :param fold_idx: index of the current fold
        :param dropout_rate: passed to the model factory
        :param learning_rate: learning rate of the optimizer
        :param batch_size: stored as is
        :param epochs: stored as is
        :param early_stopping: stored as is
        :param monitor: stored as is
        :param min_delta: stored as is
        :param patience: stored as is
        :param loss: loss the model is compiled with
        :param optimizer: optimizer name; only Adam is implemented
        :param shuffle: stored as is
        :param crop_sample_size: crop size; ``None`` means
            ``self.n_samples`` with a crop step of 1
        :param crop_step: crop step, used only when ``crop_sample_size`` is
            given
        :param subject_id: stored as is
        :param data_generator: stored as is
        :param workers: stored as is
        :param save_model_at_each_epoch: stored as is
        """
        # non-default inputs
        self.dataset = dataset
        self.model_name = model_name
        self.results_dir = results_dir
        self.subj_results_dir = subj_results_dir
        self.datetime_results_dir = dirname(subj_results_dir)
        self.name_to_start_codes = name_to_start_codes
        self.random_state = random_state
        self.fold_idx = fold_idx

        # hyperparameters
        self.dropout_rate = dropout_rate
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.epochs = epochs
        self.early_stopping = early_stopping
        self.monitor = monitor
        self.min_delta = min_delta
        self.patience = patience
        self.loss = loss
        self.optimizer = optimizer
        # NOTE(review): the default is the *string* 'False', which is
        # truthy; if the intent was "no shuffling" the default should be
        # the boolean False - confirm before changing, it alters training
        self.shuffle = shuffle
        if crop_sample_size is None:
            # n_samples is not assigned here; presumably a property defined
            # elsewhere on the class - TODO confirm
            self.crop_sample_size = self.n_samples
            self.crop_step = 1
        else:
            self.crop_sample_size = crop_sample_size
            self.crop_step = crop_step

        # other parameters
        self.subject_id = subject_id
        self.data_generator = data_generator
        self.workers = workers
        self.save_model_at_each_epoch = save_model_at_each_epoch
        self.metrics_tracker = None

        # managing paths
        self.dl_results_dir = None
        self.model_results_dir = None
        self.fold_results_dir = None
        self.statistics_dir = None
        self.figures_dir = None
        self.tables_dir = None
        self.model_picture_path = None
        self.model_report_path = None
        self.train_report_path = None
        self.h5_models_dir = None
        self.h5_model_path = None
        self.log_path = None
        self.fold_stats_path = None
        self.paths_manager()

        # importing model
        print_manager('IMPORTING & COMPILING MODEL', 'double-dashed')
        model_inputs_str = ', '.join([
            str(i) for i in [
                self.n_classes, self.n_channels, self.crop_sample_size,
                self.dropout_rate
            ]
        ])
        expression = 'models.' + self.model_name + '(' + model_inputs_str + ')'
        # NOTE(review): eval executes whatever model_name contains; keep
        # model_name under the program's control, never user-supplied text
        self.model = eval(expression)

        # creating optimizer instance
        # NOTE: only Adam is implemented; every value of self.optimizer
        # ends up with an Adam instance
        opt = optimizers.Adam(lr=self.learning_rate)

        # compiling model
        self.model.compile(loss=self.loss, optimizer=opt, metrics=['accuracy'])
        self.model.summary()
        print_manager('DONE!!', print_style='last', bottom_return=1)
示例#13
0
    def run(self):
        """
        Run the whole pipeline: filter bank, folds, binary FBCSP, FBCSP
        and multi-class weighted voting.

        The three stage objects are stored in ``self.binary_csp``,
        ``self.filterbank_csp`` and ``self.multi_class``; each one is run
        right after being created.
        """
        print_manager('INIT TRAINING ROUTINE', 'double-dashed')

        # preparation: filter bank first, folds afterwards
        print_manager('Creating filter bank...')
        self.create_filter_bank()
        print_manager('DONE!!', bottom_return=1)

        print_manager('Creating folds...')
        self.create_folds()
        print_manager('DONE!!', 'last')

        # stage 1: binary FBCSP
        print_manager("RUNNING BINARY FBCSP rLDA",
                      'double-dashed',
                      top_return=1)
        binary_stage = BinaryFBCSP(
            cnt=self.cnt,
            clean_trial_mask=self.clean_trial_mask,
            filterbands=self.filterbands,
            filt_order=self.filt_order,
            folds=self.folds,
            class_pairs=self.class_pairs,
            epoch_ival_ms=self.epoch_ival_ms,
            n_filters=self.n_top_bottom_csp_filters,
            marker_def=self.name_to_start_codes,
            name_to_stop_codes=self.name_to_stop_codes,
            average_trial_covariance=self.average_trial_covariance,
        )
        self.binary_csp = binary_stage
        self.binary_csp.run()

        # stage 2: the actual FBCSP, built on top of the binary one
        print_manager("RUNNING FBCSP rLDA", 'double-dashed', top_return=1)
        fbcsp_stage = FBCSP(
            binary_csp=self.binary_csp,
            n_features=self.n_selected_features,
            n_filterbands=self.n_selected_filterbands,
            forward_steps=self.forward_steps,
            backward_steps=self.backward_steps,
            stop_when_no_improvement=self.stop_when_no_improvement,
        )
        self.filterbank_csp = fbcsp_stage
        self.filterbank_csp.run()

        # stage 3: multi-class weighted voting on labels and predictions
        print_manager("RUNNING MULTICLASS", 'double-dashed', top_return=1)
        voting_stage = MultiClassWeightedVoting(
            train_labels=self.binary_csp.train_labels_full_fold,
            test_labels=self.binary_csp.test_labels_full_fold,
            train_preds=self.filterbank_csp.train_pred_full_fold,
            test_preds=self.filterbank_csp.test_pred_full_fold,
            class_pairs=self.class_pairs,
        )
        self.multi_class = voting_stage
        self.multi_class.run()
        print('\n')