def cross_validate(subj_results_dir, label_names=None):
    # printing the start
    print_manager('RUNNING CROSS-VALIDATION', 'double-dashed')

    # getting fold file paths
    file_paths = CrossValidation.fold_file_paths(subj_results_dir)

    # getting figures and tables directories
    figures_dir, tables_dir = \
        CrossValidation.get_figures_and_tables_dirs(subj_results_dir)

    # determining whether this is a machine or deep learning experiment
    learning_type = CrossValidation.get_learning_type(subj_results_dir)

    # figures (only for deep learning)
    if learning_type == 'dl':
        CrossValidation.figures_manager(file_paths, figures_dir)

    # tables
    CrossValidation.tables_manager(file_paths=file_paths,
                                   tables_dir=tables_dir,
                                   label_names=label_names,
                                   learning_type=learning_type)

    # printing the end
    print_manager('CROSS-VALIDATION ENDED', 'last', bottom_return=1)
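# A minimal usage sketch; the results path and label names below are
# hypothetical examples, not values taken from the original code:
#
#     cross_validate(
#         subj_results_dir='results/dl/2019-01-01_12-00-00/subj_01',
#         label_names=['RightHand', 'LeftHand', 'Feet', 'Rest'])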
def prepare_for_transfer_learning(self,
                                  cross_subj_dir_path,
                                  subject_id,
                                  train_anyway=False):
    # printing the start
    print_manager('PREPARING FOR TRANSFER LEARNING', 'double-dashed')

    # getting this subject's cross-subject dir
    cross_subj_this_subj_dir_path = join(cross_subj_dir_path,
                                         'subj_cross',
                                         my_formatter(subject_id, 'fold'))

    # loading the cross-subject pre-trained weights
    self.model.load_weights(
        join(cross_subj_this_subj_dir_path, 'net_best_val_loss.h5'))

    if not train_anyway:
        # pre-saving this net as the best one
        self.model.save(self.h5_model_path)

        # creating metrics tracker instance
        self.metrics_tracker = MetricsTracker(
            dataset=self.dataset,
            epochs=self.epochs,
            n_classes=self.n_classes,
            batch_size=self.batch_size,
            h5_model_path=self.h5_model_path,
            fold_stats_path=self.fold_stats_path)

        # loading cross-subject info
        with open(join(cross_subj_this_subj_dir_path,
                       'fold_stats.pickle'), 'rb') as f:
            results = load(f)

        # forcing the best net to be the epoch-0 one
        self.metrics_tracker.best['loss'] = results['test']['loss']
        self.metrics_tracker.best['acc'] = results['test']['acc']
        self.metrics_tracker.best['idx'] = 0

    # printing the end
    print_manager('DONE!!', print_style='last', bottom_return=1)
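# Usage sketch for warm-starting from cross-subject weights (the path and
# the values below are hypothetical examples); a positive layers_to_freeze
# freezes from the input side, as freeze_layers below documents:
#
#     exp.prepare_for_transfer_learning(
#         cross_subj_dir_path='results/dl/2019-01-01_12-00-00',
#         subject_id=1)
#     exp.freeze_layers(layers_to_freeze=2)   # freeze the first 2 conv/dense
#     exp.train()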
def dl_loader(data_dir,
              name_to_start_codes,
              channel_names,
              subject_id=1,
              resampling_freq=None,
              clean_ival_ms=(0, 4000),
              epoch_ival_ms=(-500, 4000),
              train_test_split=True,
              clean_on_all_channels=True,
              standardize_mode=0):
    # loading and pre-processing data
    cnt, clean_trial_mask = load_and_preprocess_data(
        data_dir=data_dir,
        name_to_start_codes=name_to_start_codes,
        channel_names=channel_names,
        subject_id=subject_id,
        resampling_freq=resampling_freq,
        clean_ival_ms=clean_ival_ms,
        train_test_split=train_test_split,
        clean_on_all_channels=clean_on_all_channels,
        standardize_mode=standardize_mode)
    print_manager('EPOCHING AND CLEANING WITH MASK', 'double-dashed')

    # epoching continuous data (from RawArray to SignalAndTarget)
    print_manager('Epoching...')
    epo = create_signal_target_from_raw_mne(cnt,
                                            name_to_start_codes,
                                            epoch_ival_ms)
    print_manager('DONE!!', bottom_return=1)

    # cleaning epoched signal with mask
    print_manager('Cleaning with mask...')
    epo.X = epo.X[clean_trial_mask]
    epo.y = epo.y[clean_trial_mask]
    print_manager('DONE!!', 'last', bottom_return=1)

    # returning only the epoched signal
    return epo
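# A minimal usage sketch. The data directory, marker codes and channel list
# are hypothetical examples (the real values depend on the dataset), using
# the OrderedDict marker convention expected by braindecode:
#
#     from collections import OrderedDict
#     epo = dl_loader(
#         data_dir='datasets/BBCI-without-last-runs',
#         name_to_start_codes=OrderedDict([('Right Hand', [1]),
#                                          ('Left Hand', [2]),
#                                          ('Rest', [3]),
#                                          ('Feet', [4])]),
#         channel_names=['C3', 'Cz', 'C4'],
#         subject_id=1,
#         resampling_freq=250,
#         standardize_mode=0)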
def freeze_layers(self, layers_to_freeze):
    print_manager('FREEZING LAYERS', 'double-dashed')
    if layers_to_freeze == 0:
        print('NOTHING TO FREEZE!!')
    else:
        print("I'm gonna freeze {} layers.".format(layers_to_freeze))

    # freezing layers: a positive value freezes from the input side,
    # a negative one from the output side
    frozen = 0
    if layers_to_freeze > 0:
        idx = 0
        step = 1
    else:
        idx = -1
        step = -1
        layers_to_freeze = -layers_to_freeze
    while frozen < layers_to_freeze:
        layer = self.model.layers[idx]
        # only convolutional and dense layers count as freezable
        if layer.name.startswith('conv') or layer.name.startswith('dense'):
            layer.trainable = False
            frozen += 1
        idx += step

    # creating optimizer instance ('Adam' is the only optimizer currently
    # supported, so it is also the fallback)
    opt = optimizers.Adam(lr=self.learning_rate)

    # re-compiling model: required for the `trainable` changes to take effect
    self.model.compile(loss=self.loss,
                       optimizer=opt,
                       metrics=['accuracy'])

    # printing model information
    self.model.summary()
    print_manager('DONE!!', print_style='last', bottom_return=1)
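# Self-contained sketch of the same freeze-then-recompile pattern on a toy
# Keras model (layer names, sizes and shapes below are arbitrary examples):
#
#     from keras.models import Sequential
#     from keras.layers import Conv1D, Dense, Flatten
#     from keras import optimizers
#
#     toy = Sequential([Conv1D(8, 3, input_shape=(1000, 22), name='conv_1'),
#                       Flatten(name='flatten_1'),
#                       Dense(4, activation='softmax', name='dense_1')])
#     for layer in toy.layers:
#         if layer.name.startswith(('conv', 'dense')):
#             layer.trainable = False   # the optimizer now skips these weights
#     # recompiling is required, otherwise `trainable` changes are ignored
#     toy.compile(loss='categorical_crossentropy',
#                 optimizer=optimizers.Adam(lr=0.001),
#                 metrics=['accuracy'])
#     toy.summary()                     # reports the non-trainable params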
def on_train_end(self, logs=None):
    # printing the end of training
    print_manager('TRAINING ENDED', 'last', bottom_return=1)

    # printing the start of testing
    print_manager('RUNNING TESTING', 'double-dashed')

    # loading best net
    print('BEST NET found at epoch: {}'.format(self.best['idx'] + 1))
    print('Loading best net weights and testing.')
    self.model.load_weights(self.h5_model_path)

    # running test
    test_loss, test_acc = self.model.evaluate(self.dataset.X_test,
                                              self.dataset.y_test,
                                              verbose=1)
    print('Test loss:', test_loss)
    print('Test acc:', test_acc)

    # making predictions on X_test with the final model and getting also
    # y_test from memory; parsing both back from categorical
    y_test = self.dataset.y_test.argmax(axis=1)
    y_pred = self.model.predict(self.dataset.X_test).argmax(axis=1)

    # computing confusion matrix
    conf_mtx = confusion_matrix(y_true=y_test, y_pred=y_pred)
    print("\nConfusion matrix:\n", conf_mtx)

    # creating results dictionary
    results = {
        'train': {
            'loss': self.train['loss'],
            'acc': self.train['acc']
        },
        'valid': {
            'loss': self.valid['loss'],
            'acc': self.valid['acc']
        },
        'best': {
            'loss': self.best['loss'],
            'acc': self.best['acc'],
            'idx': self.best['idx']
        },
        'test': {
            'loss': test_loss,
            'acc': test_acc,
            'conf_mtx': conf_mtx.tolist()
        }
    }

    # dumping and saving fold statistics
    with open(self.fold_stats_path, 'wb') as f:
        dump(results, f)

    # printing the end
    print_manager('TESTING ENDED', 'last', bottom_return=1)
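# Quick self-contained illustration of the categorical -> integer parsing
# used above (toy one-hot data, purely illustrative):
#
#     import numpy as np
#     y_cat = np.array([[0, 1, 0],
#                       [1, 0, 0],
#                       [0, 0, 1]])
#     print(y_cat.argmax(axis=1))   # -> [1 0 2], one integer label per row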
def epo_to_dataset(self, leave_subj, parsing_type=0):
    print_manager('FOLD ALL BUT ' + str(leave_subj), 'double-dashed')
    print_manager('Creating current fold...')
    print_manager('DONE!!', bottom_return=1)

    print_manager('Parsing epoched signal to EEGDataset...')
    if parsing_type == 0:
        # soft parsing was used: the epoched signal lives in fold_data
        self.fold_data = CrossValidation.create_dataset_static(
            self.fold_data, self.folds[leave_subj - 1])
    elif parsing_type == 1:
        # destructive parsing was used: the epoched signal now lives in data
        self.fold_data = CrossValidation.create_dataset_static(
            self.data, self.folds[leave_subj - 1])
    else:
        raise ValueError(
            'parsing_type {} not supported.'.format(parsing_type))
    print_manager('DONE!!', bottom_return=1)

    print_manager('We obtained a ' + str(self.fold_data))
    print_manager('DATA READY!!', 'last', bottom_return=1)
def cnt_to_epo(self, parsing_type):
    # checking if data is cnt; if not, there is nothing to parse
    if isinstance(self.data, RawArray):
        if parsing_type == 0:
            """
            WHAT'S GOING ON HERE?
            ---------------------
            If parsing_type is 0, there will be a 'soft' parsing
            routine: data will be parsed and stored in fold_data
            instead of in the main data property.
            """
            # parsing from cnt to epoch
            print_manager('Parsing cnt signal to epoched one...')
            self.fold_data = create_signal_target_from_raw_mne(
                self.data, self.name_to_start_codes, self.epoch_ival_ms)
            print_manager('DONE!!', bottom_return=1)

            # cleaning signal and labels with mask
            print_manager('Cleaning epoched signal with mask...')
            self.fold_data.X = self.fold_data.X[self.clean_trial_mask]
            self.fold_data.y = self.fold_data.y[self.clean_trial_mask]
            self.fold_subject_labels = \
                self.subject_labels[self.clean_trial_mask]
            print_manager('DONE!!', bottom_return=1)
        elif parsing_type == 1:
            """
            WHAT'S GOING ON HERE?
            ---------------------
            If parsing_type is 1, the epoched signal will replace the
            original one in the data property.
            """
            print_manager('Parsing cnt signal to epoched one...')
            self.data = create_signal_target_from_raw_mne(
                self.data, self.name_to_start_codes, self.epoch_ival_ms)
            print_manager('DONE!!', bottom_return=1)

            # cleaning signal and labels with mask
            print_manager('Cleaning epoched signal with mask...')
            self.data.X = self.data.X[self.clean_trial_mask]
            self.data.y = self.data.y[self.clean_trial_mask]
            self.subject_labels = \
                self.subject_labels[self.clean_trial_mask]
            print_manager('DONE!!', bottom_return=1)
        else:
            raise ValueError(
                'parsing_type {} not supported.'.format(parsing_type))

    # now that we have an epoched signal, we can already create
    # folds for cross-subject validation
    self.create_balanced_folds()
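# Usage sketch (on a hypothetical `cv` experiment object): parsing_type=0
# epochs into fold_data and leaves the continuous `data` untouched, so the
# same RawArray can be re-parsed later; parsing_type=1 replaces `data` in
# place and frees the continuous signal:
#
#     cv.cnt_to_epo(parsing_type=0)   # soft parse
#     cv.cnt_to_epo(parsing_type=1)   # destructive parse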
def load_and_preprocess_data(data_dir,
                             name_to_start_codes,
                             channel_names,
                             subject_id=1,
                             resampling_freq=None,
                             clean_ival_ms=(0, 4000),
                             train_test_split=True,
                             clean_on_all_channels=True,
                             standardize_mode=None):
    # TODO: create here another get_data_files_paths function if you have
    #  a different file configuration; in any case, file_paths must be a
    #  list of paths to valid BBCI standard files

    # getting data paths
    file_paths = get_data_files_paths(data_dir,
                                      subject_id=subject_id,
                                      train_test_split=train_test_split)

    # starting the loading routine
    print_manager('DATA LOADING ROUTINE FOR SUBJ ' + str(subject_id),
                  'double-dashed')
    print_manager('Loading continuous data...')

    # pre-allocating main cnt
    cnt = None

    # loading files and merging them
    for idx, current_path in enumerate(file_paths):
        current_cnt = load_cnt(file_path=current_path,
                               channel_names=channel_names,
                               clean_on_all_channels=clean_on_all_channels)
        if idx == 0:
            # first file: copying current_cnt as the main one
            cnt = deepcopy(current_cnt)
        else:
            # merging current_cnt with the main one
            cnt = concatenate_raws_with_events([cnt, current_cnt])
    print_manager('DONE!!', bottom_return=1)

    # getting clean_trial_mask
    print_manager('Getting clean trial mask...')
    clean_trial_mask = get_clean_trial_mask(
        cnt=cnt,
        name_to_start_codes=name_to_start_codes,
        clean_ival_ms=clean_ival_ms)
    print_manager('DONE!!', bottom_return=1)

    # picking only the requested channels
    log.info('Picking only right channels...')
    cnt = pick_right_channels(cnt, channel_names)
    print_manager('DONE!!', bottom_return=1)

    # resampling continuous data
    if resampling_freq is not None:
        log.info('Resampling continuous data...')
        cnt = resample_cnt(cnt, resampling_freq)
        print_manager('DONE!!', bottom_return=1)

    # standardizing continuous data
    if standardize_mode is not None:
        log.info('Standardizing continuous data...')
        log.info('Standardize mode: {}'.format(standardize_mode))
        cnt = standardize_cnt(cnt=cnt, standardize_mode=standardize_mode)
    print_manager('DONE!!', 'last', bottom_return=1)

    return cnt, clean_trial_mask
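# A note on the merge loop above: the first RawArray is deep-copied,
# presumably so the merged object never aliases load_cnt's return value.
# An equivalent, slightly more compact sketch of the same loop:
#
#     cnt = None
#     for current_path in file_paths:
#         current_cnt = load_cnt(file_path=current_path, ...)
#         cnt = deepcopy(current_cnt) if cnt is None \
#             else concatenate_raws_with_events([cnt, current_cnt])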
def make_crops(self, crop_sample_size=None, crop_step=None):
    # TODO: validating inputs
    if crop_sample_size is not None:
        # printing
        print_manager('CROPPING ROUTINE', 'double-dashed')

        # cropping train
        print_manager('Cropping train...')
        self.X_train, self.y_train = self.crop_X_y(self.X_train,
                                                   self.y_train,
                                                   crop_sample_size,
                                                   crop_step)
        print_manager('DONE!!', bottom_return=1)

        # cropping valid
        print_manager('Cropping validation...')
        self.X_valid, self.y_valid = self.crop_X_y(self.X_valid,
                                                   self.y_valid,
                                                   crop_sample_size,
                                                   crop_step)
        print_manager('DONE!!', bottom_return=1)

        # cropping test
        print_manager('Cropping test...')
        self.X_test, self.y_test = self.crop_X_y(self.X_test,
                                                 self.y_test,
                                                 crop_sample_size,
                                                 crop_step)
        print_manager('DONE!!', 'last', bottom_return=1)
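# Self-contained sketch of the sliding-window arithmetic behind cropping
# (assuming crop_X_y extracts every full window of crop_sample_size samples,
# advancing by crop_step; the trial shape below is a toy example):
#
#     import numpy as np
#
#     def n_crops_per_trial(n_samples, crop_sample_size, crop_step):
#         # number of full windows that fit inside one trial
#         return (n_samples - crop_sample_size) // crop_step + 1
#
#     X = np.zeros((10, 22, 1000))  # 10 trials, 22 channels, 1000 samples
#     print(n_crops_per_trial(X.shape[-1], 500, 250))
#     # -> 3 crops per trial, i.e. 30 training examples after cropping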
    n_folds=n_folds,
    fold_size=fold_size,
    validation_frac=0.1,
    random_state=random_state,
    shuffle=True,
    swap_train_test=swap_train_test)
if n_folds is None:
    cv.balance_train_set(train_size=fold_size)

# pre-allocating experiment
exp = None

# cycling on folds for cross-validation
for fold_idx, current_fold in enumerate(cv.folds):
    # clearing the TF graph
    # (see https://github.com/keras-team/keras/issues/3579)
    print_manager('CLEARING KERAS BACKEND', print_style='double-dashed')
    K.clear_session()
    print_manager(print_style='last', bottom_return=1)

    # printing fold information
    print_manager('SUBJECT {}, FOLD {}'.format(subject_id, fold_idx + 1),
                  print_style='double-dashed')
    cv.print_fold_classes(fold_idx)
    print_manager(print_style='last', bottom_return=1)

    # creating EEGDataset for current fold
    dataset = cv.create_dataset(fold=current_fold)

    # creating experiment instance
    exp = DLExperiment(
        # non-default inputs
def train(self):
    # saving a model picture
    # TODO: model_pic.png saving routine

    # saving a model report
    with open(self.model_report_path, 'w') as mr:
        self.model.summary(print_fn=lambda x: mr.write(x + '\n'))

    # pre-allocating callbacks list
    callbacks = []

    # saving a train report
    csv = CSVLogger(self.train_report_path)
    callbacks.append(csv)

    # saving the model at each epoch
    if self.save_model_at_each_epoch:
        mcp = ModelCheckpoint(self.h5_model_path)
        callbacks.append(mcp)
    # else:
    #     mcp = ModelCheckpoint(self.h5_model_path,
    #                           monitor='val_loss',
    #                           save_best_only=True)
    #     callbacks.append(mcp)

    # if early stopping is enabled...
    if self.early_stopping:
        # ...pushing epochs to a very large number...
        epochs = 1000

        # ...and letting the early stopping callback decide when to stop
        esc = EarlyStopping(monitor=self.monitor,
                            min_delta=self.min_delta,
                            patience=self.patience,
                            verbose=1)
        callbacks.append(esc)
    else:
        # getting the user-defined epochs value
        epochs = self.epochs

    # using fit_generator if a data generator is required; this streams
    # crops batch by batch instead of materializing them all in memory
    if self.data_generator:
        training_generator = EEGDataGenerator(self.dataset.X_train,
                                              self.dataset.y_train,
                                              self.batch_size,
                                              self.n_classes,
                                              self.crop_sample_size,
                                              self.crop_step)
        validation_generator = EEGDataGenerator(self.dataset.X_valid,
                                                self.dataset.y_valid,
                                                self.batch_size,
                                                self.n_classes,
                                                self.crop_sample_size,
                                                self.crop_step)

        # training!
        print_manager(
            'RUNNING TRAINING ON FOLD {}'.format(self.fold_idx + 1),
            'double-dashed')
        self.model.fit_generator(generator=training_generator,
                                 validation_data=validation_generator,
                                 use_multiprocessing=True,
                                 workers=self.workers,
                                 epochs=epochs,
                                 verbose=1,
                                 callbacks=callbacks)
    else:
        # creating crops
        self.dataset.make_crops(self.crop_sample_size, self.crop_step)

        # forcing the x examples to have 4 dimensions
        self.dataset.add_axis()

        # parsing y to categorical
        self.dataset.to_categorical()

        # TODO: MetricsTracker for the data generation routine
        # creating a MetricsTracker instance if there is not already one
        # (prepare_for_transfer_learning may have created it)
        if self.metrics_tracker is None:
            callbacks.append(
                MetricsTracker(dataset=self.dataset,
                               epochs=self.epochs,
                               n_classes=self.n_classes,
                               batch_size=self.batch_size,
                               h5_model_path=self.h5_model_path,
                               fold_stats_path=self.fold_stats_path))
        else:
            callbacks.append(self.metrics_tracker)

        # training!
        print_manager(
            'RUNNING TRAINING ON FOLD {}'.format(self.fold_idx + 1),
            'double-dashed')
        self.model.fit(x=self.dataset.X_train,
                       y=self.dataset.y_train,
                       validation_data=(self.dataset.X_valid,
                                        self.dataset.y_valid),
                       batch_size=self.batch_size,
                       epochs=epochs,
                       verbose=1,
                       callbacks=callbacks,
                       shuffle=self.shuffle)
def __init__(self,
             # non-default inputs
             dataset,
             model_name,
             results_dir,
             subj_results_dir,
             name_to_start_codes,
             random_state,
             fold_idx,

             # hyperparameters
             dropout_rate=0.5,
             learning_rate=0.001,
             batch_size=128,
             epochs=10,
             early_stopping=False,
             monitor='val_acc',
             min_delta=0.0001,
             patience=5,
             loss='categorical_crossentropy',
             optimizer='Adam',
             shuffle=False,
             crop_sample_size=None,
             crop_step=None,

             # other parameters
             subject_id=1,
             data_generator=False,
             workers=cpu_count(),
             save_model_at_each_epoch=False):
    # non-default inputs
    self.dataset = dataset
    self.model_name = model_name
    self.results_dir = results_dir
    self.subj_results_dir = subj_results_dir
    self.datetime_results_dir = dirname(subj_results_dir)
    self.name_to_start_codes = name_to_start_codes
    self.random_state = random_state
    self.fold_idx = fold_idx

    # hyperparameters
    self.dropout_rate = dropout_rate
    self.learning_rate = learning_rate
    self.batch_size = batch_size
    self.epochs = epochs
    self.early_stopping = early_stopping
    self.monitor = monitor
    self.min_delta = min_delta
    self.patience = patience
    self.loss = loss
    self.optimizer = optimizer
    self.shuffle = shuffle
    if crop_sample_size is None:
        # no cropping: a single crop spanning the whole trial
        self.crop_sample_size = self.n_samples
        self.crop_step = 1
    else:
        self.crop_sample_size = crop_sample_size
        self.crop_step = crop_step

    # other parameters
    self.subject_id = subject_id
    self.data_generator = data_generator
    self.workers = workers
    self.save_model_at_each_epoch = save_model_at_each_epoch
    self.metrics_tracker = None

    # managing paths
    self.dl_results_dir = None
    self.model_results_dir = None
    self.fold_results_dir = None
    self.statistics_dir = None
    self.figures_dir = None
    self.tables_dir = None
    self.model_picture_path = None
    self.model_report_path = None
    self.train_report_path = None
    self.h5_models_dir = None
    self.h5_model_path = None
    self.log_path = None
    self.fold_stats_path = None
    self.paths_manager()

    # importing model: looking the model function up by name in the
    # models module and calling it (safer than building an eval string)
    print_manager('IMPORTING & COMPILING MODEL', 'double-dashed')
    model_fn = getattr(models, self.model_name)
    self.model = model_fn(self.n_classes,
                          self.n_channels,
                          self.crop_sample_size,
                          self.dropout_rate)

    # creating optimizer instance ('Adam' is the only optimizer currently
    # supported, so it is also the fallback)
    opt = optimizers.Adam(lr=self.learning_rate)

    # compiling model
    self.model.compile(loss=self.loss,
                       optimizer=opt,
                       metrics=['accuracy'])
    self.model.summary()
    print_manager('DONE!!', print_style='last', bottom_return=1)
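# A minimal construction sketch (all values below are hypothetical
# examples; `dataset` is an EEGDataset such as the one produced by
# cv.create_dataset in the cross-validation driver above, and the model
# name must match a function in the models module):
#
#     exp = DLExperiment(dataset=dataset,
#                        model_name='DeepConvNet',
#                        results_dir='results',
#                        subj_results_dir='results/.../subj_01',
#                        name_to_start_codes=name_to_start_codes,
#                        random_state=1234,
#                        fold_idx=0,
#                        learning_rate=0.001,
#                        epochs=100,
#                        early_stopping=True)
#     exp.train()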
def run(self):
    # printing routine start
    print_manager('INIT TRAINING ROUTINE', 'double-dashed')

    # creating filter bank
    print_manager('Creating filter bank...')
    self.create_filter_bank()
    print_manager('DONE!!', bottom_return=1)

    # creating folds
    print_manager('Creating folds...')
    self.create_folds()
    print_manager('DONE!!', 'last')

    # running binary FBCSP
    print_manager('RUNNING BINARY FBCSP rLDA',
                  'double-dashed',
                  top_return=1)
    self.binary_csp = BinaryFBCSP(
        cnt=self.cnt,
        clean_trial_mask=self.clean_trial_mask,
        filterbands=self.filterbands,
        filt_order=self.filt_order,
        folds=self.folds,
        class_pairs=self.class_pairs,
        epoch_ival_ms=self.epoch_ival_ms,
        n_filters=self.n_top_bottom_csp_filters,
        marker_def=self.name_to_start_codes,
        name_to_stop_codes=self.name_to_stop_codes,
        average_trial_covariance=self.average_trial_covariance)
    self.binary_csp.run()

    # at the very end of the binary CSP experiment, running the real one
    print_manager('RUNNING FBCSP rLDA', 'double-dashed', top_return=1)
    self.filterbank_csp = FBCSP(
        binary_csp=self.binary_csp,
        n_features=self.n_selected_features,
        n_filterbands=self.n_selected_filterbands,
        forward_steps=self.forward_steps,
        backward_steps=self.backward_steps,
        stop_when_no_improvement=self.stop_when_no_improvement)
    self.filterbank_csp.run()

    # and finally multiclass
    print_manager('RUNNING MULTICLASS', 'double-dashed', top_return=1)
    self.multi_class = MultiClassWeightedVoting(
        train_labels=self.binary_csp.train_labels_full_fold,
        test_labels=self.binary_csp.test_labels_full_fold,
        train_preds=self.filterbank_csp.train_pred_full_fold,
        test_preds=self.filterbank_csp.test_pred_full_fold,
        class_pairs=self.class_pairs)
    self.multi_class.run()
    print('\n')