def load_bbci_data(filename, low_cut_hz):
    """Load a BBCI recording, clean, pick motor-cortex sensors, filter, epoch.

    Parameters
    ----------
    filename : str
        Path to the BBCI dataset file.
    low_cut_hz : float
        Low cut-off frequency in Hz for the highpass filter.

    Returns
    -------
    X, y : ndarray
        Epoched trials (trials x channels x samples) and their labels,
        restricted to trials whose absolute amplitude stays below 800 uV.
    """
    load_sensor_names = None
    loader = BBCIDataset(filename, load_sensor_names=load_sensor_names)

    log.info("Loading data...")
    cnt = loader.load()

    # Cleaning: first find all trials that have absolute microvolt values
    # larger than +-800 inside them and remember them for removal later.
    log.info("Cutting trials...")
    marker_def = OrderedDict([('Right Hand', [1]), ('Left Hand', [2],),
                              ('Rest', [3]), ('Feet', [4])])
    clean_ival = [0, 4000]
    set_for_cleaning = create_signal_target_from_raw_mne(cnt, marker_def,
                                                         clean_ival)
    clean_trial_mask = np.max(np.abs(set_for_cleaning.X), axis=(1, 2)) < 800
    log.info("Clean trials: {:3d} of {:3d} ({:5.1f}%)".format(
        np.sum(clean_trial_mask),
        len(set_for_cleaning.X),
        np.mean(clean_trial_mask) * 100))

    # Now pick only sensors with C in their name, as they cover motor cortex.
    C_sensors = ['FC5', 'FC1', 'FC2', 'FC6', 'C3', 'C4', 'CP5', 'CP1', 'CP2',
                 'CP6', 'FC3', 'FCz', 'FC4', 'C5', 'C1', 'C2', 'C6', 'CP3',
                 'CPz', 'CP4', 'FFC5h', 'FFC3h', 'FFC4h', 'FFC6h', 'FCC5h',
                 'FCC3h', 'FCC4h', 'FCC6h', 'CCP5h', 'CCP3h', 'CCP4h',
                 'CCP6h', 'CPP5h', 'CPP3h', 'CPP4h', 'CPP6h', 'FFC1h',
                 'FFC2h', 'FCC1h', 'FCC2h', 'CCP1h', 'CCP2h', 'CPP1h',
                 'CPP2h']
    cnt = cnt.pick_channels(C_sensors)

    # Further preprocessing.
    log.info("Resampling...")
    cnt = resample_cnt(cnt, 250.0)
    # (Removed stray debug print "REREFERENCING" -- no rereferencing is
    # performed here; the print was misleading leftover output.)
    log.info("Highpassing...")
    cnt = mne_apply(
        lambda a: highpass_cnt(a, low_cut_hz, cnt.info['sfreq'],
                               filt_order=3, axis=1),
        cnt)
    log.info("Standardizing...")
    # exponential_running_standardize expects time x chans, the mne array is
    # chans x time -- hence the double transpose.
    cnt = mne_apply(
        lambda a: exponential_running_standardize(
            a.T, factor_new=1e-3, init_block_size=1000, eps=1e-4).T,
        cnt)

    # Trial interval: start at -500 ms already, since this improved decoding
    # for the networks.
    ival = [-500, 4000]
    dataset = create_signal_target_from_raw_mne(cnt, marker_def, ival)
    # Drop the trials flagged as artifacts above.
    dataset.X = dataset.X[clean_trial_mask]
    dataset.y = dataset.y[clean_trial_mask]
    return dataset.X, dataset.y
def processing_data(data_folder, subject_id, low_cut_hz, high_cut_hz,
                    factor_new, init_block_size, ival, valid_set_fraction):
    """Load, preprocess and epoch one BCI Competition IV 2a subject.

    Parameters
    ----------
    data_folder : str
        Directory containing A0xT.gdf / A0xE.gdf and their .mat label files.
    subject_id : int
        Subject number, used to build the file names.
    low_cut_hz, high_cut_hz : float
        Bandpass corner frequencies in Hz.
    factor_new, init_block_size :
        Parameters for exponential running standardization.
    ival : list of int
        Epoch interval in ms relative to trial onset.
    valid_set_fraction : float
        Fraction of the training set split off as validation set.

    Returns
    -------
    train_set, valid_set, test_set :
        Epoched SignalAndTarget sets.
    """

    def _preprocess(cnt):
        # Shared preprocessing chain for the train and test recordings:
        # drop non-EEG channels, scale to microvolt, bandpass, standardize.
        cnt = cnt.drop_channels(
            ['STI 014', 'EOG-left', 'EOG-central', 'EOG-right'])
        assert len(cnt.ch_names) == 22
        # Convert to microvolt for numerical stability of next operations.
        cnt = mne_apply(lambda a: a * 1e6, cnt)
        cnt = mne_apply(
            lambda a: bandpass_cnt(a, low_cut_hz, high_cut_hz,
                                   cnt.info['sfreq'], filt_order=3, axis=1),
            cnt)
        # exponential_running_standardize expects time x chans, hence .T.
        cnt = mne_apply(
            lambda a: exponential_running_standardize(
                a.T, factor_new=factor_new,
                init_block_size=init_block_size, eps=1e-4).T,
            cnt)
        return cnt

    train_filename = 'A{:02d}T.gdf'.format(subject_id)
    test_filename = 'A{:02d}E.gdf'.format(subject_id)
    train_filepath = os.path.join(data_folder, train_filename)
    test_filepath = os.path.join(data_folder, test_filename)
    train_label_filepath = train_filepath.replace('.gdf', '.mat')
    test_label_filepath = test_filepath.replace('.gdf', '.mat')

    train_loader = BCICompetition4Set2A(
        train_filepath, labels_filename=train_label_filepath)
    test_loader = BCICompetition4Set2A(
        test_filepath, labels_filename=test_label_filepath)
    train_cnt = train_loader.load()
    test_cnt = test_loader.load()

    # Identical preprocessing for both sessions (previously duplicated).
    train_cnt = _preprocess(train_cnt)
    test_cnt = _preprocess(test_cnt)

    marker_def = OrderedDict([('Left Hand', [1]), ('Right Hand', [2],),
                              ('Foot', [3]), ('Tongue', [4])])

    train_set = create_signal_target_from_raw_mne(
        train_cnt, marker_def, ival)
    test_set = create_signal_target_from_raw_mne(
        test_cnt, marker_def, ival)

    train_set, valid_set = split_into_two_sets(
        train_set, first_set_fraction=1 - valid_set_fraction)
    return train_set, valid_set, test_set
def preprocessing(data_folder, subject_id, low_cut_hz):
    """Load, preprocess and epoch one BCI Competition IV 2a subject,
    publishing the resulting sets and model dimensions via module-level
    globals instead of return values.

    NOTE(review): `cuda` is read below but never defined in this function;
    it is presumably a module-level global -- confirm before reuse.
    """
    # Results are shared with the rest of the module through globals.
    global train_set, test_set, valid_set, n_classes, n_chans
    global n_iters, input_time_length
    # def run_exp(data_folder, subject_id, low_cut_hz, model, cuda):
    # Build file paths for the training (T) and evaluation (E) sessions.
    train_filename = 'A{:02d}T.gdf'.format(subject_id)
    test_filename = 'A{:02d}E.gdf'.format(subject_id)
    train_filepath = os.path.join(data_folder, train_filename)
    test_filepath = os.path.join(data_folder, test_filename)
    train_label_filepath = train_filepath.replace('.gdf', '.mat')
    test_label_filepath = test_filepath.replace('.gdf', '.mat')
    train_loader = BCICompetition4Set2A(
        train_filepath, labels_filename=train_label_filepath)
    test_loader = BCICompetition4Set2A(
        test_filepath, labels_filename=test_label_filepath)
    train_cnt = train_loader.load()
    test_cnt = test_loader.load()
    # Keep only the 22 EEG channels.
    train_cnt = train_cnt.drop_channels(['STI 014', 'EOG-left',
                                         'EOG-central', 'EOG-right'])
    assert len(train_cnt.ch_names) == 22
    # lets convert to millvolt for numerical stability of next operations
    train_cnt = mne_apply(lambda a: a * 1e6, train_cnt)
    # Bandpass between low_cut_hz and 38 Hz.
    train_cnt = mne_apply(
        lambda a: bandpass_cnt(a, low_cut_hz, 38, train_cnt.info['sfreq'],
                               filt_order=3, axis=1), train_cnt)
    # exponential_running_standardize expects time x chans, hence the .T.
    train_cnt = mne_apply(
        lambda a: exponential_running_standardize(a.T, factor_new=1e-3,
                                                  init_block_size=1000,
                                                  eps=1e-4).T, train_cnt)
    # Same preprocessing chain for the evaluation session.
    test_cnt = test_cnt.drop_channels(['STI 014', 'EOG-left',
                                       'EOG-central', 'EOG-right'])
    assert len(test_cnt.ch_names) == 22
    test_cnt = mne_apply(lambda a: a * 1e6, test_cnt)
    test_cnt = mne_apply(
        lambda a: bandpass_cnt(a, low_cut_hz, 38, test_cnt.info['sfreq'],
                               filt_order=3, axis=1), test_cnt)
    test_cnt = mne_apply(
        lambda a: exponential_running_standardize(a.T, factor_new=1e-3,
                                                  init_block_size=1000,
                                                  eps=1e-4).T, test_cnt)
    # Event-code mapping for the four motor-imagery classes.
    marker_def = OrderedDict([('Left Hand', [1]), ('Right Hand', [2],),
                              ('Foot', [3]), ('Tongue', [4])])
    # Epoch window in ms around trial onset.
    ival = [-500, 4000]
    train_set = create_signal_target_from_raw_mne(train_cnt, marker_def,
                                                  ival)
    test_set = create_signal_target_from_raw_mne(test_cnt, marker_def, ival)
    # 80/20 train/validation split.
    train_set, valid_set = split_into_two_sets(train_set,
                                               first_set_fraction=0.8)
    set_random_seeds(seed=20190706, cuda=cuda)
    n_classes = 4
    n_chans = int(train_set.X.shape[1])
    input_time_length = 1000
def load_bbci_data(filename, low_cut_hz, debug=False):
    """Load one High-Gamma-Dataset recording, preprocess it and return the
    epoched dataset restricted to clean trials.

    Fix: `clean_trial_mask` was previously computed and logged but never
    applied to the returned dataset; it is now used to drop trials that
    exceed +-800 absolute amplitude, consistent with the other loaders in
    this file.

    Parameters
    ----------
    filename : str
        Path to the BBCI dataset file.
    low_cut_hz : float
        Low cut-off frequency in Hz for the highpass filter.
    debug : bool
        If True, load only three channels for fast experiments.
    """
    load_sensor_names = None
    if debug:
        load_sensor_names = ['C3', 'C4', 'C2']
    loader = BBCIDataset(filename, load_sensor_names=load_sensor_names)

    log.info("Loading data...")
    cnt = loader.load()

    log.info("Cutting trials...")
    marker_def = OrderedDict([('Right Hand', [1]), ('Left Hand', [2],),
                              ('Rest', [3]), ('Feet', [4])])
    clean_ival = [0, 4000]
    set_for_cleaning = create_signal_target_from_raw_mne(
        cnt, marker_def, clean_ival)
    # Keep trials whose absolute amplitude stays below 800.
    clean_trial_mask = np.max(np.abs(set_for_cleaning.X), axis=(1, 2)) < 800
    log.info("Clean trials: {:3d} of {:3d} ({:5.1f}%)".format(
        np.sum(clean_trial_mask),
        len(set_for_cleaning.X),
        np.mean(clean_trial_mask) * 100))

    # Sensors with C in their name cover motor cortex.
    C_sensors = [
        'FC5', 'FC1', 'FC2', 'FC6', 'C3', 'C4', 'CP5', 'CP1', 'CP2', 'CP6',
        'FC3', 'FCz', 'FC4', 'C5', 'C1', 'C2', 'C6', 'CP3', 'CPz', 'CP4',
        'FFC5h', 'FFC3h', 'FFC4h', 'FFC6h', 'FCC5h', 'FCC3h', 'FCC4h',
        'FCC6h', 'CCP5h', 'CCP3h', 'CCP4h', 'CCP6h', 'CPP5h', 'CPP3h',
        'CPP4h', 'CPP6h', 'FFC1h', 'FFC2h', 'FCC1h', 'FCC2h', 'CCP1h',
        'CCP2h', 'CPP1h', 'CPP2h'
    ]
    if debug:
        C_sensors = load_sensor_names
    cnt = cnt.pick_channels(C_sensors)

    # lets convert to microvolt for numerical stability of next operations
    cnt = mne_apply(lambda a: a * 1e6, cnt)
    log.info("Resampling...")
    cnt = resample_cnt(cnt, 250.0)
    log.info("Highpassing...")
    cnt = mne_apply(
        lambda a: highpass_cnt(
            a, low_cut_hz, cnt.info['sfreq'], filt_order=3, axis=1),
        cnt)
    log.info("Standardizing...")
    cnt = mne_apply(
        lambda a: exponential_running_standardize(
            a.T, factor_new=1e-3, init_block_size=1000, eps=1e-4).T,
        cnt)

    ival = [-500, 4000]
    dataset = create_signal_target_from_raw_mne(cnt, marker_def, ival)
    # Apply the artifact mask computed above (previously unused).
    dataset.X = dataset.X[clean_trial_mask]
    dataset.y = dataset.y[clean_trial_mask]
    return dataset
def cnt_to_epo(self, parsing_type):
    """Convert continuous (cnt) data into an epoched signal.

    With parsing_type 0 the epoched, mask-cleaned signal is stored in
    self.fold_data ('soft' parsing; self.data is left untouched); with
    parsing_type 1 the epoched signal replaces self.data in place.
    Balanced cross-subject folds are created afterwards.
    """
    # The conversion only makes sense on continuous RawArray data;
    # anything else is silently left alone.
    if isinstance(self.data, RawArray):
        if parsing_type == 0:
            # Soft parsing: result lands in fold_data, not in data.
            print_manager('Parsing cnt signal to epoched one...')
            self.fold_data = create_signal_target_from_raw_mne(
                self.data,
                self.name_to_start_codes,
                self.epoch_ival_ms
            )
            print_manager('DONE!!', bottom_return=1)

            # Drop trials (and their labels) rejected by the clean mask.
            print_manager('Cleaning epoched signal with mask...')
            mask = self.clean_trial_mask
            self.fold_data.X = self.fold_data.X[mask]
            self.fold_data.y = self.fold_data.y[mask]
            self.fold_subject_labels = self.subject_labels[mask]
            print_manager('DONE!!', bottom_return=1)
        elif parsing_type == 1:
            # Hard parsing: the epoched signal replaces the original one.
            print_manager('Parsing cnt signal to epoched one...')
            self.data = create_signal_target_from_raw_mne(
                self.data,
                self.name_to_start_codes,
                self.epoch_ival_ms
            )
            print_manager('DONE!!', bottom_return=1)

            print_manager('Cleaning epoched signal with mask...')
            mask = self.clean_trial_mask
            self.data.X = self.data.X[mask]
            self.data.y = self.data.y[mask]
            self.subject_labels = self.subject_labels[mask]
            print_manager('DONE!!', bottom_return=1)
        else:
            raise ValueError(
                'parsing_type {} not supported.'.format(parsing_type))

        # With an epoched signal available, folds for cross-subject
        # validation can already be created.
        self.create_balanced_folds()
def import_EEGData_test(start=0, end=9, dir='../data_HGD/test/'):
    """Load HGD test files dir/{start+1 .. end}.mat, clean and preprocess
    each, and return a single EEGDataset over all subjects.

    Improvements: redundant str() conversions removed, path built with
    os.path.join, and loop-invariant marker/sensor definitions hoisted
    out of the per-file loop. (The parameter name `dir` shadows the
    builtin but is kept for backward compatibility.)
    """
    # Loop-invariant definitions, hoisted out of the per-file loop.
    marker_def = OrderedDict([('Right Hand', [1]), ('Left Hand', [2],),
                              ('Rest', [3]), ('Feet', [4])])
    clean_ival = [0, 4000]
    ival = [-500, 4000]
    C_sensors = [
        'FC5', 'FC1', 'FC2', 'FC6', 'C3', 'C4', 'CP5', 'CP1', 'CP2', 'CP6',
        'FC3', 'FCz', 'FC4', 'C5', 'C1', 'C2', 'C6', 'CP3', 'CPz', 'CP4',
        'FFC5h', 'FFC3h', 'FFC4h', 'FFC6h', 'FCC5h', 'FCC3h', 'FCC4h',
        'FCC6h', 'CCP5h', 'CCP3h', 'CCP4h', 'CCP6h', 'CPP5h', 'CPP3h',
        'CPP4h', 'CPP6h', 'FFC1h', 'FFC2h', 'FCC1h', 'FCC2h', 'CCP1h',
        'CCP2h', 'CPP1h', 'CPP2h'
    ]

    X, y = [], []
    for i in range(start, end):
        dataFile = os.path.join(dir, str(i + 1) + '.mat')
        print("File:", dataFile, " loading...")
        cnt = BBCIDataset(filename=dataFile, load_sensor_names=None).load()

        # Flag trials exceeding +-800 absolute amplitude on all channels.
        set_for_cleaning = create_signal_target_from_raw_mne(
            cnt, marker_def, clean_ival)
        clean_trial_mask = np.max(np.abs(set_for_cleaning.X),
                                  axis=(1, 2)) < 800

        cnt = cnt.pick_channels(C_sensors)
        cnt = resample_cnt(cnt, 250.0)
        # exponential_running_standardize expects time x chans, hence .T.
        cnt = mne_apply(
            lambda a: exponential_running_standardize(
                a.T, factor_new=1e-3, init_block_size=1000, eps=1e-4).T,
            cnt)

        dataset = create_signal_target_from_raw_mne(cnt, marker_def, ival)
        dataset.X = dataset.X[clean_trial_mask]
        # Insert a singleton axis: trials x chans x 1 x samples.
        dataset.X = dataset.X[:, :, np.newaxis, :]
        dataset.y = dataset.y[clean_trial_mask]
        dataset.y = dataset.y[:, np.newaxis]
        X.extend(dataset.X)
        y.extend(dataset.y)

    X = data_in_one(np.array(X))
    y = np.array(y)
    print("X:", X.shape)
    print("y:", y.shape)
    dataset = EEGDataset(X, y)
    return dataset
def get_data(subject_id=4):
    """Load one High-Gamma-Dataset subject (channel C3 only), resample,
    standardize and epoch it.

    Generalization: the subject id was hard coded to 4; it is now a
    parameter with the same default, so existing `get_data()` calls are
    unchanged.

    Parameters
    ----------
    subject_id : int
        Subject number, 1-14 (default 4, the original hard-coded value).

    Returns
    -------
    X, y : trials and labels of the epoched dataset.
    """
    import os
    os.sys.path.append('/home/schirrmr/braindecode/code/braindecode/')
    from braindecode.datautil.trial_segment import create_signal_target_from_raw_mne
    from braindecode.datasets.bbci import BBCIDataset
    from braindecode.mne_ext.signalproc import mne_apply, resample_cnt
    from braindecode.datautil.signalproc import exponential_running_standardize

    loader = BBCIDataset(
        '/data/schirrmr/schirrmr/HGD-public/reduced/train/{:d}.mat'.format(
            subject_id),
        load_sensor_names=['C3'])
    cnt = loader.load()
    cnt = cnt.drop_channels(['STI 014'])

    from collections import OrderedDict
    marker_def = OrderedDict([('Right Hand', [1]), ('Left Hand', [2],),
                              ('Rest', [3]), ('Feet', [4])])

    # Here you can choose a larger sampling rate later;
    # right now chosen very small to allow fast initial experiments.
    cnt = resample_cnt(cnt, new_fs=500)
    # exponential_running_standardize expects time x chans, hence .T.
    cnt = mne_apply(
        lambda a: exponential_running_standardize(
            a.T, factor_new=1e-3, init_block_size=1000, eps=1e-4).T,
        cnt)
    ival = [0, 2000]  # ms to cut trial
    dataset = create_signal_target_from_raw_mne(cnt, marker_def, ival)
    return dataset.X, dataset.y
def clean_cnt(cnt, epoch_ival_ms, name_to_start_codes, cleaner):
    """Epoch cnt, run the cleaner over its trials, and return a continuous
    recording restricted to the surviving trials and channels."""
    log.info("Cleaning...")
    epo = create_signal_target_from_raw_mne(
        cnt,
        name_to_start_codes=name_to_start_codes,
        epoch_ival_ms=epoch_ival_ms)
    clean_result = cleaner.clean(epo.X)
    markers = extract_all_start_codes(name_to_start_codes)

    # Trials the cleaner did not reject.
    clean_trials = np.setdiff1d(np.arange(len(epo.X)),
                                clean_result.rejected_i_trials)
    # Map rejected channel indices back to channel names.
    rejected_chan_names = [cnt.ch_names[i_chan]
                           for i_chan in clean_result.rejected_i_chans]

    n_clean = len(clean_trials)
    n_rejected = len(clean_result.rejected_i_trials)
    log.info("Rejected channels: {:s}".format(str(rejected_chan_names)))
    log.info("#Clean trials: {:d}".format(n_clean))
    log.info("#Rejected trials: {:d}".format(n_rejected))
    log.info("Fraction Clean: {:.1f}%".format(
        100 * n_clean / (n_clean + n_rejected)))

    return restrict_cnt(cnt, markers, clean_trials, rejected_chan_names, )
def create_set(cnt, start_ms, stop_ms):
    """Epoch cnt into the [start_ms, stop_ms] window (ms) around each of
    the four motor-imagery markers and return the resulting dataset."""
    class_to_codes = OrderedDict()
    class_to_codes["Right Hand"] = [1]
    class_to_codes["Left Hand"] = [2]
    class_to_codes["Rest"] = [3]
    class_to_codes["Feet"] = [4]
    window = [start_ms, stop_ms]
    return create_signal_target_from_raw_mne(cnt, class_to_codes, window)
def clean(self, X, ignore_chans=False):
    """Epoch the EOG recording and run a max-min threshold cleaner on it.

    `X` and `ignore_chans` are accepted for interface compatibility but
    are not used: cleaning is based solely on the EOG channels.
    """
    epoched_eog = create_signal_target_from_raw_mne(
        self.eog_cnt,
        name_to_start_codes=self.name_to_start_codes,
        epoch_ival_ms=self.epoch_ival_ms)
    cleaner = MaxMinCleaner(self.threshold)
    return cleaner.clean(epoched_eog.X)
def _create_examples(self):
    """Build self.examples: for every input file (context) and every one of
    the four classes, append a list of ExampleInfo objects over that
    file's trials of that class."""
    # Marker name -> event code for the four motor-imagery classes.
    name_to_code = OrderedDict([('Right', 1), ('Left', 2), ('Rest', 3),
                                ('Feet', 4)])
    data_list_list = []
    for file_name in self.file_names:
        # Load one BBCI recording, drop the stimulus channel, resample.
        cnt = BBCIDataset(
            file_name, load_sensor_names=self.load_sensor_names).load()
        cnt = cnt.drop_channels(['STI 014'])
        cnt = resample_cnt(cnt, self.sampling_freq)
        if self.normalization_type == 'exponential':
            # exponential_running_standardize expects time x chans, the
            # mne array is chans x time -- hence the double transpose.
            cnt = mne_apply(
                lambda a: exponential_running_standardize(
                    a.T, init_block_size=1000, factor_new=0.001, eps=1e-4).
                T, cnt)
        # Epoch into (signal, label) pairs.
        data = create_signal_target_from_raw_mne(cnt, name_to_code,
                                                 self.segment_ival_ms)
        data_list = [(d, l) for d, l in zip(data.X, data.y)]
        # Keep only this reader's cross-validation share of the trials.
        data_list = self.cv_split(data_list)
        # Normalize the data
        if self.normalization_type == 'standard':
            # Accumulate mean/variance over all trials of this file.
            running_statistics = RunningStatistics(
                dim=data_list[0][0].shape[0], time_dimension_first=False)
            for data, label in data_list:
                running_statistics.append(data)
            mean = running_statistics.mean_vector()
            # Clip sdev away from zero to avoid division blow-up.
            sdev = np.clip(np.sqrt(running_statistics.var_vector()), 1e-5,
                           None)
            logger.info('Normalize with \n mean: %s, \n sdev: %s' %
                        (mean, sdev))
            for i in range(len(data_list)):
                data_list[i] = ((data_list[i][0] - mean) / sdev,
                                data_list[i][1])
        data_list_list.append(data_list)
    # Create examples for 4 classes
    for i, data_list in enumerate(data_list_list):
        for label in range(4):
            # All trials of this file belonging to the current class.
            class_data_list = [
                data for data in data_list if data[1] == label
            ]
            self.examples.append([
                BBCIDataReaderMulti.ExampleInfo(
                    example_id=str((i, label, j)),
                    random_mode=self.random_mode,
                    offset_size=self.offset_size,
                    label=label,
                    data=data,
                    context=i)
                for (j, (data, label)) in enumerate(class_data_list)
            ])
def init_training_vars(self):
    """Set up filterbank, class pairs and a single train/test fold where
    the test fold indexes the trials of self.test_cnt appended after the
    training trials; finally concatenates both recordings into self.cnt."""
    # This variant implements a fixed train/test split, so CV folds and
    # trial restriction are not supported here.
    assert self.n_folds is None, "Cannot use folds on train test split"
    assert self.restricted_n_trials is None, "Not implemented yet"
    self.filterbands = generate_filterbank(
        min_freq=self.min_freq,
        max_freq=self.max_freq,
        last_low_freq=self.last_low_freq,
        low_width=self.low_width,
        low_overlap=self.low_overlap,
        high_width=self.high_width,
        high_overlap=self.high_overlap,
        low_bound=self.low_bound)
    assert filterbank_is_stable(
        self.filterbands, self.filt_order, self.cnt.info['sfreq']), (
        "Expect filter bank to be stable given filter order.")
    # check if number of selected features is not too large
    if self.n_selected_features is not None:
        n_spatial_filters = self.n_top_bottom_csp_filters
        if n_spatial_filters is None:
            # No CSP filter restriction -> one filter per channel.
            n_spatial_filters = len(self.cnt.ch_names)
        n_max_features = len(self.filterbands) * n_spatial_filters
        assert n_max_features >= self.n_selected_features, (
            "Cannot select more features than will be originally created "
            "Originally: {:d}, requested: {:d}".format(
                n_max_features, self.n_selected_features)
        )
    # One binary classification problem per unordered class pair.
    n_classes = len(self.name_to_start_codes)
    self.class_pairs = list(itertools.combinations(range(n_classes), 2))
    # Epoch both recordings only to count their trials; the epochs
    # themselves are not kept.
    train_epo = create_signal_target_from_raw_mne(
        self.cnt,
        name_to_start_codes=self.name_to_start_codes,
        epoch_ival_ms=self.epoch_ival_ms,
        name_to_stop_codes=self.name_to_stop_codes)
    n_train_trials = len(train_epo.X)
    test_epo = create_signal_target_from_raw_mne(
        self.test_cnt,
        name_to_start_codes=self.name_to_start_codes,
        epoch_ival_ms=self.epoch_ival_ms,
        name_to_stop_codes=self.name_to_stop_codes)
    n_test_trials = len(test_epo.X)
    # Test trials are indexed after the train trials because the two
    # recordings are concatenated below.
    train_fold = np.arange(n_train_trials)
    test_fold = np.arange(n_train_trials, n_train_trials + n_test_trials)
    self.folds = [{'train': train_fold, 'test': test_fold}]
    # Sanity check: the split must be disjoint.
    assert np.intersect1d(self.folds[0]['test'],
                          self.folds[0]['train']).size == 0
    # merge cnts!!
    self.cnt = concatenate_raws_with_events([self.cnt, self.test_cnt])
def init_training_vars(self):
    """Set up filterbank, class pairs and shuffled, balanced
    cross-validation folds over the (optionally restricted) trials of
    self.cnt."""
    self.filterbands = generate_filterbank(
        min_freq=self.min_freq,
        max_freq=self.max_freq,
        last_low_freq=self.last_low_freq,
        low_width=self.low_width,
        low_overlap=self.low_overlap,
        high_width=self.high_width,
        high_overlap=self.high_overlap,
        low_bound=self.low_bound)
    assert filterbank_is_stable(
        self.filterbands, self.filt_order, self.cnt.info['sfreq']), (
        "Expect filter bank to be stable given filter order.")
    # check if number of selected features is not too large
    if self.n_selected_features is not None:
        n_spatial_filters = self.n_top_bottom_csp_filters
        if n_spatial_filters is None:
            # No CSP filter restriction -> one filter per channel.
            n_spatial_filters = len(self.cnt.ch_names)
        n_max_features = len(self.filterbands) * n_spatial_filters
        assert n_max_features >= self.n_selected_features, (
            "Cannot select more features than will be originally created "
            "Originally: {:d}, requested: {:d}".format(
                n_max_features, self.n_selected_features)
        )
    # One binary classification problem per unordered class pair.
    n_classes = len(self.name_to_start_codes)
    self.class_pairs = list(itertools.combinations(range(n_classes), 2))
    # use only number of clean trials to split folds
    epo = create_signal_target_from_raw_mne(
        self.cnt,
        name_to_start_codes=self.name_to_start_codes,
        epoch_ival_ms=self.epoch_ival_ms,
        name_to_stop_codes=self.name_to_stop_codes)
    n_trials = len(epo.X)
    if self.restricted_n_trials is not None:
        # <= 1 is interpreted as a fraction, > 1 as an absolute count.
        if self.restricted_n_trials <= 1:
            n_trials = int(n_trials * self.restricted_n_trials)
        else:
            n_trials = min(n_trials, self.restricted_n_trials)
    # Fixed seed so the fold assignment is reproducible.
    rng = RandomState(903372376)
    folds = get_balanced_batches(n_trials, rng, self.shuffle,
                                 n_batches=self.n_folds)
    # remap to original indices in unclean set(!)
    # train is everything except fold
    # test is fold inds
    self.folds = [{'train': np.setdiff1d(np.arange(n_trials), fold),
                   'test': fold} for fold in folds]
    if self.only_last_fold:
        self.folds = self.folds[-1:]
def run(self):
    """Bandpass the continuous signal once per filterband, epoch it, and
    run every cross-validation fold on the epoched data."""
    self.init_results()
    # TODELAY: split apart collecting of features and training lda?
    # however then you would not get progress output during training
    # only at very end
    for band_i, band in enumerate(self.filterbands):
        self.print_filter(band_i)
        low, high = band[0], band[1]
        bandpassed_cnt = bandpass_mne(self.cnt, low, high,
                                      filt_order=self.filt_order)
        epo = create_signal_target_from_raw_mne(
            bandpassed_cnt,
            name_to_start_codes=self.marker_def,
            epoch_ival_ms=self.epoch_ival_ms,
            name_to_stop_codes=self.name_to_stop_codes)
        for fold_i in range(len(self.folds)):
            self.run_fold(epo, band_i, fold_i)
def get_clean_trial_mask(cnt, name_to_start_codes, clean_ival_ms=(0, 4000)):
    """
    Scan trials in continuous data and create a boolean mask marking the
    valid ones; at the end of the loading routine, after all the data
    pre-processing, the mask can be used to cut away the invalid trials.

    Fix: the reductions previously used the builtin max/abs/sum/mean,
    which do not accept the `axis` keyword (and `mean` is not a builtin
    at all) -- replaced with the numpy equivalents, matching the sibling
    loaders in this file.

    Parameters
    ----------
    cnt : continuous recording (RawArray).
    name_to_start_codes : OrderedDict mapping class names to event codes.
    clean_ival_ms : tuple of int
        Interval in ms used for artifact scanning.

    Returns
    -------
    clean_trial_mask : ndarray of bool
        True for trials whose absolute amplitude stays below 800 uV.
    """
    # Split cnt into trial data for cleaning.
    set_for_cleaning = create_signal_target_from_raw_mne(
        cnt, name_to_start_codes, clean_ival_ms)

    # Keep only trials whose absolute microvolt values stay below +-800.
    clean_trial_mask = np.max(np.abs(set_for_cleaning.X), axis=(1, 2)) < 800

    # Log cleaning statistics.
    log.info('Clean trials: {:3d} of {:3d} ({:5.1f}%)'.format(
        np.sum(clean_trial_mask),
        len(set_for_cleaning.X),
        np.mean(clean_trial_mask) * 100))

    return clean_trial_mask
def dl_loader(data_dir, name_to_start_codes, channel_names, subject_id=1,
              resampling_freq=None, clean_ival_ms=(0, 4000),
              epoch_ival_ms=(-500, 4000), train_test_split=True,
              clean_on_all_channels=True, standardize_mode=0):
    """Load and preprocess one subject, epoch the continuous signal, and
    drop the trials flagged by the cleaning mask.

    Returns the epoched signal (SignalAndTarget) with clean trials only.
    """
    # Continuous signal plus a boolean mask of trials worth keeping.
    cnt, clean_trial_mask = load_and_preprocess_data(
        data_dir=data_dir,
        name_to_start_codes=name_to_start_codes,
        channel_names=channel_names,
        subject_id=subject_id,
        resampling_freq=resampling_freq,
        clean_ival_ms=clean_ival_ms,
        train_test_split=train_test_split,
        clean_on_all_channels=clean_on_all_channels,
        standardize_mode=standardize_mode)

    print_manager('EPOCHING AND CLEANING WITH MASK', 'double-dashed')

    # RawArray -> SignalAndTarget.
    print_manager('Epoching...')
    epo = create_signal_target_from_raw_mne(cnt, name_to_start_codes,
                                            epoch_ival_ms)
    print_manager('DONE!!', bottom_return=1)

    # Keep only the trials the mask marks as clean.
    print_manager('cleaning with mask...')
    epo.X = epo.X[clean_trial_mask]
    epo.y = epo.y[clean_trial_mask]
    print_manager('DONE!!', 'last', bottom_return=1)

    return epo
def process_bbci_data(filename, labels_filename, low_cut_hz):
    """Load one BCI Competition IV 2a recording, preprocess it and return
    the epoched SignalAndTarget dataset."""
    epoch_ival = [-500, 4000]
    high_cut_hz = 38
    factor_new = 1e-3
    init_block_size = 1000

    cnt = BCICompetition4Set2A(
        filename, labels_filename=labels_filename).load()

    # Preprocessing: keep only the 22 EEG channels.
    cnt = cnt.drop_channels(
        ['STI 014', 'EOG-left', 'EOG-central', 'EOG-right'])
    assert len(cnt.ch_names) == 22

    # lets convert to millvolt for numerical stability of next operations
    cnt = mne_apply(lambda a: a * 1e6, cnt)
    cnt = mne_apply(
        lambda a: bandpass_cnt(a, low_cut_hz, high_cut_hz,
                               cnt.info['sfreq'], filt_order=3, axis=1),
        cnt)
    # exponential_running_standardize expects time x chans, hence .T.
    cnt = mne_apply(
        lambda a: exponential_running_standardize(
            a.T, factor_new=factor_new,
            init_block_size=init_block_size, eps=1e-4).T,
        cnt)

    marker_def = OrderedDict([('Left Hand', [1]), ('Right Hand', [2],),
                              ('Foot', [3]), ('Tongue', [4])])
    return create_signal_target_from_raw_mne(cnt, marker_def, epoch_ival)
def run_exp(data_folder, subject_id, low_cut_hz, test_model, model_PATH,
            model, cuda):
    """Evaluate an (optionally pre-trained) SelfShallow model on the test
    session of one BCI Competition IV 2a subject and print its accuracy.

    Fixes: the test tensor is only moved to the GPU when `cuda` is True
    (previously `.cuda()` was called unconditionally, crashing on
    CPU-only machines), and the accuracy print-label typo is corrected.

    NOTE(review): the `model` argument is ignored -- it is overwritten by
    a fresh SelfShallow() below; confirm whether that is intended.
    """
    ival = [-500, 4000]
    high_cut_hz = 38
    factor_new = 1e-3
    init_block_size = 1000

    test_filename = "A{:02d}E.gdf".format(subject_id)
    test_filepath = os.path.join(data_folder, test_filename)
    test_label_filepath = test_filepath.replace(".gdf", ".mat")

    test_loader = BCICompetition4Set2A(
        test_filepath, labels_filename=test_label_filepath
    )
    test_cnt = test_loader.load()

    # Preprocessing: drop EOG, scale to microvolt, bandpass, standardize.
    test_cnt = test_cnt.drop_channels(["EOG-left", "EOG-central",
                                       "EOG-right"])
    assert len(test_cnt.ch_names) == 22
    test_cnt = mne_apply(lambda a: a * 1e6, test_cnt)
    test_cnt = mne_apply(
        lambda a: bandpass_cnt(
            a, low_cut_hz, high_cut_hz, test_cnt.info["sfreq"],
            filt_order=3, axis=1,
        ),
        test_cnt,
    )
    test_cnt = mne_apply(
        lambda a: exponential_running_standardize(
            a.T, factor_new=factor_new,
            init_block_size=init_block_size, eps=1e-4,
        ).T,
        test_cnt,
    )

    marker_def = OrderedDict(
        [
            ("Left Hand", [1]),
            ("Right Hand", [2]),
            ("Foot", [3]),
            ("Tongue", [4]),
        ]
    )
    test_set = create_signal_target_from_raw_mne(test_cnt, marker_def, ival)
    test_set = data_all_chan_cwtandraw(test_set)

    set_random_seeds(seed=20200104, cuda=cuda)
    model = SelfShallow()
    if test_model:
        model.load_state_dict(torch.load(model_PATH))
    if cuda:
        model.cuda()
    model.eval()
    log.info("Model: \n{:s}".format(str(model)))

    all_test_labels = test_set.y
    all_test_data = torch.from_numpy(test_set.X)
    if cuda:
        # Only move data to the GPU when requested.
        all_test_data = all_test_data.cuda()
    preds, feature1, feature2, raw, guide = model(all_test_data)
    preds = preds.cpu().detach().numpy()
    all_preds = np.argmax(preds, axis=1).squeeze()
    accuracy = np.mean(all_test_labels == all_preds)
    print('accuracy:', accuracy)
cnt[iSubject] = resample_cnt(cnt[iSubject], resampleToHz) # mne apply will apply the function to the data (a 2d-numpy-array) # have to transpose data back and forth, since # exponential_running_standardize expects time x chans order # while mne object has chans x time order cnt[iSubject] = mne_apply( lambda a: exponential_running_standardize( a.T, init_block_size=1000, factor_new=0.001, eps=1e-4).T, cnt[iSubject]) name_to_start_codes = OrderedDict([('ScoreExp', 1)]) name_to_stop_codes = OrderedDict([('ScoreExp', 2)]) train_sets.append( create_signal_target_from_raw_mne(cnt[iSubject], name_to_start_codes, [0, 0], name_to_stop_codes)) train_sets[iSubject].y = Score[:, :-1] cutInd = int( np.size(train_sets[iSubject].y) - nSecondsTestSet * samplingRate) # use last nSecondsTestSet as test set test_sets.append(deepcopy(train_sets[iSubject])) test_sets[iSubject].X[0] = np.array( np.float32(test_sets[iSubject].X[0][:, cutInd:])) test_sets[iSubject].y = np.float32(test_sets[iSubject].y[:, cutInd:]) train_sets[iSubject].X[0] = np.array( np.float32(train_sets[iSubject].X[0][:, :cutInd])) train_sets[iSubject].y = np.float32(train_sets[iSubject].y[:, :cutInd])
def run_exp(data_folder, subject_id, low_cut_hz, model, cuda):
    """Train a shallow or deep ConvNet on one BCI Competition IV 2a
    subject and return the finished Experiment.

    `model` selects the architecture ('shallow' or 'deep'); `cuda`
    toggles GPU training.
    """
    # --- file paths ------------------------------------------------------
    gdf_train = 'A{:02d}T.gdf'.format(subject_id)
    gdf_test = 'A{:02d}E.gdf'.format(subject_id)
    path_train = os.path.join(data_folder, gdf_train)
    path_test = os.path.join(data_folder, gdf_test)
    labels_train = path_train.replace('.gdf', '.mat')
    labels_test = path_test.replace('.gdf', '.mat')

    train_cnt = BCICompetition4Set2A(
        path_train, labels_filename=labels_train).load()
    test_cnt = BCICompetition4Set2A(
        path_test, labels_filename=labels_test).load()

    # --- preprocessing (same chain for both sessions) --------------------
    non_eeg = ['STI 014', 'EOG-left', 'EOG-central', 'EOG-right']

    train_cnt = train_cnt.drop_channels(non_eeg)
    assert len(train_cnt.ch_names) == 22
    # lets convert to millvolt for numerical stability of next operations
    train_cnt = mne_apply(lambda a: a * 1e6, train_cnt)
    train_cnt = mne_apply(
        lambda a: bandpass_cnt(
            a, low_cut_hz, 38, train_cnt.info['sfreq'],
            filt_order=3, axis=1),
        train_cnt)
    train_cnt = mne_apply(
        lambda a: exponential_running_standardize(
            a.T, factor_new=1e-3, init_block_size=1000, eps=1e-4).T,
        train_cnt)

    test_cnt = test_cnt.drop_channels(non_eeg)
    assert len(test_cnt.ch_names) == 22
    test_cnt = mne_apply(lambda a: a * 1e6, test_cnt)
    test_cnt = mne_apply(
        lambda a: bandpass_cnt(
            a, low_cut_hz, 38, test_cnt.info['sfreq'],
            filt_order=3, axis=1),
        test_cnt)
    test_cnt = mne_apply(
        lambda a: exponential_running_standardize(
            a.T, factor_new=1e-3, init_block_size=1000, eps=1e-4).T,
        test_cnt)

    # --- epoching and splitting ------------------------------------------
    marker_def = OrderedDict([('Left Hand', [1]), ('Right Hand', [2],),
                              ('Foot', [3]), ('Tongue', [4])])
    ival = [-500, 4000]
    train_set = create_signal_target_from_raw_mne(
        train_cnt, marker_def, ival)
    test_set = create_signal_target_from_raw_mne(
        test_cnt, marker_def, ival)
    train_set, valid_set = split_into_two_sets(
        train_set, first_set_fraction=0.8)

    # --- model setup -----------------------------------------------------
    set_random_seeds(seed=20190706, cuda=cuda)
    n_classes = 4
    n_chans = int(train_set.X.shape[1])
    input_time_length = train_set.X.shape[2]
    if model == 'shallow':
        model = ShallowFBCSPNet(
            n_chans, n_classes, input_time_length=input_time_length,
            final_conv_length='auto').create_network()
    elif model == 'deep':
        model = Deep4Net(
            n_chans, n_classes, input_time_length=input_time_length,
            final_conv_length='auto').create_network()
    if cuda:
        model.cuda()
    log.info("Model: \n{:s}".format(str(model)))

    # --- training --------------------------------------------------------
    optimizer = optim.Adam(model.parameters())
    iterator = BalancedBatchSizeIterator(batch_size=60)
    stop_criterion = Or([MaxEpochs(1600),
                         NoDecrease('valid_misclass', 160)])
    monitors = [LossMonitor(), MisclassMonitor(), RuntimeMonitor()]
    model_constraint = MaxNormDefaultConstraint()
    exp = Experiment(
        model, train_set, valid_set, test_set,
        iterator=iterator,
        loss_function=F.nll_loss,
        optimizer=optimizer,
        model_constraint=model_constraint,
        monitors=monitors,
        stop_criterion=stop_criterion,
        remember_best_column='valid_misclass',
        run_after_early_stop=True,
        cuda=cuda)
    exp.run()
    return exp
def data_gen(subject, high_cut_hz=38, low_cut_hz=0):
    """Load, preprocess and epoch one or more BCI Competition IV 2a
    subjects and return the concatenation of all their train+test trials.

    Fixes: dead commented-out loader code removed; the `i == 0`/else
    accumulation replaced; an empty `subject` iterable now returns None
    instead of raising UnboundLocalError; loop-invariant settings hoisted
    out of the per-subject loop.

    Parameters
    ----------
    subject : sequence of int
        Subject ids (1-9) to load.
    high_cut_hz, low_cut_hz : float
        Bandpass corner frequencies in Hz.
    """
    # NOTE(review): the data folder is hard coded to a local Windows path.
    data_folder = r'D:\li\=.=\eeg\hw\nn-STFT\dataset\BCICIV_2a_gdf'
    ival = [-500, 4000]
    factor_new = 1e-3
    init_block_size = 1000
    marker_def = OrderedDict([('Left Hand', [1]), ('Right Hand', [2],),
                              ('Foot', [3]), ('Tongue', [4])])

    data_sub = {}
    dataset = None
    for subject_id in subject:
        train_filename = 'A{:02d}T.gdf'.format(subject_id)
        test_filename = 'A{:02d}E.gdf'.format(subject_id)
        train_filepath = os.path.join(data_folder, train_filename)
        test_filepath = os.path.join(data_folder, test_filename)
        train_label_filepath = train_filepath.replace('.gdf', '.mat')
        test_label_filepath = test_filepath.replace('.gdf', '.mat')

        train_cnt = BCICompetition4Set2A(
            train_filepath, labels_filename=train_label_filepath).load()
        test_cnt = BCICompetition4Set2A(
            test_filepath, labels_filename=test_label_filepath).load()

        # train set process
        train_cnt = train_cnt.drop_channels(
            ['EOG-left', 'EOG-central', 'EOG-right'])
        assert len(train_cnt.ch_names) == 22
        train_cnt = mne_apply(lambda a: a * 1e6, train_cnt)
        train_cnt = mne_apply(
            lambda a: bandpass_cnt(a, low_cut_hz, high_cut_hz,
                                   train_cnt.info['sfreq'],
                                   filt_order=3, axis=1),
            train_cnt)
        train_cnt = mne_apply(
            lambda a: exponential_running_standardize(
                a.T, factor_new=factor_new,
                init_block_size=init_block_size, eps=1e-4).T,
            train_cnt)

        # test set process (same chain)
        test_cnt = test_cnt.drop_channels(
            ['EOG-left', 'EOG-central', 'EOG-right'])
        assert len(test_cnt.ch_names) == 22
        test_cnt = mne_apply(lambda a: a * 1e6, test_cnt)
        test_cnt = mne_apply(
            lambda a: bandpass_cnt(a, low_cut_hz, high_cut_hz,
                                   test_cnt.info['sfreq'],
                                   filt_order=3, axis=1),
            test_cnt)
        test_cnt = mne_apply(
            lambda a: exponential_running_standardize(
                a.T, factor_new=factor_new,
                init_block_size=init_block_size, eps=1e-4).T,
            test_cnt)

        train_set = create_signal_target_from_raw_mne(
            train_cnt, marker_def, ival)
        test_set = create_signal_target_from_raw_mne(
            test_cnt, marker_def, ival)

        subj_set = concatenate_sets([train_set, test_set])
        data_sub[str(subject_id)] = subj_set
        dataset = (subj_set if dataset is None
                   else concatenate_sets([dataset, subj_set]))

    # Duplicate subject ids would collapse dict entries -- guard that.
    assert len(data_sub) == len(subject)
    return dataset
def get_bci_iv_2a_train_val_test(data_folder, subject_id, low_cut_hz):
    """Return (train_set, valid_set, test_set) for one BCI IV 2a subject.

    Loads the training ('A0xT') and evaluation ('A0xE') GDF recordings,
    applies the standard preprocessing chain (drop EOG/stim channels,
    scale to microvolt, bandpass filter, exponential running
    standardization), epochs both recordings, and splits the training
    epochs into train and validation parts according to
    ``global_vars.get('valid_set_fraction')``.
    """
    epoch_ival_ms = [-500, 4000]  # window (ms) around each event fed to the classifier
    high_cut_hz = 38              # discard frequencies above 38 Hz
    factor_new = 1e-3             # smoothing factor of the running standardization
    init_block_size = 1000        # samples used to initialize mean/variance

    def _load_cnt(gdf_name):
        # Load one recording together with its companion .mat label file.
        gdf_path = os.path.join(data_folder, gdf_name)
        loader = BCICompetition4Set2A(
            gdf_path, labels_filename=gdf_path.replace('.gdf', '.mat'))
        return loader.load()

    def _clean(cnt):
        # Keep only the 22 EEG channels, then scale/filter/standardize.
        cnt = cnt.drop_channels(['EOG-left', 'EOG-central', 'EOG-right'])
        if len(cnt.ch_names) > 22:
            cnt = cnt.drop_channels(['STI 014'])
        assert len(cnt.ch_names) == 22
        # Scale to microvolt for numerical stability of the next operations.
        cnt = mne_apply(lambda a: a * 1e6, cnt)
        cnt = mne_apply(
            lambda a: bandpass_cnt(a, low_cut_hz, high_cut_hz,
                                   cnt.info['sfreq'], filt_order=3, axis=1),
            cnt)
        return mne_apply(
            lambda a: exponential_running_standardize(
                a.T, factor_new=factor_new,
                init_block_size=init_block_size, eps=1e-4).T,
            cnt)

    train_cnt = _clean(_load_cnt('A{:02d}T.gdf'.format(subject_id)))
    test_cnt = _clean(_load_cnt('A{:02d}E.gdf'.format(subject_id)))

    marker_def = OrderedDict([('Left Hand', [1]), ('Right Hand', [2],),
                              ('Foot', [3]), ('Tongue', [4])])
    train_set = create_signal_target_from_raw_mne(
        train_cnt, marker_def, epoch_ival_ms)
    test_set = create_signal_target_from_raw_mne(
        test_cnt, marker_def, epoch_ival_ms)

    train_set, valid_set = split_into_two_sets(
        train_set,
        first_set_fraction=1 - global_vars.get('valid_set_fraction'))
    return train_set, valid_set, test_set
a, low_cut_hz, high_cut_hz, test_cnt.info["sfreq"], filt_order=3, axis=1 ), test_cnt, ) test_cnt = mne_apply( lambda a: exponential_running_standardize( a.T, factor_new=factor_new, init_block_size=init_block_size, eps=1e-4 ).T, test_cnt, ) marker_def = OrderedDict( [("Left Hand", [1]), ("Right Hand", [2]), ("Foot", [3]), ("Tongue", [4])] ) train_set = create_signal_target_from_raw_mne(train_cnt, marker_def, ival) test_set = create_signal_target_from_raw_mne(test_cnt, marker_def, ival) train_set, valid_set = split_into_two_sets( train_set, first_set_fraction=1 - valid_set_fraction ) logging.basicConfig( format="%(asctime)s %(levelname)s : %(message)s", level=logging.INFO, stream=sys.stdout, ) # Set if you want to use GPU # You can also use torch.cuda.is_available() to determine if cuda is available on your machine.
def run_exp(data_folder, subject_id, low_cut_hz, model, cuda):
    """Run a cropped-decoding experiment on one BCI IV 2a subject.

    Loads the subject's train/evaluation recordings, preprocesses them,
    builds a shallow or deep ConvNet (selected by ``model``), converts it
    to a dense-prediction model, trains it with cropped decoding and
    early stopping, and returns the finished Experiment.
    """
    # Experiment hyperparameters.
    ival = [-500, 4000]
    input_time_length = 1000
    max_epochs = 800
    max_increase_epochs = 80
    batch_size = 60
    high_cut_hz = 38
    factor_new = 1e-3
    init_block_size = 1000
    valid_set_fraction = 0.2

    # Resolve recording and label-file paths, then load both sessions.
    train_filepath = os.path.join(data_folder,
                                  'A{:02d}T.gdf'.format(subject_id))
    test_filepath = os.path.join(data_folder,
                                 'A{:02d}E.gdf'.format(subject_id))
    train_loader = BCICompetition4Set2A(
        train_filepath,
        labels_filename=train_filepath.replace('.gdf', '.mat'))
    test_loader = BCICompetition4Set2A(
        test_filepath,
        labels_filename=test_filepath.replace('.gdf', '.mat'))
    train_cnt = train_loader.load()
    test_cnt = test_loader.load()

    def _preprocess(cnt):
        # Keep the 22 EEG channels, scale to microvolt for numerical
        # stability, bandpass filter and standardize.
        cnt = cnt.drop_channels(
            ['STI 014', 'EOG-left', 'EOG-central', 'EOG-right'])
        assert len(cnt.ch_names) == 22
        cnt = mne_apply(lambda a: a * 1e6, cnt)
        cnt = mne_apply(
            lambda a: bandpass_cnt(a, low_cut_hz, high_cut_hz,
                                   cnt.info['sfreq'], filt_order=3, axis=1),
            cnt)
        return mne_apply(
            lambda a: exponential_running_standardize(
                a.T, factor_new=factor_new,
                init_block_size=init_block_size, eps=1e-4).T,
            cnt)

    train_cnt = _preprocess(train_cnt)
    test_cnt = _preprocess(test_cnt)

    marker_def = OrderedDict([('Left Hand', [1]), ('Right Hand', [2],),
                              ('Foot', [3]), ('Tongue', [4])])
    train_set = create_signal_target_from_raw_mne(train_cnt, marker_def, ival)
    test_set = create_signal_target_from_raw_mne(test_cnt, marker_def, ival)
    train_set, valid_set = split_into_two_sets(
        train_set, first_set_fraction=1 - valid_set_fraction)

    set_random_seeds(seed=20190706, cuda=cuda)
    n_classes = 4
    n_chans = int(train_set.X.shape[1])
    if model == 'shallow':
        model = ShallowFBCSPNet(n_chans, n_classes,
                                input_time_length=input_time_length,
                                final_conv_length=30).create_network()
    elif model == 'deep':
        model = Deep4Net(n_chans, n_classes,
                         input_time_length=input_time_length,
                         final_conv_length=2).create_network()
    # Cropped training needs dense per-sample predictions.
    to_dense_prediction_model(model)
    if cuda:
        model.cuda()
    log.info("Model: \n{:s}".format(str(model)))

    # Probe the network once to learn how many predictions it emits
    # per input window.
    dummy_input = np_to_var(train_set.X[:1, :, :, None])
    if cuda:
        dummy_input = dummy_input.cuda()
    out = model(dummy_input)
    n_preds_per_input = out.cpu().data.numpy().shape[2]

    optimizer = optim.Adam(model.parameters())
    iterator = CropsFromTrialsIterator(
        batch_size=batch_size,
        input_time_length=input_time_length,
        n_preds_per_input=n_preds_per_input)
    stop_criterion = Or([MaxEpochs(max_epochs),
                         NoDecrease('valid_misclass', max_increase_epochs)])
    monitors = [LossMonitor(),
                MisclassMonitor(col_suffix='sample_misclass'),
                CroppedTrialMisclassMonitor(
                    input_time_length=input_time_length),
                RuntimeMonitor()]
    model_constraint = MaxNormDefaultConstraint()

    def loss_function(preds, targets):
        # Average the dense time-axis predictions before the NLL loss.
        return F.nll_loss(th.mean(preds, dim=2, keepdim=False), targets)

    exp = Experiment(model, train_set, valid_set, test_set,
                     iterator=iterator,
                     loss_function=loss_function,
                     optimizer=optimizer,
                     model_constraint=model_constraint,
                     monitors=monitors,
                     stop_criterion=stop_criterion,
                     remember_best_column='valid_misclass',
                     run_after_early_stop=True,
                     cuda=cuda)
    exp.run()
    return exp
def concat_prepare_cnn(input_signal):
    """Prepare a continuous recording for CNN training.

    Replaces the minimum-valued samples of every channel (the loader's
    NaN placeholder) with the channel mean, re-attaches the stimulus
    events, scales the signal to microvolt, bandpass-filters it 1-40 Hz,
    applies exponential running standardization and finally epochs it on
    the 'ec' marker (event code 30) over a 0-20 s window.
    """
    # High-pass edge: suppresses slow eye-movement artifacts, as required
    # for this data set.
    low_cut_hz = 1
    # Low-pass edge: keeps theta up to low gamma, similar to what a
    # filter-bank CSP pipeline would cover.  Tunable per data set.
    high_cut_hz = 40
    factor_new = 1e-3        # smoothing factor of the running standardization
    init_block_size = 1000   # samples used to initialize mean/variance
    # Trial window in ms analyzed per trial.
    ival = [0, 20000]
    # Fraction reserved for validation elsewhere; kept for parity with the
    # other experiment scripts (unused here).
    valid_set_fraction = 0.2

    # Extract events before dropping the stim channel.
    gdf_events = mne.find_events(input_signal)
    input_signal = input_signal.drop_channels(["stim"])
    raw_training_signal = input_signal.get_data()
    print("data shape:", raw_training_signal.shape)

    for ch_idx in range(raw_training_signal.shape[0]):
        channel = raw_training_signal[ch_idx]
        # Mark the sentinel value (channel minimum) as NaN, then impute
        # the NaNs with the channel's nan-aware mean.
        channel = np.where(channel == np.min(channel), np.nan, channel)
        raw_training_signal[ch_idx] = channel
        nan_mask = np.isnan(channel)
        raw_training_signal[ch_idx, nan_mask] = np.nanmean(channel)

    # Rebuild the Raw object from the cleaned array and re-attach the
    # previously extracted events.
    input_signal = mne.io.RawArray(raw_training_signal, input_signal.info,
                                   verbose="WARNING")
    input_signal.info["events"] = gdf_events

    train_cnt = input_signal
    # Scale to microvolt for numerical stability of the next operations.
    train_cnt = mne_apply(lambda a: a * 1e6, train_cnt)
    train_cnt = mne_apply(
        lambda a: bandpass_cnt(a, low_cut_hz, high_cut_hz,
                               train_cnt.info["sfreq"], filt_order=3, axis=1),
        train_cnt)
    train_cnt = mne_apply(
        lambda a: exponential_running_standardize(
            a.T, factor_new=factor_new,
            init_block_size=init_block_size, eps=1e-4).T,
        train_cnt)

    marker_def = OrderedDict([("ec", [30])])
    return create_signal_target_from_raw_mne(train_cnt, marker_def, ival)
def run(self):
    """Run the full FBCSP pipeline.

    For every band of the filterbank: bandpass the continuous signal,
    epoch it, and then for every cross-validation fold and every class
    pair compute CSP filters on the training trials, extract
    log-variance features, train a shrinkage-regularized LDA and record
    per-pair train/test accuracies plus all intermediate results through
    ``self.store_results``.
    """
    # %% CYCLING ON FILTERS IN FILTERBANK
    # %%
    # enumerate gives an automatic counter: bp_nr is the band index,
    # filt_band is the (low, high) pair yielded by the filterbands class.
    for bp_nr, filt_band in enumerate(self.filterbands):
        # printing filter information
        self.print_filter(bp_nr)
        # bandpassing all the cnt RawArray with the current filter
        bandpassed_cnt = bandpass_mne(self.cnt, filt_band[0], filt_band[1],
                                      filt_order=self.filt_order)
        # epoching: from cnt data to epoched data
        epo = create_signal_target_from_raw_mne(
            bandpassed_cnt,
            name_to_start_codes=self.marker_def,
            epoch_ival_ms=self.epoch_ival_ms,
            name_to_stop_codes=self.name_to_stop_codes)
        # cleaning epoched data with clean_trial_mask; skipped in the
        # single-fold case (the mask is only defined for true CV runs).
        if len(self.folds) != 1:
            epo.X = epo.X[self.clean_trial_mask]
            epo.y = epo.y[self.clean_trial_mask]
        # %% CYCLING ON FOLDS
        # %%
        for fold_nr in range(len(self.folds)):
            # printing fold information
            self.print_fold_nr(fold_nr)
            # getting train and test trial indexes for the current fold
            train_test = self.folds[fold_nr]
            train_ind = train_test['train']
            test_ind = train_test['test']
            # getting train/test data from the indexes
            epo_train = select_trials(epo, train_ind)
            epo_test = select_trials(epo, test_ind)
            log.info("#Train trials: {:4d}".format(len(epo_train.X)))
            log.info("#Test trials : {:4d}".format(len(epo_test.X)))
            # storing full-fold labels for later multiclass assembly
            self.train_labels_full_fold[fold_nr] = epo_train.y
            self.test_labels_full_fold[fold_nr] = epo_test.y
            # %% CYCLING ON ALL POSSIBLE CLASS PAIRS
            # %%
            for pair_nr in range(len(self.class_pairs)):
                # getting class pair from index (pair_nr)
                class_pair = self.class_pairs[pair_nr]
                self.print_class_pair(class_pair)
                # restricting train/test trials to the current two classes
                epo_train_pair = select_classes(epo_train, class_pair)
                epo_test_pair = select_classes(epo_test, class_pair)
                self.train_labels[fold_nr][pair_nr] = epo_train_pair.y
                self.test_labels[fold_nr][pair_nr] = epo_test_pair.y
                # %% COMPUTING CSP
                # %%
                filters, patterns, variances = calculate_csp(
                    epo_train_pair,
                    average_trial_covariance=self.average_trial_covariance)
                # %% FEATURE EXTRACTION
                # %%
                # choosing how many spatial filters to apply; if no filter
                # count is specified, take all columns...
                if self.n_filters is None:
                    columns = list(range(len(filters)))
                else:
                    # ...else take topmost and bottommost filters, e.g. for
                    # n_filters=3 pick 0, 1, 2, -3, -2, -1
                    columns = (list(range(0, self.n_filters)) +
                               list(range(-self.n_filters, 0)))
                # log-variance feature extraction on train and test
                train_feature = apply_csp_var_log(epo_train_pair, filters,
                                                  columns)
                test_feature = apply_csp_var_log(epo_test_pair, filters,
                                                 columns)
                # %% COMPUTING LDA USING TRAIN FEATURES
                # %%
                # clf is a (weights, bias) tuple describing the separating
                # hyperplane used to classify the CSP features.
                clf = lda_train_scaled(train_feature, shrink=True)
                # %% APPLYING LDA ON TRAIN
                # %%
                train_out = lda_apply(train_feature, clf)
                # convert class labels to boolean labels, e.g. for
                # y=[1,3,3,1] and class_pair=[1,3] -> [False, True, True, False]
                true_0_1_labels_train = train_feature.y == class_pair[1]
                # positive LDA output means second class of the pair
                predicted_train = train_out >= 0
                # mean of a boolean array = fraction of True = accuracy
                train_accuracy = mean(
                    true_0_1_labels_train == predicted_train)
                # %% APPLYING LDA ON TEST
                # %%
                # same procedure on the held-out trials
                test_out = lda_apply(test_feature, clf)
                true_0_1_labels_test = test_feature.y == class_pair[1]
                predicted_test = test_out >= 0
                test_accuracy = mean(
                    true_0_1_labels_test == predicted_test)
                # %% FEATURE COMPUTATION FOR FULL FOLD
                # %% (FOR LATER MULTICLASS)
                # reuse this pair's CSP filters to featurize ALL trials of
                # the current fold, train and test
                train_feature_full_fold = apply_csp_var_log(
                    epo_train, filters, columns)
                test_feature_full_fold = apply_csp_var_log(
                    epo_test, filters, columns)
                # %% STORE RESULTS
                # %%
                # only store the used patterns/filters/variances to save
                # disk space
                self.store_results(bp_nr, fold_nr, pair_nr,
                                   filters[:, columns],
                                   patterns[:, columns],
                                   variances[columns],
                                   train_feature, test_feature,
                                   train_feature_full_fold,
                                   test_feature_full_fold, clf,
                                   train_accuracy, test_accuracy)
                # printing the end of this super-nested cycle
                self.print_results(bp_nr, fold_nr, pair_nr)
        # blank line to visually divide filterband iterations
        print()
resampleToHz = samplingRate cnt = resample_cnt(cnt, resampleToHz) # mne apply will apply the function to the data (a 2d-numpy-array) # have to transpose data back and forth, since # exponential_running_standardize expects time x chans order # while mne object has chans x time order cnt = mne_apply( lambda a: exponential_running_standardize( a.T, init_block_size=1000, factor_new=0.001, eps=1e-4).T, cnt) name_to_start_codes = OrderedDict([('ScoreExp', 1)]) name_to_stop_codes = OrderedDict([('ScoreExp', 2)]) train_set = create_signal_target_from_raw_mne( cnt, name_to_start_codes, [0, 0], name_to_stop_codes) train_set.y = Score[:, :-1] # Outer added axis is the trial axis (size one always...) #from braindecode.datautil.signal_target import SignalAndTarget #datasets = SignalAndTarget(train_set.X[0].astype(np.float32), train_set.y.astype(np.float32)) # split data and test set #first_set_fraction = 0.95 #cutInd = int(np.size(train_set.y)*first_set_fraction) cutInd = int( np.size(train_set.y) - nSecondsTestSet * samplingRate) # use last nSecondsTestSet as test set test_set = deepcopy(train_set)
def run_exp(data_folder, subject_id, low_cut_hz, model, cuda):
    """Run a trial-wise decoding experiment on one BCI IV 2a subject.

    Loads the subject's train/evaluation recordings, preprocesses them,
    builds a shallow or deep ConvNet (selected by ``model``) sized to the
    full trial length, trains it trial-wise with early stopping, and
    returns the finished Experiment.
    """
    # Experiment hyperparameters.
    ival = [-500, 4000]
    max_epochs = 1600
    max_increase_epochs = 160
    batch_size = 60
    high_cut_hz = 38
    factor_new = 1e-3
    init_block_size = 1000
    valid_set_fraction = 0.2

    # Resolve recording and label-file paths, then load both sessions.
    train_filepath = os.path.join(data_folder,
                                  "A{:02d}T.gdf".format(subject_id))
    test_filepath = os.path.join(data_folder,
                                 "A{:02d}E.gdf".format(subject_id))
    train_loader = BCICompetition4Set2A(
        train_filepath,
        labels_filename=train_filepath.replace(".gdf", ".mat"))
    test_loader = BCICompetition4Set2A(
        test_filepath,
        labels_filename=test_filepath.replace(".gdf", ".mat"))
    train_cnt = train_loader.load()
    test_cnt = test_loader.load()

    def _preprocess(cnt):
        # Drop EOG channels, scale to microvolt for numerical stability,
        # bandpass filter and standardize.
        cnt = cnt.drop_channels(["EOG-left", "EOG-central", "EOG-right"])
        assert len(cnt.ch_names) == 22
        cnt = mne_apply(lambda a: a * 1e6, cnt)
        cnt = mne_apply(
            lambda a: bandpass_cnt(a, low_cut_hz, high_cut_hz,
                                   cnt.info["sfreq"], filt_order=3, axis=1),
            cnt)
        return mne_apply(
            lambda a: exponential_running_standardize(
                a.T, factor_new=factor_new,
                init_block_size=init_block_size, eps=1e-4).T,
            cnt)

    train_cnt = _preprocess(train_cnt)
    test_cnt = _preprocess(test_cnt)

    marker_def = OrderedDict([("Left Hand", [1]), ("Right Hand", [2]),
                              ("Foot", [3]), ("Tongue", [4])])
    train_set = create_signal_target_from_raw_mne(train_cnt, marker_def, ival)
    test_set = create_signal_target_from_raw_mne(test_cnt, marker_def, ival)
    train_set, valid_set = split_into_two_sets(
        train_set, first_set_fraction=1 - valid_set_fraction)

    set_random_seeds(seed=20190706, cuda=cuda)
    n_classes = 4
    n_chans = int(train_set.X.shape[1])
    # Trial-wise decoding: the input window is the whole trial.
    input_time_length = train_set.X.shape[2]
    if model == "shallow":
        model = ShallowFBCSPNet(n_chans, n_classes,
                                input_time_length=input_time_length,
                                final_conv_length="auto").create_network()
    elif model == "deep":
        model = Deep4Net(n_chans, n_classes,
                         input_time_length=input_time_length,
                         final_conv_length="auto").create_network()
    if cuda:
        model.cuda()
    log.info("Model: \n{:s}".format(str(model)))

    optimizer = optim.Adam(model.parameters())
    iterator = BalancedBatchSizeIterator(batch_size=batch_size)
    stop_criterion = Or([MaxEpochs(max_epochs),
                         NoDecrease("valid_misclass", max_increase_epochs)])
    monitors = [LossMonitor(), MisclassMonitor(), RuntimeMonitor()]
    model_constraint = MaxNormDefaultConstraint()

    exp = Experiment(model, train_set, valid_set, test_set,
                     iterator=iterator,
                     loss_function=F.nll_loss,
                     optimizer=optimizer,
                     model_constraint=model_constraint,
                     monitors=monitors,
                     stop_criterion=stop_criterion,
                     remember_best_column="valid_misclass",
                     run_after_early_stop=True,
                     cuda=cuda)
    exp.run()
    return exp
def run_exp(data_folder, session_id, subject_id, low_cut_hz, model, cuda):
    """Run a trial-wise 2-class decoding experiment on GIGAscience MI data.

    Loads the subject's session .mat file, restricts it to 22
    motor-cortex channels, resamples to 250 Hz, preprocesses the signal,
    builds a shallow or deep ConvNet (selected by ``model``), trains it
    trial-wise with early stopping, and returns the finished Experiment.
    """
    # Experiment hyperparameters.
    ival = [-500, 4000]
    max_epochs = 1600
    max_increase_epochs = 160
    batch_size = 10
    high_cut_hz = 38
    factor_new = 1e-3
    init_block_size = 1000
    valid_set_fraction = .2

    # Disabled alternative loader kept for reference (BCI Competition IV 2a).
    '''
    # BCIcompetition
    train_filename = 'A{:02d}T.gdf'.format(subject_id)
    test_filename = 'A{:02d}E.gdf'.format(subject_id)
    train_filepath = os.path.join(data_folder, train_filename)
    test_filepath = os.path.join(data_folder, test_filename)
    train_label_filepath = train_filepath.replace('.gdf', '.mat')
    test_label_filepath = test_filepath.replace('.gdf', '.mat')

    train_loader = BCICompetition4Set2A(
        train_filepath, labels_filename=train_label_filepath)
    test_loader = BCICompetition4Set2A(
        test_filepath, labels_filename=test_label_filepath)
    train_cnt = train_loader.load()
    test_cnt = test_loader.load()
    '''

    # GIGAscience: train and test variables live in one session file.
    filename = 'sess{:02d}_subj{:02d}_EEG_MI.mat'.format(
        session_id, subject_id)
    filepath = os.path.join(data_folder, filename)
    train_variable = 'EEG_MI_train'
    test_variable = 'EEG_MI_test'

    train_loader = GIGAscience(filepath, train_variable)
    test_loader = GIGAscience(filepath, test_variable)
    train_cnt = train_loader.load()
    test_cnt = test_loader.load()

    # Preprocessing

    # Full channel montage of the recording, kept for reference.
    '''
    channel ['Fp1', 'Fp2', 'F7', 'F3', 'Fz', 'F4', 'F8', 'FC5', 'FC1', 'FC2', 'FC6', 'T7', 'C3', 'Cz', 'C4', 'T8', 'TP9', 'CP5',
     'CP1', 'CP2', 'CP6', 'TP10', 'P7', 'P3', 'Pz', 'P4', 'P8', 'PO9', 'O1', 'Oz', 'O2', 'PO10', 'FC3', 'FC4', 'C5', 'C1', 'C2',
     'C6', 'CP3', 'CPz', 'CP4', 'P1', 'P2', 'POz', 'FT9', 'FTT9h', 'TTP7h', 'TP7', 'TPP9h', 'FT10', 'FTT10h', 'TPP8h', 'TP8',
     'TPP10h', 'F9', 'F10', 'AF7', 'AF3', 'AF4', 'AF8', 'PO3', 'PO4']
    '''

    # Keep 22 sensorimotor channels (motor-cortex area).
    train_cnt = train_cnt.pick_channels([
        'FC5', 'FC3', 'FC1', 'Fz', 'FC2', 'FC4', 'FC6', 'C5', 'C3', 'C1',
        'Cz', 'C2', 'C4', 'C6', 'CP5', 'CP3', 'CP1', 'CPz', 'CP2', 'CP4',
        'CP6', 'Pz'
    ])
    # Resample to 250 Hz, keeping the stored events in sync.
    train_cnt, train_cnt.info['events'] = train_cnt.copy().resample(
        250, npad='auto', events=train_cnt.info['events'])
    assert len(train_cnt.ch_names) == 22

    # Convert to microvolt for numerical stability of next operations.
    train_cnt = mne_apply(lambda a: a * 1e6, train_cnt)
    train_cnt = mne_apply(
        lambda a: bandpass_cnt(a, low_cut_hz, high_cut_hz,
                               train_cnt.info['sfreq'], filt_order=3, axis=1),
        train_cnt)
    train_cnt = mne_apply(
        lambda a: exponential_running_standardize(a.T, factor_new=factor_new,
                                                  init_block_size=
                                                  init_block_size,
                                                  eps=1e-4).T,
        train_cnt)

    # Same channel selection / resampling / preprocessing for the test session.
    test_cnt = test_cnt.pick_channels([
        'FC5', 'FC3', 'FC1', 'Fz', 'FC2', 'FC4', 'FC6', 'C5', 'C3', 'C1',
        'Cz', 'C2', 'C4', 'C6', 'CP5', 'CP3', 'CP1', 'CPz', 'CP2', 'CP4',
        'CP6', 'Pz'
    ])
    test_cnt, test_cnt.info['events'] = test_cnt.copy().resample(
        250, npad='auto', events=test_cnt.info['events'])
    assert len(test_cnt.ch_names) == 22
    test_cnt = mne_apply(lambda a: a * 1e6, test_cnt)
    test_cnt = mne_apply(
        lambda a: bandpass_cnt(a, low_cut_hz, high_cut_hz,
                               test_cnt.info['sfreq'], filt_order=3, axis=1),
        test_cnt)
    test_cnt = mne_apply(
        lambda a: exponential_running_standardize(a.T, factor_new=factor_new,
                                                  init_block_size=
                                                  init_block_size,
                                                  eps=1e-4).T,
        test_cnt)

    # Two-class motor imagery task: right vs. left hand.
    marker_def = OrderedDict([('Right Hand', [1]), ('Left Hand', [2])])

    train_set = create_signal_target_from_raw_mne(train_cnt, marker_def, ival)
    test_set = create_signal_target_from_raw_mne(test_cnt, marker_def, ival)

    train_set, valid_set = split_into_two_sets(
        train_set, first_set_fraction=1 - valid_set_fraction)

    set_random_seeds(seed=20190706, cuda=cuda)

    n_classes = 2
    n_chans = int(train_set.X.shape[1])
    # Trial-wise decoding: the input window is the whole trial.
    input_time_length = train_set.X.shape[2]
    if model == 'shallow':
        model = ShallowFBCSPNet(n_chans, n_classes,
                                input_time_length=input_time_length,
                                final_conv_length='auto').create_network()
    elif model == 'deep':
        model = Deep4Net(n_chans, n_classes,
                         input_time_length=input_time_length,
                         final_conv_length='auto').create_network()
    if cuda:
        model.cuda()
    log.info("Model: \n{:s}".format(str(model)))

    optimizer = optim.Adam(model.parameters())

    iterator = BalancedBatchSizeIterator(batch_size=batch_size)

    stop_criterion = Or([
        MaxEpochs(max_epochs),
        NoDecrease('valid_misclass', max_increase_epochs)
    ])

    monitors = [LossMonitor(), MisclassMonitor(), RuntimeMonitor()]

    model_constraint = MaxNormDefaultConstraint()

    exp = Experiment(model, train_set, valid_set, test_set,
                     iterator=iterator,
                     loss_function=F.nll_loss, optimizer=optimizer,
                     model_constraint=model_constraint, monitors=monitors,
                     stop_criterion=stop_criterion,
                     remember_best_column='valid_misclass',
                     run_after_early_stop=True, cuda=cuda)
    exp.run()
    return exp
def get_signal_from_cnt(cnt, markers, interval):
    """Epoch a continuous recording into a signal/target set.

    Thin convenience wrapper around ``create_signal_target_from_raw_mne``:
    ``markers`` maps class names to event codes and ``interval`` is the
    epoch window in ms around each event.
    """
    epoched = create_signal_target_from_raw_mne(cnt, markers, interval)
    return epoched