def test_select_epochs(self):
    """select_epochs returns the requested (or the complementary) epochs."""
    source = self.dat

    # a single epoch
    picked = select_epochs(source, [0])
    self.assertEqual(picked.data.shape[0], 1)
    np.testing.assert_array_equal(picked.data, source.data[[0]])
    np.testing.assert_array_equal(picked.axes[0], source.axes[0][0])

    # every second epoch
    picked = select_epochs(source, [0, 2])
    self.assertEqual(picked.data.shape[0], 2)
    np.testing.assert_array_equal(picked.data, source.data[::2])
    np.testing.assert_array_equal(picked.axes[0], source.axes[0][::2])

    # all epochs: the selection must equal the input
    every_index = list(range(source.data.shape[0]))
    picked = select_epochs(source, every_index)
    np.testing.assert_array_equal(picked.data, source.data)
    np.testing.assert_array_equal(picked.axes[0], source.axes[0])

    # inverted selection: drop the first epoch
    picked = select_epochs(source, [0], invert=True)
    self.assertEqual(picked.data.shape[0], 3)
    np.testing.assert_array_equal(picked.data, source.data[1:])
    np.testing.assert_array_equal(picked.axes[0], source.axes[0][1:])

    # inverted selection: drop every second epoch
    picked = select_epochs(source, [0, 2], invert=True)
    self.assertEqual(picked.data.shape[0], 2)
    np.testing.assert_array_equal(picked.data, source.data[1::2])
    np.testing.assert_array_equal(picked.axes[0], source.axes[0][1::2])
def run_fold(self, epo, bp_nr, fold_nr):
    """Split ``epo`` for one fold and run every class pair on that split.

    Parameters
    ----------
    epo : epoched data object
        Passed to ``select_epochs`` to build the train and test sets.
    bp_nr : int
        Forwarded unchanged to ``run_pair``.
    fold_nr : int
        Index into ``self.folds``; also the key under which the fold's
        train/test labels are stored.
    """
    self.print_fold_nr(fold_nr)
    train_test = self.folds[fold_nr]
    train_ind = train_test['train']
    test_ind = train_test['test']
    epo_train = select_epochs(epo, train_ind)
    epo_test = select_epochs(epo, test_ind)
    if self.standardize_epo:
        epo_train, epo_test = online_standardize_epo(epo_train, epo_test)
    # TODELAY: also integrate into init and store results
    self.train_labels_full_fold[fold_nr] = epo_train.axes[0]
    self.test_labels_full_fold[fold_nr] = epo_test.axes[0]

    # ``range`` instead of the Python 2-only ``xrange`` so this runs on
    # Python 2 and Python 3 alike.
    for pair_nr in range(len(self.class_pairs)):
        self.run_pair(epo_train, epo_test, bp_nr, fold_nr, pair_nr)
def run_fold(self, epo, bp_nr, fold_nr):
    """Split ``epo`` for one fold and run every class pair on that split.

    Parameters
    ----------
    epo : epoched data object
        Passed to ``select_epochs`` to build the train and test sets.
    bp_nr : int
        Forwarded unchanged to ``run_pair``.
    fold_nr : int
        Index into ``self.folds``; also the key under which the fold's
        train/test labels are stored.
    """
    self.print_fold_nr(fold_nr)
    train_test = self.folds[fold_nr]
    train_ind = train_test["train"]
    test_ind = train_test["test"]
    epo_train = select_epochs(epo, train_ind)
    epo_test = select_epochs(epo, test_ind)
    if self.standardize_epo:
        epo_train, epo_test = online_standardize_epo(epo_train, epo_test)
    # TODELAY: also integrate into init and store results
    self.train_labels_full_fold[fold_nr] = epo_train.axes[0]
    self.test_labels_full_fold[fold_nr] = epo_test.axes[0]

    # ``range`` instead of the Python 2-only ``xrange`` so this runs on
    # Python 2 and Python 3 alike.
    for pair_nr in range(len(self.class_pairs)):
        self.run_pair(epo_train, epo_test, bp_nr, fold_nr, pair_nr)
def preproc_and_load_clean_trials(self):
    """Preprocess the signal, cut it into trials and drop rejected trials.

    Runs the signal processor's continuous preprocessing and segmentation,
    removes the epochs listed in ``self.rejected_trials`` (and their
    markers), discards the continuous signal and finally runs the
    per-trial preprocessing.
    """
    log.info("Preprocessing set...")
    processor = self.signal_processor
    processor.preprocess_continuous_signal()
    processor.segment_into_trials()
    if len(self.rejected_trials) > 0:
        processor.epo = select_epochs(
            processor.epo, self.rejected_trials, invert=True)
        # select_epochs does not update the marker structure, so the
        # markers of the rejected trials are removed here by position.
        # A set gives O(1) membership tests instead of rescanning the
        # rejected-trial sequence for every marker.
        rejected = set(self.rejected_trials)
        processor.epo.markers = [
            marker for i, marker in enumerate(processor.epo.markers)
            if i not in rejected
        ]
    processor.remove_continuous_signal()
    processor.preprocess_trials()
    log.info("Done.")
def fill_filterbank_data(self, full_epo_data):
    """Fill ``full_epo_data`` with one band-passed, epoched copy per filterband.

    For every ``(low_freq, high_freq)`` pair in ``self.filterbands`` the
    continuous signal is band-pass filtered, segmented into epochs, cast to
    float32, stripped of the rejected trials and written into
    ``full_epo_data[:, :, :, filterband_i]``.

    Parameters
    ----------
    full_epo_data : 4d array
        Pre-allocated output buffer; it is written in place along its last
        axis (one slot per filterband).

    Notes
    -----
    Afterwards ``self.filterband_axes``, ``self.filterband_names`` and
    ``self.filterband_units`` describe the 4d layout, taken from the epo of
    the last filterband plus the extra filterband axis.
    """
    n_filterbands = len(self.filterbands)
    # ``enumerate`` replaces the Python 2-only ``xrange`` index loop.
    for filterband_i, (low_freq, high_freq) in enumerate(self.filterbands):
        log.info("Filterband {:d} of {:d}, from {:5.2f} to {:5.2f}".format(
            filterband_i + 1, n_filterbands, low_freq, high_freq))
        bandpassed_cnt = bandpass_cnt(self.signal_processor.cnt,
                                      low_freq, high_freq, filt_order=3)
        epo = segment_dat_fast(
            bandpassed_cnt,
            marker_def={'1 - Right Hand': [1], '2 - Left Hand': [2],
                        '3 - Rest': [3], '4 - Feet': [4]},
            ival=self.signal_processor.segment_ival)
        epo.data = np.float32(epo.data)
        epo = select_epochs(epo, self.rejected_trials, invert=True)
        full_epo_data[:, :, :, filterband_i] = epo.data
        # Drop the per-band copies right away to keep peak memory low; the
        # epo object itself is kept because its metadata is read below.
        del epo.data
        del bandpassed_cnt
    self.filterband_axes = epo.axes + [self.filterbands.tolist()]
    self.filterband_names = epo.names + ['filterband']
    self.filterband_units = epo.units + ['Hz']
def calculate_csp(epo, classes=None):
    """Compute Common Spatial Pattern (CSP) filters and patterns for two classes.

    The patterns are computed as in the matlab bbci toolbox, see
    https://github.com/bbci/bbci_public/blob/c7201e4e42f873cced2e068c6cbb3780a8f8e9ec/processing/proc_csp.m#L112

    Filters and patterns are stored column-wise: use the columns of the
    returned matrices.

    Examples
    --------
    Calculate the CSP for the first two classes::

        >>> w, a, d = calculate_csp(epo)
        >>> # Apply the first two and the last two columns of the sorted
        >>> # filter to the data
        >>> filtered = apply_spatial_filter(epo, w[:, [0, 1, -2, -1]])
        >>> # You'll probably want to get the log-variance along the time
        >>> # axis, this should result in four numbers (one for each
        >>> # channel)
        >>> filtered = np.log(np.var(filtered, 0))

    Select two classes manually::

        >>> w, a, d = calculate_csp(epo, [2, 5])

    Parameters
    ----------
    epo : epoched Data object
        this method relies on the ``epo`` to have three dimensions in
        the following order: class, time, channel
    classes : list of two ints, optional
        If ``None`` the first two different class indices found in
        ``epo.axes[0]`` are chosen automatically, otherwise the class
        indices can be chosen manually by setting ``classes``

    Returns
    -------
    v : 2d array
        the sorted spatial filters
    a : 2d array
        the sorted spatial patterns. Column i of a represents the
        pattern of the filter in column i of v.
    d : 1d array
        the real parts of the generalized eigenvalues belonging to the
        components, sorted in descending order

    Raises
    ------
    AssertionError :
        If:

        * ``classes`` is not ``None`` and has less than two elements
        * ``classes`` is not ``None`` and the first two elements are
          not found in the ``epo``
        * ``classes`` is ``None`` but there are less than two
          different classes in the ``epo``

    See Also
    --------
    :func:`apply_spatial_filter`, :func:`apply_csp`, :func:`calculate_spoc`

    References
    ----------
    http://en.wikipedia.org/wiki/Common_spatial_pattern

    """
    n_channels = epo.data.shape[-1]
    class_axis = epo.axes[0]
    if classes is not None:
        assert (len(classes) >= 2 and
                classes[0] in class_axis and
                classes[1] in class_axis)
        cidx1, cidx2 = classes[0], classes[1]
    else:
        # Pick the first two distinct class indices in order of appearance.
        # np.unique is only used for counting, because it would sort the
        # indices first.
        assert len(np.unique(class_axis)) >= 2
        cidx1 = class_axis[0]
        cidx2 = class_axis[class_axis != cidx1][0]

    # One covariance matrix per class. The data is reshaped to
    # (observations, channels), i.e. trials and time are stacked per channel.
    covariances = []
    for cidx in (cidx1, cidx2):
        trial_idx = np.nonzero(class_axis == cidx)[0]
        class_epo = select_epochs(epo, trial_idx, classaxis=0)
        observations = class_epo.data.reshape(-1, n_channels)
        covariances.append(np.cov(observations.transpose()))
    c1, c2 = covariances

    # The CSP objective is solved as a generalized eigenvalue problem
    # (in matlab the signature is v, d = eig(a, b)).
    eigenvalues, eigenvectors = sp.linalg.eig(c1 - c2, c1 + c2)
    eigenvalues = eigenvalues.real
    # sort eigenvalues and eigenvectors consistently, largest first
    descending = np.argsort(eigenvalues)[::-1]
    d = np.take(eigenvalues, descending)
    v = np.take(eigenvectors, descending, axis=1)

    # old pattern computation
    # a = sp.linalg.inv(v).transpose()
    c_avg = (c1 + c2) / 2.0

    # compare
    # https://github.com/bbci/bbci_public/blob/c7201e4e42f873cced2e068c6cbb3780a8f8e9ec/processing/proc_csp.m#L112
    # with W := v
    v_with_cov = c_avg.dot(v)
    source_cov = v.T.dot(c_avg).dot(v)

    # matlab-python comparison
    #
    #   python:
    #     v_with_cov = np.array([[1,2,-2], [3,-2,4], [5,1,0.3]])
    #     source_cov = np.array([[1,2,0.5], [2,0.6,4], [0.5,4,2]])
    #     sp.linalg.solve(source_cov.T, v_with_cov.T).T
    #   matlab:
    #     v_with_cov = [[1,2,-2], [3,-2,4], [5,1,0.3]]
    #     source_cov = [[1,2,0.5], [2,0.6,4], [0.5,4,2]]
    #     v_with_cov / source_cov
    a = sp.linalg.solve(source_cov.T, v_with_cov.T).T
    return v, a, d
def calculate_csp(epo, classes=None):
    """Compute Common Spatial Pattern (CSP) filters and patterns for two classes.

    The patterns are computed as in the matlab bbci toolbox, see
    https://github.com/bbci/bbci_public/blob/c7201e4e42f873cced2e068c6cbb3780a8f8e9ec/processing/proc_csp.m#L112

    Filters and patterns are stored column-wise: use the columns of the
    returned matrices.

    Examples
    --------
    Calculate the CSP for the first two classes::

        >>> w, a, d = calculate_csp(epo)
        >>> # Apply the first two and the last two columns of the sorted
        >>> # filter to the data
        >>> filtered = apply_spatial_filter(epo, w[:, [0, 1, -2, -1]])
        >>> # You'll probably want to get the log-variance along the time
        >>> # axis, this should result in four numbers (one for each
        >>> # channel)
        >>> filtered = np.log(np.var(filtered, 0))

    Select two classes manually::

        >>> w, a, d = calculate_csp(epo, [2, 5])

    Parameters
    ----------
    epo : epoched Data object
        this method relies on the ``epo`` to have three dimensions in
        the following order: class, time, channel
    classes : list of two ints, optional
        If ``None`` the first two different class indices found in
        ``epo.axes[0]`` are chosen automatically, otherwise the class
        indices can be chosen manually by setting ``classes``

    Returns
    -------
    v : 2d array
        the sorted spatial filters
    a : 2d array
        the sorted spatial patterns. Column i of a represents the
        pattern of the filter in column i of v.
    d : 1d array
        the real parts of the generalized eigenvalues belonging to the
        components, sorted in descending order

    Raises
    ------
    AssertionError :
        If:

        * ``classes`` is not ``None`` and has less than two elements
        * ``classes`` is not ``None`` and the first two elements are
          not found in the ``epo``
        * ``classes`` is ``None`` but there are less than two
          different classes in the ``epo``

    See Also
    --------
    :func:`apply_spatial_filter`, :func:`apply_csp`, :func:`calculate_spoc`

    References
    ----------
    http://en.wikipedia.org/wiki/Common_spatial_pattern

    """
    n_channels = epo.data.shape[-1]
    labels = epo.axes[0]

    if classes is None:
        # Take the first two distinct class indices in order of appearance;
        # np.unique only serves to check that there are at least two,
        # because it would sort the indices.
        assert len(np.unique(labels)) >= 2
        cidx1 = labels[0]
        cidx2 = labels[labels != cidx1][0]
    else:
        assert (len(classes) >= 2 and
                classes[0] in labels and
                classes[1] in labels)
        cidx1 = classes[0]
        cidx2 = classes[1]

    def class_covariance(cidx):
        # Covariance over a (observations, channels) matrix: trials and
        # time are stacked together per channel.
        members = np.nonzero(labels == cidx)[0]
        stacked = select_epochs(epo, members, classaxis=0).data
        return np.cov(stacked.reshape(-1, n_channels).transpose())

    c1 = class_covariance(cidx1)
    c2 = class_covariance(cidx2)

    # solution of the csp objective via a generalized eigenvalue problem
    # (in matlab the signature is v, d = eig(a, b))
    d, v = sp.linalg.eig(c1 - c2, c1 + c2)
    d = d.real
    # bring eigenvalues and eigenvectors into the same descending order
    order = np.argsort(d)[::-1]
    d = np.take(d, order)
    v = np.take(v, order, axis=1)

    # Now compute patterns
    # old pattern computation
    # a = sp.linalg.inv(v).transpose()
    c_avg = (c1 + c2) / 2.0

    # compare
    # https://github.com/bbci/bbci_public/blob/c7201e4e42f873cced2e068c6cbb3780a8f8e9ec/processing/proc_csp.m#L112
    # with W := v
    v_with_cov = c_avg.dot(v)
    source_cov = v.T.dot(c_avg).dot(v)

    # matlab-python comparison
    #
    #   python:
    #     v_with_cov = np.array([[1,2,-2], [3,-2,4], [5,1,0.3]])
    #     source_cov = np.array([[1,2,0.5], [2,0.6,4], [0.5,4,2]])
    #     sp.linalg.solve(source_cov.T, v_with_cov.T).T
    #   matlab:
    #     v_with_cov = [[1,2,-2], [3,-2,4], [5,1,0.3]]
    #     source_cov = [[1,2,0.5], [2,0.6,4], [0.5,4,2]]
    #     v_with_cov / source_cov
    a = sp.linalg.solve(source_cov.T, v_with_cov.T).T
    return v, a, d
sizes = [20]#np.arange(30, 91, 15) # 4 items Accuracy = np.zeros((len(sizes), len(starts))) Accuracy.fill(0.54588) for i, start in enumerate(starts): for j, window_size in enumerate(sizes): train_data = load_epo_data('train', start, window_size) accuracy = np.zeros(10) for state in range(0, len(accuracy)): rs = cross_validation.ShuffleSplit(train_data.data.shape[0], n_iter=10, test_size=.15, random_state=state) rs = [[train_index, test_index] for train_index, test_index in rs][0] test_data_i = proc.select_epochs(train_data, rs[1]) train_data_i = proc.select_epochs(train_data, rs[0]) #expected = test_data_i.axes[0] expected = train_data_i.axes[0] # creating a CSP filter, preprocessing train data fv_train, filt = preprocess(train_data_i) # training LDA cfy = proc.lda_train(fv_train) # preprocess test data #fv_test, _ = preprocess(test_data_i, filt) # predicting result of the test data result = proc.lda_apply(fv_train, cfy)
def test_select_epochs_copy(self):
    """select_epochs must leave its input argument untouched."""
    snapshot = self.dat.copy()
    # the return value is irrelevant here, only the effect on self.dat counts
    select_epochs(self.dat, [0, 1])
    self.assertEqual(self.dat, snapshot)
def test_select_epochs_swapaxes(self):
    """select_epochs must honour a class axis other than the first one."""
    # move the class axis to position 2, select there, then move it back
    swapped = swapaxes(self.dat, 0, 2)
    selected_swapped = select_epochs(swapped, [0, 1], classaxis=2)
    restored = swapaxes(selected_swapped, 0, 2)
    # reference: the same selection on the default class axis
    reference = select_epochs(self.dat, [0, 1])
    self.assertEqual(restored, reference)
def test_select_epochs_with_cnt(self):
    """select_epochs must raise an exception when given cnt data."""
    # The fixture is turned into cnt-style data by removing its class_names.
    delattr(self.dat, 'class_names')
    self.assertRaises(AssertionError, select_epochs, self.dat, [0, 1])
def _subset_has_entries(value):
    """Return True if ``value`` is a non-empty array or any other truthy object."""
    if value is None:
        return False
    if isinstance(value, np.ndarray):
        # bool() of a multi-element array raises, so look at the size instead
        return value.size > 0
    return bool(value)


def _subset_keep_epochs(data, index_groups):
    """Keep only the epochs listed in ``index_groups`` (a list of index arrays)."""
    idx_to_keep = np.concatenate(index_groups).ravel()
    data = select_epochs(data, indices=idx_to_keep)
    data.subj_id = data.subj_id[idx_to_keep]
    data.bis = data.bis[idx_to_keep]
    return data


def subset_data(init_data, bis_crit=None, drop_perc=None,
                drop_from='beginning', subj_ids=None, use_min=None,
                use_from=None):
    """Subset an epoched Data object by BIS value, intra-OP time and subject.

    It might be useful to only look at data aligned with critical BIS values
    (e. g., BIS < 60) or only the second half of the OP.

    Parameters
    ----------
    init_data : wyrm.types.Data
        epoched data to subset. The function works on ``init_data.copy()``.
        Besides ``data`` and ``axes`` the object must carry ``subj_id`` and
        ``bis`` attributes, and ``fs`` when ``use_min`` is given.
    bis_crit : int, optional
        critical BIS value; only epochs whose BIS values are all
        ``<= bis_crit`` are kept
    drop_perc : float, optional
        fraction of every subject's epochs to drop (e.g. ``0.5`` drops
        half). ``None`` or ``0`` skips this step. Cannot be combined with
        ``use_min``.
    drop_from : str
        if ``drop_perc`` is passed, drop from beginning or end of the OP

        * 'beginning': drop from beginning
        * 'end': drop from end
    subj_ids : list or np.ndarray, optional
        if given (and not empty), only epochs whose ``subj_id`` is contained
        in ``subj_ids`` are kept
    use_min : tuple of two numbers, optional
        minute marks delimiting the window to keep, for every subject. The
        smaller value is the offset from the reference point, the difference
        between the two values is the window length.
    use_from : str
        if ``use_min`` is passed, the reference point of the minute marks

        * 'beginning': use_min relative to OP start
        * 'end': use_min relative to OP end

    Returns
    -------
    data : wyrm.types.Data
        subsetted data

    Raises
    ------
    TypeError
        if ``subj_ids`` is given but is neither a list nor an np.ndarray
    ValueError
        if both ``drop_perc`` and ``use_min`` are given, or if ``drop_from``
        / ``use_from`` is not one of 'beginning', 'end'
    """
    # ``if subj_ids:`` would raise for arrays with more than one element.
    filter_subjects = _subset_has_entries(subj_ids)
    if filter_subjects and not isinstance(subj_ids, (np.ndarray, list)):
        msg = 'When passing `subj_ids`, have to pass list or np.ndarray.'
        raise TypeError(msg)

    if drop_perc and use_min:
        msg = 'Can only use either drop_perc or use_min'
        raise ValueError(msg)

    data = init_data.copy()
    subj_id = data.subj_id.copy()
    # Read the epoch length from the full array: squeezing a per-subject
    # slice would shift the axes for single-epoch or single-sample data.
    n_samples_per_epoch = data.data.shape[1]

    if drop_perc:
        if drop_from not in ['beginning', 'end']:
            msg = 'drop_from must be one of \'beginning\', \'end\'.'
            raise ValueError(msg)

        idx_groups = []
        for subj in np.unique(subj_id):
            idx_subj = np.where(subj_id == subj)[0]
            n_epochs = len(idx_subj)
            n_samples = n_epochs * n_samples_per_epoch
            n_samples_to_drop = drop_perc * n_samples
            n_epochs_to_drop = int(
                np.floor(n_samples_to_drop / n_samples_per_epoch))
            if drop_from == 'beginning':
                idx_groups.append(idx_subj[n_epochs_to_drop:])
            else:
                # An explicit stop index: ``[:-0]`` would select nothing
                # when there is no epoch to drop.
                stop = max(n_epochs - n_epochs_to_drop, 0)
                idx_groups.append(idx_subj[:stop])
        data = _subset_keep_epochs(data, idx_groups)

    if use_min:
        if use_from not in ['beginning', 'end']:
            msg = 'use_from must be one of \'beginning\', \'end\'.'
            raise ValueError(msg)

        # The window size and its offset are the same for every subject.
        n_samples_to_keep = (max(use_min) - min(use_min)) * 60 * data.fs
        n_epochs_to_keep = int(
            np.floor(n_samples_to_keep / n_samples_per_epoch))
        n_samples_offset = min(use_min) * 60 * data.fs
        n_epochs_offset = int(
            np.floor(n_samples_offset / n_samples_per_epoch))

        idx_groups = []
        for subj in np.unique(subj_id):
            idx_subj = np.where(subj_id == subj)[0]
            if use_from == 'beginning':
                start = n_epochs_offset
                stop = n_epochs_offset + n_epochs_to_keep
            else:
                # Count back from the end with non-negative indices so that
                # an offset of zero keeps the very last epoch as well.
                stop = max(len(idx_subj) - n_epochs_offset, 0)
                start = max(stop - n_epochs_to_keep, 0)
            idx_groups.append(idx_subj[start:stop])
        data = _subset_keep_epochs(data, idx_groups)

    dat = data.data.copy()
    bis = data.bis.copy()
    axes = data.axes.copy()
    subj_id = data.subj_id.copy()

    # only keep windows where BIS <= bis_crit
    if bis_crit:
        new_idx = np.where(np.all(bis <= bis_crit, axis=1))
        dat = dat[new_idx]
        bis = bis[new_idx]
        axes[0] = axes[0][new_idx]
        subj_id = subj_id[new_idx]

    data.data = dat
    data.bis = bis
    data.axes = axes
    data.subj_id = subj_id

    if filter_subjects:
        mask = np.isin(data.subj_id, subj_ids)
        data.subj_id = data.subj_id[mask]
        # keep the BIS values aligned with the remaining epochs
        data.bis = data.bis[mask]
        dat = data.data.compress(mask, 0)  # classaxis is 0
        axes = data.axes[:]
        axes[0] = data.axes[0].compress(mask)
        data = data.copy(data=dat, axes=axes)

    return data
# Number of epochs per training subject; 5440 is presumably the total number
# of training epochs -- TODO confirm. Floor division keeps the value an
# integer on Python 3 as well, so the index ranges below stay integral.
n_epo_per_sub = 5440 // len(train_subs)


def epochs_indices(subj_indices):
    """Return the epoch indices belonging to the given subject positions.

    Subject position ``s`` is mapped to the contiguous index range
    ``[s * n_epo_per_sub, (s + 1) * n_epo_per_sub)``; the ranges of all
    requested subjects are concatenated in the given order.
    """
    return np.hstack([np.arange(s_ind * n_epo_per_sub,
                                (s_ind + 1) * n_epo_per_sub)
                      for s_ind in subj_indices])


# Subject-wise cross-validation of the CSP + LDA pipeline: the split is made
# over subjects, so all epochs of one subject end up on the same side.
for i, start in enumerate(starts):
    for j, window_size in enumerate(sizes):
        # one slot per random split below
        accuracy = np.zeros(20)
        train_data = load_epo_data('train', start, window_size)
        for state in range(0, len(accuracy)):
            # Seeded shuffle split over the subjects; only the first of the
            # generated (train, test) index pairs is used.
            rs = cross_validation.ShuffleSplit(len(train_subs), n_iter=10,
                                               test_size=.15,
                                               random_state=state)
            rs = [[train_index, test_index] for train_index, test_index in rs][0]
            train_data_i = proc.select_epochs(train_data, epochs_indices(rs[0]))
            test_data_i = proc.select_epochs(train_data, epochs_indices(rs[1]))
            expected = test_data_i.axes[0]
            # creating a CSP filter, preprocessing train data
            fv_train, filt = preprocess(train_data_i)
            # training LDA
            cfy = proc.lda_train(fv_train)
            # preprocess test data
            fv_test, _ = preprocess(test_data_i, filt)
            # predicting result of the test data
            result = proc.lda_apply(fv_test, cfy)