class CSPSVCClassifier(Classifier):
    def __init__(self, params, net_saver_dir):
        super(CSPSVCClassifier, self).__init__(params)
        self._init()

    def _init(self):
        self.n_components = self.params.get('n_components', 4)
        self.svc = SVC(C=1, kernel='linear', probability=True)
        self.csp = CSP(n_components=self.n_components, norm_trace=False)

    def _fit(self, X_train_dict, y_train, X_valid_dict, y_valid, **kwargs):
        X_train = X_train_dict['x']
        X_train_transformed = self.csp.fit_transform(X_train, y_train)
        self.svc.fit(X_train_transformed, y_train)
        return {'csp_filters': self.csp.patterns_}

    def _predict(self, X_test_dict, **kwargs):
        X_test = X_test_dict['x']
        X_test_transformed = self.csp.transform(X_test)
        return self.svc.predict(X_test_transformed)

    def _predict_proba(self, X_test_dict, **kwargs):
        X_test = X_test_dict['x']
        X_test_transformed = self.csp.transform(X_test)
        return self.svc.predict_proba(X_test_transformed)
def get_score(subject=1):
    epochs = mne_wrapper.get_epochs(subject)
    epochs_train = epochs.copy().crop(tmin=1., tmax=2.)
    epochs_data_train = epochs_train.get_data()
    labels = epochs.events[:, -1]

    cv = KFold(n_splits=5)
    csp = CSP(n_components=4, reg=None, norm_trace=False,
              transform_into="csp_space")
    fft = mne_wrapper.FFT()
    svc = svm.SVC(kernel="linear")

    scores = []
    self_scores = []
    for train_index, test_index in cv.split(epochs_data_train):
        # fit
        x = epochs_data_train[train_index]
        y = labels[train_index]
        x = csp.fit_transform(x, y)
        x = fft.transform(x)
        svc.fit(x, y)
        self_scores.append(svc.score(x, y))

        # estimate
        x_test = epochs_data_train[test_index]
        y_test = labels[test_index]
        x_test = csp.transform(x_test)
        x_test = fft.transform(x_test)
        score = svc.score(x_test, y_test)
        scores.append(score)

    return np.mean(self_scores), np.mean(scores)
def run_csp(run_data, label, n_comp):
    # transform data
    matrix, trials, labels = run_data
    num_trials = len(labels)
    d3_data = d3_matrix_creator(matrix, num_trials)

    # create channel info for the EpochsArray: 22 EEG channels
    ch_names = ['eeg' + str(x) for x in range(1, 23)]
    ch_types = ['eeg' for _ in range(1, 23)]

    # create label info
    labels = csp_label_reformat(labels, label)

    # create data_info and event_info
    data_info = mne.create_info(ch_names, HERTZ, ch_types, None)
    event_info = create_events(labels)

    # create MNE structure
    epochs_data = mne.EpochsArray(d3_data, data_info, event_info,
                                  verbose=False)

    # fit CSP on the epoched data
    labels = epochs_data.events[:, -1]
    csp = CSP(n_components=n_comp)
    csp = csp.fit(d3_data, labels)
    return csp
def build_model(self, data, label):
    global info, csp_components
    print("{}: Building model with {} and {}".format(
        self.name, np.shape(data), np.shape(label)))

    self.csp_list = []
    self.lda = LinearDiscriminantAnalysis()
    csp_feature_train = None

    for i in range(num_bank):
        low_cut = self.filter_bank[i][0]
        high_cut = self.filter_bank[i][1]

        MI_epochs = mne.EpochsArray(data, info)
        MI_epochs.filter(low_cut, high_cut, method='iir')
        MI_epochs.set_eeg_reference('average', projection=True)

        csp = CSP(n_components=csp_components, norm_trace=True)
        x_train = csp.fit_transform(MI_epochs.get_data(), label)
        self.csp_list.append(csp)

        if i == 0:
            csp_feature_train = x_train
        else:
            csp_feature_train = np.concatenate(
                (csp_feature_train, x_train), axis=1)

    self.lda.fit(csp_feature_train, label)
def selectBounds(data, label, bounds_list):
    n_components = 4  # the CSP filter outputs 4 features per band
    csp = CSP(n_components=n_components, reg=None, log=True, norm_trace=False)
    num = len(bounds_list)
    all_features = np.zeros([120, n_components * num])

    i = 0
    for bound in bounds_list:
        # band-pass filter into a separate variable so every band
        # starts from the raw data rather than the previous band's output
        filtered = butter_bandpass_filter(data, bound[0], bound[1])
        data_features = csp.fit_transform(filtered, label)
        all_features[:, i:i + n_components] = data_features
        i = i + n_components

    # select the top-10 features by mutual information
    select_K = sklearn.feature_selection.SelectKBest(
        mutual_info_classif, k=10).fit(all_features, label)
    selected_list = select_K.get_support(indices=True)

    # rank the bands by how often they appear among the selected features,
    # e.g. selected_list == [4 5 10 17 20 21 23 31 33 39]
    selected_bounds = []
    selected_dic = {}
    for idx in selected_list:
        bound_index = idx // n_components  # band that produced this feature
        selected_dic[bound_index] = selected_dic.get(bound_index, 0) + 1

    tmp = sorted(selected_dic.items(), key=lambda x: x[1], reverse=True)
    for band, _ in tmp:
        selected_bounds.append(bounds_list[band])
    return selected_bounds
def wrapper_csp(x, cl, reducedim):
    """Call the MNE CSP algorithm."""
    from mne.decoding import CSP
    csp = CSP(n_components=reducedim, cov_est="epoch", reg="ledoit_wolf")
    csp.fit(x, cl)
    c, d = csp.filters_.T[:, :reducedim], csp.patterns_[:reducedim, :]
    y = datatools.dot_special(c.T, x)
    return c, d, y
def train_transform(train_nparray, train_info, test_nparray, test_info,
                    best_opts, verbose=False):
    [train_X, train_y] = extract_X_and_y(train_nparray, train_info,
                                         best_opts, verbose=verbose)
    [test_X, test_y] = extract_X_and_y(test_nparray, test_info,
                                       best_opts, verbose=verbose)

    # train CSP with the best number of filters
    csp = CSP(n_components=best_opts.best_num_filters, reg=None, log=True)
    csp.fit(train_X, train_y)

    # apply CSP filters to train and test data
    train_feat = csp.transform(train_X)
    test_feat = csp.transform(test_X)

    return [train_feat, train_y, test_feat, test_y]
def process(filename):
    data_dir = os.path.join("../Datasets/", filename)
    data_path = os.path.join(data_dir, filename + '_cnt.txt')
    label_path = os.path.join(data_dir, filename + '_mrk.txt')
    data_df = pd.read_table(data_path, header=None)
    label_df = pd.read_table(label_path, header=None)

    # data overview
    print("data shape", data_df.shape)
    print("label shape", label_df.shape)

    # build markers from the label file
    label_array = label_df.dropna().values
    train_markers = []
    for events in label_array:
        if events[1] != 0:
            for i in range(0, 400, 50):
                train_markers.append((float(events[0]) + i,
                                      str(int(events[1]))))

    markers_subject1_class_1 = [(float(events[0]), str(int(events[1])))
                                for events in train_markers
                                if events[1] == '1']
    markers_subject1_class_2 = [(float(events[0]), str(int(events[1])))
                                for events in train_markers
                                if events[1] == '2']

    data_array = data_df.values
    cnt1 = convert_mushu_data(data_array, markers_subject1_class_1, 50, channels)
    cnt2 = convert_mushu_data(data_array, markers_subject1_class_2, 50, channels)
    epoch_subject1_class1 = segment_dat(cnt1, md, [0, 1000])  # 640 x 50 x 118
    epoch_subject1_class2 = segment_dat(cnt2, md, [0, 1000])  # 704 x 50 x 118
    final_epoch = append_epo(epoch_subject1_class1, epoch_subject1_class2)  # 1344 x 50 x 118
    targets = final_epoch.axes[0]

    methods = ['_csp', '_bandpowers', '_dct', '_wavelet']
    for i, func in enumerate(['_csp', utils.bandpowers,
                              utils.dct_features, utils.wavelet_features]):
        if func == '_csp':
            from mne.decoding import CSP
            csp = CSP(n_components=50, reg=None, log=True, norm_trace=True)
            dictionary = csp.fit_transform(final_epoch.data, targets)
        else:
            dictionary = feature_transform(final_epoch, func)

        # save the features together with the targets
        res = np.concatenate([dictionary, targets.reshape(-1, 1)], axis=1)
        res_df = pd.DataFrame(res)
        save_path = os.path.join(data_dir, filename + methods[i] + '.csv')
        res_df.to_csv(save_path, index=False)
        print("==> saved data at {}".format(save_path))
def CSP_data(selected_channels, selected_electrodes_reshaped,
             selected_electrodes_ref, n_patterns, zeros, n_freqs, y):
    x = create_data(selected_electrodes_ref, fs, n_samples, low, high,
                    n_freqs, zeros, length)
    print(x.shape)
    x_reshaped = np.reshape(x, (n_samples, n_channels_ref, n_freqs))

    csp1 = CSP(n_components=n_patterns, reg=None, log=True, norm_trace=False)
    csp2 = CSP(n_components=n_patterns, reg=None, log=True, norm_trace=False)

    # traditional CSP of the raw data
    x_raw_csp = csp1.fit_transform(selected_electrodes_reshaped, y)

    info = mne.create_info(selected_channels, sfreq=fs, ch_types='eeg')
    info['description'] = 'My motor imagery dataset!'
    info.set_montage('standard_1020')
    epochs = mne.EpochsArray(selected_electrodes_reshaped, info)

    # # Apply band-pass filter to the raw data
    # epochs.filter(low, high, fir_design='firwin', skip_by_annotation='edge')

    csp1.plot_patterns(epochs.info, ch_type='eeg', units='Patterns (AU)',
                       size=1.5)

    # CSP of the PSD data
    x_csp = csp2.fit_transform(x_reshaped, y)
    print("x_csp shape: ", x_csp.shape)

    return np.hstack((x_csp, x_raw_csp)), x, csp1, csp2
class Adjuster(Thread):
    """Thread to adjust CSP and classifier with new training data."""

    def __init__(self, datas, labels):
        Thread.__init__(self)
        self.datas = datas
        self.labels = labels
        self.csp = None
        self.clf = None

    def run(self):
        self.csp = CSP(reg='ledoit_wolf')
        self.csp.fit(self.datas, self.labels)
        self.clf = svm.SVC()
        self.clf.fit(self.csp.transform(self.datas), self.labels)
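# A minimal usage sketch for the Adjuster thread above (not from the original
# source): `epochs_data` is assumed to be a (trials, channels, times) array
# and `labels` a 1-D label array; `new_epochs` is a hypothetical batch of
# unseen trials.
adjuster = Adjuster(epochs_data, labels)
adjuster.start()   # fits CSP + SVC in the background
adjuster.join()    # wait for training to finish
# predictions = adjuster.clf.predict(adjuster.csp.transform(new_epochs))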
def __init__(self, data_channels=list(range(20)), window_size=1000,
             preprocessing=LowpassWrapper()):
    """
    Parameters
    ----------
    data_channels : list of int, default list(range(20))
        Channels that the classifier should use as input
    window_size : int, default 1000
        Number of samples of EEG data the classifier should use as input
    preprocessing : default LowpassWrapper()
        Step added to the start of the CSP+LDA sklearn pipeline
    """
    self.window_size = window_size
    self.data_channels = data_channels

    # make pipeline
    preproc = preprocessing
    lda = LinearDiscriminantAnalysis()
    csp = CSP(n_components=10, reg=None, log=None, norm_trace=False,
              component_order='alternate')
    self.clf = Pipeline([(str(preproc), preproc), ('CSP', csp), ('LDA', lda)])
def create_confidence_matrix(user_matrix, file_number=0, limit=None):
    from sklearn.pipeline import Pipeline
    if not limit:
        limit = 100

    # float matrix: cross-validation scores are fractions, so an integer
    # matrix would silently truncate them to 0
    score_matrix = np.full((len(user_matrix), len(user_matrix)), 0.0)
    classifier_matrix = [[0 for x in range(len(user_matrix))]
                         for y in range(len(user_matrix))]
    print("New confidence matrix")

    for id, subject in enumerate(user_matrix):
        for oid, other_subject in enumerate(user_matrix):
            if id == oid:
                continue
            lda = LDA()
            labels1 = [0 for i in range(len(subject[file_number]))]
            labels2 = [1 for j in range(len(other_subject[file_number]))]
            labels = np.concatenate((np.asarray(labels1), np.asarray(labels2)))
            try:
                data = np.concatenate((np.asarray(subject[file_number]),
                                       np.asarray(other_subject[file_number])))
            except ValueError as err:
                print("could not concatenate subject data:", err)
                continue
            if len(data) == len(labels):
                csp = CSP(n_components=4)
                clf = Pipeline([('CSP', csp), ('LDA', lda)])
                score_matrix[id][oid] = fit_classifier_cross_val_score(
                    data, labels, clf)
                classifier_matrix[id][oid] = clf
            else:
                print("data/label length mismatch")
    return score_matrix, classifier_matrix
def _csp_lda(self, eeg: EEG):
    print('Training CSP & LDA model')

    # convert data to mne.Epochs
    ch_names = eeg.get_board_names()
    ch_types = ['eeg'] * len(ch_names)
    sfreq: int = eeg.sfreq
    n_samples: int = min([t.shape[1] for t in self.trials])
    epochs_array: np.ndarray = np.stack(
        [t[:, :n_samples] for t in self.trials])

    info = mne.create_info(ch_names, sfreq, ch_types)
    epochs = mne.EpochsArray(epochs_array, info)

    # set montage
    montage = make_standard_montage('standard_1020')
    epochs.set_montage(montage)

    # apply band-pass filter
    epochs.filter(7., 30., fir_design='firwin', skip_by_annotation='edge',
                  verbose=False)

    # assemble a classifier
    lda = LinearDiscriminantAnalysis()
    csp = CSP(n_components=6, reg=None, log=True, norm_trace=False)

    # use scikit-learn Pipeline
    self.clf = Pipeline([('CSP', csp), ('LDA', lda)])

    # fit transformer and classifier to data
    self.clf.fit(epochs.get_data(), self.labels)
def test_ssd_pipeline():
    """Test if SSD works in a pipeline."""
    from sklearn.pipeline import Pipeline
    sf = 250
    X, A, S = simulate_data(n_trials=100, n_channels=20, n_samples=500)
    X_e = np.reshape(X, (100, 20, 500))
    # define binary random output
    y = np.random.randint(2, size=100)

    info = create_info(ch_names=20, sfreq=sf, ch_types='eeg')
    filt_params_signal = dict(l_freq=freqs_sig[0], h_freq=freqs_sig[1],
                              l_trans_bandwidth=4, h_trans_bandwidth=4)
    filt_params_noise = dict(l_freq=freqs_noise[0], h_freq=freqs_noise[1],
                             l_trans_bandwidth=4, h_trans_bandwidth=4)
    ssd = SSD(info, filt_params_signal, filt_params_noise)
    csp = CSP()
    pipe = Pipeline([('SSD', ssd), ('CSP', csp)])
    pipe.set_params(SSD__n_components=5)
    pipe.set_params(CSP__n_components=2)
    out = pipe.fit_transform(X_e, y)
    assert (out.shape == (100, 2))
    assert (pipe.get_params()['SSD__n_components'] == 5)
def fit(self, data, label):
    data_bank = dict()
    for i in range(self.n_bank):
        # get each frequency filter bank
        data_bank[i] = self.bank_filter(data, self.low[i], self.high[i],
                                        self.sample_rate)
        # fit a CSP feature extractor for each bank
        self.csp_bank[i] = CSP(n_components=self.csp_component, reg=None,
                               log=True, norm_trace=False)
        self.csp_bank[i].fit(data_bank[i], label)
def self_tune(self, X, y, verbose=False):
    # fix random seed for reproducibility
    seed = 5
    np.random.seed(seed)

    # define k-fold cross validation test harness
    kfold = StratifiedKFold(n_splits=self.tuning_csp_num_folds,
                            shuffle=True, random_state=seed)

    # init scores
    cvscores = {}
    for i in range(1, self.num_spatial_filters):
        cvscores[i + 1] = 0

    for i, (train, test) in enumerate(kfold.split(X, y)):
        # calculate CSP spatial filters
        csp = CSP(n_components=self.num_spatial_filters)
        csp.fit(X[train], y[train])

        # try all filters, from the given num down to 2
        # (1 is too often found to be overfitting)
        for j in range(2, self.num_spatial_filters):
            num_filters_to_try = j

            # calculate spatial filters
            csp.n_components = num_filters_to_try
            # apply CSP filters to train data
            tuning_train_LDA_features = csp.transform(X[train])
            np.nan_to_num(tuning_train_LDA_features)
            check_X_y(tuning_train_LDA_features, y[train])

            # apply CSP filters to test data
            tuning_test_LDA_features = csp.transform(X[test])
            np.nan_to_num(tuning_test_LDA_features)
            check_X_y(tuning_test_LDA_features, y[test])

            # train LDA
            lda = LinearDiscriminantAnalysis()
            prediction_score = lda.fit(tuning_train_LDA_features, y[train]).score(
                tuning_test_LDA_features, y[test])

            cvscores[num_filters_to_try] += prediction_score

            if verbose:
                print("prediction score", prediction_score,
                      "with", num_filters_to_try, "spatial filters")

    best_num = max(cvscores, key=cvscores.get)
    best_score = cvscores[best_num] / (i + 1)
    if verbose:
        print("best num filters:", best_num,
              "(average accuracy", best_score, ")")
        print("average scores per filter num:")
        for k in cvscores:
            print(k, ":", cvscores[k] / (i + 1))

    return [best_num, best_score]
def csp_training(raw, picks, nfilters):
    """Train a CSP filter.

    :param raw: Raw data
    :return: The fitted CSP filter
    """
    epochs_tot = []
    y = []

    # get event positions corresponding to HandStart
    events = find_events(raw, stim_channel='HandStart', verbose=False)

    # epoch the signal for 2 seconds after the movement onset
    epochs = Epochs(raw, events, {'during': 1}, 0, 2, proj=False,
                    picks=picks, baseline=None, preload=True,
                    add_eeg_ref=False, verbose=False)
    epochs_tot.append(epochs)
    y.extend([1] * len(epochs))

    # epoch the signal for 2 seconds before the movement onset;
    # this corresponds to the rest period
    epochs_rest = Epochs(raw, events, {'before': 1}, -2, 0, proj=False,
                         picks=picks, baseline=None, preload=True,
                         add_eeg_ref=False, verbose=False)

    # workaround to be able to concatenate epochs with MNE
    epochs_rest.times = epochs.times
    y.extend([-1] * len(epochs_rest))
    epochs_tot.append(epochs_rest)

    # concatenate all epochs
    epochs = concatenate_epochs(epochs_tot)

    # get data
    X = epochs.get_data()
    y = np.array(y)

    # train CSP with Ledoit-Wolf shrinkage ('lws' in older MNE versions)
    csp = CSP(n_components=nfilters, reg='ledoit_wolf')
    csp.fit(X, y)

    return csp
def create_fbcsp(bands, n_components=2, transform_into="average_power"):
    # `bands` is a flat list of alternating band edges: (low, high) pairs
    pipeline = []
    for low, high in zip(bands[::2], bands[1::2]):
        # each FeatureUnion branch needs a unique name, so key it by the band
        pipeline.append(
            ("pipe%d" % low,
             Pipeline([("bandpass_filter", BandPassFilter(low, high)),
                       ("csp", CSP(n_components=n_components,
                                   transform_into=transform_into))])))
    return FeatureUnion(pipeline)
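# A hypothetical usage sketch for create_fbcsp above: [4, 8, 8, 12, 12, 16]
# yields the sub-bands 4-8, 8-12 and 12-16 Hz; BandPassFilter is the
# project's own transformer, and X (trials x channels x times) and y are
# assumed to exist.
fbcsp = create_fbcsp([4, 8, 8, 12, 12, 16], n_components=2)
features = fbcsp.fit_transform(X, y)  # trials x (3 bands * 2 CSP components)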
def cspReduce(X, xTest, y, n_components):
    if np.size(xTest) == 0:
        raise NoTestError("A test file is needed for dimension reduction, "
                          "otherwise the test would be biased.\n"
                          "Add '-r 0.8' option when calling mainParams.py")

    # reformat X for the CSP function
    X = vecToMat(X)
    print(X.shape)

    # apply CSP
    csp = CSP(n_components=n_components)
    X = csp.fit_transform(X, y)
    xTest = vecToMat(xTest)
    xTest = csp.transform(xTest)
    return X, xTest
def train(self, reader, doBalanceLabels):
    trainFiles = reader.traindata
    triggers = reader.classtrainTriggers
    onsets = reader.classtrainOnsets
    finOnsets = reader.classtrainFinOnsets
    [self.trainData, self.labels] = utilities.dataLoadFromEDF(
        self, trainFiles, triggers, onsets, finOnsets, self.params)

    if doBalanceLabels:
        [self.trainData, self.labels] = utilities.balance_labels(
            self.trainData, self.labels)

    csp = CSP(n_components=self.numCSP, reg=None, log=True, norm_trace=False)
    csp.fit(self.trainData, self.labels)
    fVecs = csp.transform(self.trainData)
    self.trainResult.cspOp = csp

    # shuffle
    inds = np.random.permutation(fVecs.shape[0])
    fVecs = fVecs[inds, :]
    labels = self.labels[inds]

    # normalize
    self.trainResult.mean = np.mean(fVecs, 0)
    self.trainResult.std = np.std(fVecs, 0)
    for i in range(fVecs.shape[0]):
        fVecs[i, :] = (fVecs[i, :] - self.trainResult.mean) / self.trainResult.std
    fTransformed = fVecs

    # final classifier (e.g. LDA)
    if not (self.params.finalClassifier is None):
        [Op, fTransformed] = utilities.trainClassifier(
            self.params.finalClassifier, fTransformed, labels)
        self.trainResult.finalOp = Op

    self.trainResult.trainTransformedVecs = fTransformed
    self.trainResult.trainLabels = labels
def model(self, n_components):
    """
    Classification using Linear Discriminant Analysis (LDA).
    Signal decomposition using Common Spatial Patterns (CSP).
    """
    lda = LinearDiscriminantAnalysis()
    csp = CSP(n_components=n_components, reg=None, log=True, norm_trace=False)
    clf = Pipeline([('CSP', csp), ('LDA', lda)])
    return clf, csp
def build_model(self, data, label):
    global info, csp_components, low_cut, high_cut
    print("{}: Building model with {} and {}".format(
        self.name, np.shape(data), np.shape(label)))

    MI_epochs = mne.EpochsArray(data, info)
    MI_epochs.filter(low_cut, high_cut, method='iir')
    MI_epochs.set_eeg_reference('average', projection=True)

    self.clf = make_pipeline(
        CSP(n_components=csp_components, reg=None, log=True, norm_trace=False),
        LinearDiscriminantAnalysis())
    self.clf.fit(MI_epochs.get_data(), label)
def train_model(self, csp_component):
    # X has shape (n_trials, n_channels, n_times)
    self.csp = CSP(n_components=csp_component, reg=None, log=True,
                   norm_trace=False)
    self.clf = Pipeline([('CSP', self.csp),
                         ('SCALER_BEFOR', self.scaler_befor_lda),
                         ('LDA', self.lda)])  # , ('SCALER_AFTER', self.scaler_after_lda)
    self.clf.fit(self.train_data, self.labels)
    trans_result = self.clf.transform(self.train_data)
    self.scaler_after_lda.fit(trans_result)
def fit(self, X, y):
    filtersCount = np.shape(self.filterDiaposones)[0]
    filters = []
    CSPs = []
    Pipelines = []
    FB = []
    for i in range(filtersCount):
        lowF = self.filterDiaposones[i][0]
        highF = self.filterDiaposones[i][1]
        filters.append(Bandpass(self.frequency, lowF, highF, axis=2))
        CSPs.append(CSP(n_components=self.n_components,
                        transform_into=self.tranform_info_CSP,
                        reg=None, log=self.log))
        Pipelines.append(Pipeline([('filter_' + str(i), filters[i]),
                                   ('csp_' + str(i), CSPs[i])]))
        FB.append(('F_' + str(i), Pipelines[i]))

    FU = FeatureUnion(FB)
    FU.fit(X, y)

    # keep only the top CSP filters of each band
    CSPfilters = []
    for i in range(filtersCount):
        CSPfilters.append(CSPs[i].filters_[:self.n_components])
    self.CSPfilters = CSPfilters

    # apply the stored spatial filters to the band-passed epochs
    buf = np.empty(shape=(np.shape(X)[0],
                          self.n_components * filtersCount,
                          np.shape(X)[2]))
    for i in range(filtersCount):
        buf[:, i * self.n_components:(i + 1) * self.n_components] = np.asarray(
            [np.dot(self.CSPfilters[i], epoch)
             for epoch in filters[i].transform(X)])
    X = buf

    # compute features (mean band power)
    X = (X ** 2).mean(axis=2)

    # standardize features
    self.mean_ = X.mean(axis=0)
    self.std_ = X.std(axis=0)
    return self
def get_trained_CSP_LDA(data, labels, window_size=None,
                        preprocessing=LowpassWrapper(), step_size=None):
    """Returns a trained sklearn pipeline of [csp, lda]

    Parameters
    ----------
    data : np.array
        Data to train the classifier on, shape (trials, channels, time)
    labels : np.array
        1d array of labels to the training data
    window_size : int
        Size in samples (not seconds) the classifier should be trained on.
        If None, the function will train with each entire trial as input.
        Default None
    preprocessing : object implementing fit_transform and transform
        Preprocessing step to add at the beginning of the sklearn pipeline.
        Default BIpy.preprocessing.LowpassWrapper()
    step_size : int, default None
        Stride/step size passed to BIpy.data_processing.get_windows().
        If None, the classifier will be trained on raw data and get_windows()
        is never used.

    Returns
    -------
    clf
        A trained CSP + LDA Pipeline
    """
    # slide a window over trial data to generate many more data points
    if step_size and window_size and window_size < data.shape[-1]:
        data, labels = get_windows(data, labels, window_size, step_size)

    # make pipeline
    preproc = preprocessing
    lda = LinearDiscriminantAnalysis()
    csp = CSP(n_components=10, reg=None, log=None, norm_trace=False,
              component_order='alternate')
    clf = Pipeline([(str(preproc), preproc), ('CSP', csp), ('LDA', lda)])

    # train model
    clf.fit(data, labels)

    # return trained model
    return clf
def main(person):
    h1_list = get_acc(person)
    h0_list = []
    for i in h1_list:
        j = 1 - i
        h0_list.append(j)

    data, label = load_data(person)
    train_data, train_label, _, _ = seperate_test(data, label, 0)

    csp = CSP(n_components=4, reg=None, log=False, norm_trace=False)
    lda = LinearDiscriminantAnalysis()
    clf = Pipeline([('CSP', csp), ('LDA', lda)])
    clf.fit(train_data, train_label)

    sec = 0
    seconds = []
    acc_dic = {}
    for i in range(20):
        acc_dic["test{}".format(i)] = []
    while sec <= 1.4:
        seconds.append(sec)
        sec = sec + 0.1

    j = 0
    with tqdm(total=len(seconds) * 20) as pbar:
        for second in seconds:
            _, _, test_data, test_label = seperate_test(data, label, second)
            test_feature = csp.transform(test_data)
            p = lda.predict_proba(test_feature)
            for i in range(20):
                p_h1 = max(p[i][0], p[i][1])
                p_h0 = min(p[i][0], p[i][1])
                predict_ = 0 if p[i][0] > p[i][1] else 1
                pre_p = p_h1 * h1_list[j] / (p_h1 * h1_list[j] + p_h0 * h0_list[j])
                acc_dic["test{}".format(i)].append(
                    [p_h1, p_h0, pre_p, predict_])
                pbar.update(1)
            j = j + 1

    return acc_dic, test_label
def SVM(pca_L, pca_RR, labels_data):
    from sklearn.svm import SVC  # noqa
    from sklearn.model_selection import ShuffleSplit  # noqa
    from sklearn.model_selection import cross_val_score
    from sklearn.pipeline import make_pipeline
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.svm import LinearSVC

    # Different classifiers can be tested simply by defining them here
    # and adding them to the pipeline after the CSP filter
    forest_clf = RandomForestClassifier(random_state=42)
    linear_svc = LinearSVC(random_state=42)

    # Apply CSP before the SVC for better results; a point of investigation
    # for obtaining better accuracies
    from mne.decoding import CSP  # noqa
    clf = make_pipeline(
        CSP(n_components=4, reg='oas', log=True, norm_trace=False),
        linear_svc)  # SVC(C=1, kernel='linear')

    # forest_scores = cross_val_score(forest_clf, X_train, y_train, cv=10)
    # forest_scores.mean()

    # To apply a LogisticRegression instead:
    #
    # from sklearn.preprocessing import StandardScaler
    # from mne.decoding import (SlidingEstimator, cross_val_multiscore)
    # from sklearn.pipeline import make_pipeline
    # from sklearn.linear_model import LogisticRegression
    #
    # clf = make_pipeline(StandardScaler(), LogisticRegression())
    # time_decod = SlidingEstimator(clf, n_jobs=1, scoring='accuracy')
    # L_score = cross_val_multiscore(time_decod, pca_L, labels_data[0][:, -1], cv=cv, n_jobs=1)
    # R_score = cross_val_multiscore(time_decod, pca_RR, labels_data[1][:, -1], cv=cv, n_jobs=1)

    # Cross-validation based on ShuffleSplit: random train/test splits
    cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
    L_score = cross_val_score(clf, pca_L, labels_data[0][:, -1], cv=cv)
    R_score = cross_val_score(clf, pca_RR, labels_data[1][:, -1], cv=cv)

    print('Left Hemisphere Score', np.mean(L_score))
    print('Right Hemisphere Score', np.mean(R_score))

    return (L_score, R_score)
def fit(self, X, y):
    self.csp = []
    nyq = 0.5 * self.fs
    self.labels = np.unique(y)
    for band in self.bands:
        b, a = butter(5, band / nyq, btype='band')
        Xband = filtfilt(b, a, X, axis=2)  # trials x channels x time
        csp_band = []
        for c in self.labels:
            # one-vs-rest CSP for each class
            csp_class = CSP(self.n_components).fit(Xband, y == c)
            csp_band.append(csp_class)
        self.csp.append(csp_band)  # indexed [band][class]
    return self
def fit(self, X, y):
    # filter and crop X
    X = self.preprocess_X(X)

    # check that X and y have correct shape
    X, y = check_X_y(X, y, allow_nd=True)

    # set internal vars
    self.classes_ = unique_labels(y)
    self.X_ = X
    self.y_ = y

    ################################################
    # self-tune to find the best number of CSP filters
    [self.best_num_filters, best_num_filters_score] = self.self_tune()

    # now use this insight to really fit:
    # calculate CSP spatial filters
    csp = CSP(n_components=self.best_num_filters, reg=None, log=True)
    csp.fit(self.X_, self.y_)

    # now use CSP spatial filters to transform
    classification_features = csp.transform(self.X_)

    # train LDA
    classifier = LinearDiscriminantAnalysis()
    classifier.fit(classification_features, self.y_)

    self.featureTransformer = csp
    self.classifier = classifier

    # finish up, set the flag to indicate "fitted" state
    self.fit_ = True

    # return the classifier
    return self
def classify(epochs, config):
    n_splits = config['classification']['n_splits']
    n_repeats = config['classification']['n_repeats']
    classifier = config['classification']['classifier']

    csp = CSP(n_components=config['classification']['csp_num_components'],
              norm_trace=config['classification']['csp_norm_trace'])

    labels = epochs.events[:, -1]
    cv = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats)

    scores = []
    epochs_data = epochs.get_data()

    for train_idx, test_idx in cv.split(labels):
        y_train, y_test = labels[train_idx], labels[test_idx]

        x_train = csp.fit_transform(epochs_data[train_idx], y_train)
        x_test = csp.transform(epochs_data[test_idx])

        classifier.fit(x_train, y_train)
        scores.append(classifier.score(x_test, y_test))

    return np.mean(scores)
def plotCSP(experiments, components, task):
    epochs = getEpochs(experiments, task)
    y = epochs.events[:, 2]
    epochs_data = epochs.get_data()

    csp = CSP(n_components=components, reg=None, log=False, norm_trace=False)
    csp.fit_transform(epochs_data, y)
    csp.plot_patterns(epochs.info, ch_type='eeg', units='Patterns (AU)',
                      size=1.5)
def create_fbcsp(low_freq, n_filters=12, band_overlap=2, band_width=4,
                 n_csp_components=2, n_jobs=1):
    pipeline_list = []
    step = band_width - band_overlap
    bands = range(low_freq, low_freq + n_filters * step, step)
    for low in bands:
        pipeline_list.append(
            ("pipe%d" % low,
             Pipeline([("filter", BandPassFilter(low, low + band_width)),
                       ("csp", CSP(n_components=n_csp_components))])))
    return FeatureUnion(pipeline_list, n_jobs=n_jobs)
def fit_transform(self, data, label):
    data_bank = dict()
    csp_feat = dict()
    feature = None
    for i in range(self.n_bank):
        # get each frequency filter bank
        data_bank[i] = self.bank_filter(data, self.low[i], self.high[i],
                                        self.sample_rate)
        # extract the CSP feature for each bank
        self.csp_bank[i] = CSP(n_components=4, reg=None, log=True,
                               norm_trace=False)
        self.csp_bank[i].fit(data_bank[i], label)
        csp_feat[i] = self.csp_bank[i].transform(data_bank[i])
        # stack the band features horizontally
        if feature is None:
            feature = csp_feat[i]
        else:
            feature = np.hstack([feature, csp_feat[i]])
    return feature
def analyzeCSP(components, examples, targets):
    csp = CSP(n_components=components, reg=None, log=True, norm_trace=False)
    csp_examples = csp.fit_transform(examples, targets)

    channel_names = np.loadtxt('./metadata/channel_names.csv', dtype=str)
    bci_info = create_info(channel_names.tolist(), 240, ch_types='eeg',
                           montage='biosemi64')

    csp.plot_patterns(bci_info, ch_type='eeg').savefig("CSP Patterns.png")
    csp.plot_filters(bci_info, ch_type='eeg').savefig("CSP Filters.png")

    return csp_examples, targets
def eval_classification(max_spatial_filters, train_X, train_y,
                        test_X, test_y, verbose=False):
    # Assemble a classifier:
    # train CSP with the maximum number of filters
    csp = CSP(n_components=max_spatial_filters, reg=None, log=True)
    csp.fit(train_X, train_y)

    best_num = 0
    best_score = 0.0

    # try filter counts 2 through 6
    for i in range(1, 6):  # max_spatial_filters
        num_filters_to_try = i + 1
        if verbose:
            print("trying with first", num_filters_to_try, "spatial filters")

        # apply CSP filters to train data
        csp.n_components = num_filters_to_try
        train_feat = csp.transform(train_X)

        # apply CSP filters to test data
        test_feat = csp.transform(test_X)

        # train LDA
        lda = LinearDiscriminantAnalysis()
        prediction_score = lda.fit(train_feat, train_y).score(test_feat, test_y)

        if prediction_score > best_score:
            best_score = prediction_score
            best_num = num_filters_to_try

        if verbose:
            print("prediction score", prediction_score)

    print("prediction score", best_score)
    print("best filters:", best_num)

    return [best_score, best_num]
# Read epochs (training will be done only between 1 and 2 s)
# Testing will be done with a running classifier
epochs = Epochs(raw, events, event_id, tmin, tmax, proj=True, picks=picks,
                baseline=None, preload=True)
epochs_train = epochs.copy().crop(tmin=1., tmax=2.)
labels = epochs.events[:, -1] - 2

###############################################################################
# Classification with linear discriminant analysis

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis  # noqa
from sklearn.model_selection import ShuffleSplit  # noqa

# Assemble a classifier
lda = LinearDiscriminantAnalysis()
csp = CSP(n_components=4, reg=None, log=True)

# Define a Monte Carlo cross-validation generator (reduces variance):
cv = ShuffleSplit(10, test_size=0.2, random_state=42)

scores = []
epochs_data = epochs.get_data()
epochs_data_train = epochs_train.get_data()

# Use scikit-learn Pipeline with the cross_val_score function
from sklearn.pipeline import Pipeline  # noqa
from sklearn.model_selection import cross_val_score  # noqa

clf = Pipeline([('CSP', csp), ('LDA', lda)])
scores = cross_val_score(clf, epochs_data_train, labels, cv=cv, n_jobs=1)

# Printing the results
class_balance = np.mean(labels == labels[0])
# .. topic:: Examples
#
#    * :ref:`sphx_glr_auto_examples_decoding_plot_decoding_csp_eeg.py`
#    * :ref:`sphx_glr_auto_examples_decoding_plot_decoding_csp_timefreq.py`
#
# .. note::
#
#     The winning entry of the Grasp-and-lift EEG competition in Kaggle used
#     the :class:`~mne.decoding.CSP` implementation in MNE and was featured as
#     a `script of the week`_.
#
# .. _script of the week: http://blog.kaggle.com/2015/08/12/july-2015-scripts-of-the-week/  # noqa
#
# We can use CSP with these data with:

csp = CSP(n_components=3, norm_trace=False)
clf = make_pipeline(csp, LogisticRegression(solver='lbfgs'))
scores = cross_val_multiscore(clf, X, y, cv=5, n_jobs=1)
print('CSP: %0.1f%%' % (100 * scores.mean(),))

###############################################################################
# Source power comodulation (SPoC)
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# Source Power Comodulation (:class:`mne.decoding.SPoC`) [3]_ identifies the
# composition of orthogonal spatial filters that maximally correlate with a
# continuous target.
#
# SPoC can be seen as an extension of CSP where the target is driven by a
# continuous variable rather than a discrete one. Typical applications include
# extraction of motor patterns using EMG power or audio patterns using the
# sound envelope.
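# A minimal SPoC sketch, not part of the original tutorial: it reuses the
# cross-validated pipeline style from the CSP block above, assuming X
# (epochs x channels x times) and a continuous target y (e.g. EMG power per
# epoch) are available; Ridge is an arbitrary choice of regressor.
from sklearn.linear_model import Ridge
from sklearn.pipeline import make_pipeline
from mne.decoding import SPoC, cross_val_multiscore

spoc = SPoC(n_components=2, log=True, reg='oas')
reg_clf = make_pipeline(spoc, Ridge())
# cross-validated R^2 of the regression, analogous to the CSP scores above
spoc_scores = cross_val_multiscore(reg_clf, X, y, cv=5, n_jobs=1)
print('SPoC R^2: %0.2f' % spoc_scores.mean())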
# Workaround to be able to concatenate epochs with MNE
epochs_rest.times = epochs.times
y.extend([-1] * len(epochs_rest))
epochs_tot.append(epochs_rest)

# Concatenate all epochs
epochs = concatenate_epochs(epochs_tot)

# get data
X = epochs.get_data()
y = np.array(y)

# train CSP with Ledoit-Wolf regularization ('lws' in older MNE versions)
csp = CSP(n_components=nfilters, reg='ledoit_wolf')
csp.fit(X, y)

################ Create Training Features #################################
# apply CSP filters and rectify the signal
feat = np.dot(csp.filters_[0:nfilters], raw._data[picks]) ** 2

# smooth by convolution with a rectangular window
feattr = np.array(
    Parallel(n_jobs=-1)(delayed(convolve)(feat[i], boxcar(nwin), 'full')
                        for i in range(nfilters)))
feattr = np.log(feattr[:, 0:feat.shape[1]])

# training labels are stored in the 6 last channels of the MNE raw object
labels = raw._data[32:]

################ Create test Features #####################################
def fit(self, X, y):
    # validate
    X, y = check_X_y(X, y, allow_nd=True)
    X = sklearn.utils.validation.check_array(X, allow_nd=True)

    # set internal vars
    self.classes_ = unique_labels(y)
    self.X_ = X
    self.y_ = y

    ##################################################
    # split X into train and test sets, so that
    # grid search can be performed on the train set only
    seed = 7
    np.random.seed(seed)

    for epoch_trim in self.epoch_bounds:
        for bandpass in self.bandpass_filters:
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.25, random_state=seed)

            # separate out inputs that are tuples
            bandpass_start, bandpass_end = bandpass
            epoch_trim_start, epoch_trim_end = epoch_trim

            # bandpass filter coefficients
            b, a = butter(
                5,
                np.array([bandpass_start, bandpass_end]) / (self.sfreq * 0.5),
                'bandpass')

            # filter and crop TRAINING SET
            X_train = self.preprocess_X(X_train, b, a,
                                        epoch_trim_start, epoch_trim_end)
            # validate
            X_train, y_train = check_X_y(X_train, y_train, allow_nd=True)
            X_train = sklearn.utils.validation.check_array(X_train,
                                                           allow_nd=True)

            # filter and crop TEST SET
            X_test = self.preprocess_X(X_test, b, a,
                                       epoch_trim_start, epoch_trim_end)
            # validate
            X_test, y_test = check_X_y(X_test, y_test, allow_nd=True)
            X_test = sklearn.utils.validation.check_array(X_test,
                                                          allow_nd=True)

            ###################################################################
            # self-tune CSP to find the optimal number of filters at these
            # settings
            [best_num_filters, best_num_filters_score] = self.self_tune(
                X_train, y_train)

            # now use this insight to really fit with optimal CSP spatial
            # filters. reg='ledoit_wolf' selects optimal shrinkage for
            # covariance estimation; a float in [0, 1] would select fixed
            # shrinkage, and 'oas' the Oracle Approximating Shrinkage.
            transformer = CSP(n_components=best_num_filters,
                              reg='ledoit_wolf')
            transformer.fit(X_train, y_train)

            # use these CSP spatial filters to transform train and test
            spatial_filters_train = transformer.transform(X_train)
            spatial_filters_test = transformer.transform(X_test)

            # train LDA
            classifier = LinearDiscriminantAnalysis()
            classifier.fit(spatial_filters_train, y_train)
            score = classifier.score(spatial_filters_test, y_test)

            print("current score", score)
            print("bandpass:", bandpass_start, bandpass_end)
            print("epoch window:", epoch_trim_start, epoch_trim_end)
            print(best_num_filters, "filters chosen")

            # keep a ranked Top-N list of scores and their settings
            idx = bisect(self.ranked_scores, score)
            self.ranked_scores.insert(idx, score)
            self.ranked_scores_opts.insert(
                idx, dict(bandpass=bandpass, epoch_trim=epoch_trim,
                          filters=best_num_filters))
            self.ranked_classifiers.insert(idx, classifier)
            self.ranked_transformers.insert(idx, transformer)

            if len(self.ranked_scores) > self.num_votes:
                self.ranked_scores.pop(0)
            if len(self.ranked_scores_opts) > self.num_votes:
                self.ranked_scores_opts.pop(0)
            if len(self.ranked_classifiers) > self.num_votes:
                self.ranked_classifiers.pop(0)
            if len(self.ranked_transformers) > self.num_votes:
                self.ranked_transformers.pop(0)

            print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
            print(" T O P", self.num_votes, " C L A S S I F I E R S")
            print()
            for i in range(len(self.ranked_scores)):
                print(i, ",", round(self.ranked_scores[i], 4), ",",
                      self.ranked_scores_opts[i])

    # finish up, set the flag to indicate "fitted" state
    self.fit_ = True

    # return the classifier
    return self
# Read epochs
epochs = mne.Epochs(raw, events, event_id, tmin, tmax, proj=True,
                    picks=picks, baseline=None, preload=True)
labels = epochs.events[:, -1]
evoked = epochs.average()

###############################################################################
# Decoding in sensor space using a linear SVM

from sklearn.svm import SVC  # noqa
from sklearn.model_selection import ShuffleSplit  # noqa
from mne.decoding import CSP  # noqa

n_components = 3  # pick some components
svc = SVC(C=1, kernel="linear")
csp = CSP(n_components=n_components)

# Define a Monte Carlo cross-validation generator (reduces variance):
cv = ShuffleSplit(10, test_size=0.2, random_state=42)

scores = []
epochs_data = epochs.get_data()

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]

    X_train = csp.fit_transform(epochs_data[train_idx], y_train)
    X_test = csp.transform(epochs_data[test_idx])

    # fit classifier
    svc.fit(X_train, y_train)
epochs_train = epochs.copy().crop(tmin=1., tmax=2.)
labels = epochs.events[:, -1] - 2

###############################################################################
# Classification with linear discriminant analysis

# Define a Monte Carlo cross-validation generator (reduces variance):
scores = []
epochs_data = epochs.get_data()
epochs_data_train = epochs_train.get_data()
cv = ShuffleSplit(10, test_size=0.2, random_state=42)
cv_split = cv.split(epochs_data_train)

# Assemble a classifier
lda = LinearDiscriminantAnalysis()
csp = CSP(n_components=4, reg=None, log=True, norm_trace=False)

# Use scikit-learn Pipeline with the cross_val_score function
clf = Pipeline([('CSP', csp), ('LDA', lda)])
scores = cross_val_score(clf, epochs_data_train, labels, cv=cv, n_jobs=1)

# Printing the results
class_balance = np.mean(labels == labels[0])
class_balance = max(class_balance, 1. - class_balance)
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))

# plot CSP patterns estimated on full data for visualization
csp.fit_transform(epochs_data, labels)
layout = read_layout('EEG1005')
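# The snippet presumably continues by plotting the CSP patterns with the
# layout loaded above; a sketch of that plotting call (the exact keyword
# arguments are assumptions based on the plot_patterns API of the same
# MNE era that provided read_layout):
csp.plot_patterns(epochs.info, layout=layout, ch_type='eeg',
                  units='Patterns (AU)', size=1.5)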