def test_MDM_transform():
    """Test transform of MDM.

    Fit an MDM on a synthetic covariance set and check that transform
    returns one distance per class for every trial.  (The original test
    called transform but asserted nothing.)
    """
    covset = generate_cov(100, 3)
    labels = np.array([0, 1]).repeat(50)
    mdm = MDM(metric='riemann')
    mdm.fit(covset, labels)
    # transform returns the distance to each class mean: (n_trials, n_classes)
    dist = mdm.transform(covset)
    assert dist.shape == (100, 2)
def fit(self, X, y):
    """Fit the FGDA geodesic filter, then the MDM on the filtered matrices.

    Stores the observed classes in ``classes_`` and returns ``self``.
    """
    self.classes_ = unique_labels(y)
    # filter the covariances in the tangent space first
    self._fgda = FGDA(metric=self.metric_mean, tsupdate=self.tsupdate)
    filtered = self._fgda.fit_transform(X, y)
    # then train the minimum-distance-to-mean classifier on the result
    self._mdm = MDM(metric=self.metric, n_jobs=self.n_jobs)
    self._mdm.fit(filtered, y)
    return self
def test_MDM_predict():
    """Test prediction of MDM.

    Fit an MDM on a synthetic covariance set and check that predict
    returns one known label per trial.  (The original test called
    predict but asserted nothing.)
    """
    covset = generate_cov(100, 3)
    labels = np.array([0, 1]).repeat(50)
    mdm = MDM(metric='riemann')
    mdm.fit(covset, labels)
    pred = mdm.predict(covset)
    assert pred.shape == (100,)
    # predictions are drawn from the training labels
    assert set(pred) <= set(labels)
def N170_test(session_data):
    """Run N170 ERP decoding pipelines and count those reaching AUC >= 0.7.

    Args:
        session_data: recorded session passed to get_session_erp_epochs.

    Returns:
        tuple: (bool, int) -- whether at least one pipeline reached a mean
        AUC of 0.7, and how many pipelines did.

    Cleanup vs. the original: removed the unused ``conditions``, ``times``
    and ``color`` locals, and replaced the manual counting loops with a
    pandas filter and ``sum``.
    """
    markers = N170_MARKERS
    epochs = get_session_erp_epochs(session_data, markers)

    # candidate decoding pipelines, from simple vectorised models to
    # Riemannian-geometry ones
    clfs = OrderedDict()
    clfs['Vect + LR'] = make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression())
    clfs['Vect + RegLDA'] = make_pipeline(Vectorizer(), LDA(shrinkage='auto', solver='eigen'))
    clfs['ERPCov + TS'] = make_pipeline(ERPCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
    clfs['ERPCov + MDM'] = make_pipeline(ERPCovariances(estimator='oas'), MDM())
    clfs['XdawnCov + TS'] = make_pipeline(XdawnCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
    clfs['XdawnCov + MDM'] = make_pipeline(XdawnCovariances(estimator='oas'), MDM())
    methods_list = list(clfs.keys())

    # format data: keep EEG channels, scale to microvolts
    epochs.pick_types(eeg=True)
    X = epochs.get_data() * 1e6
    y = epochs.events[:, -1]

    # define cross validation
    cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, random_state=42)

    # run cross validation for each pipeline
    auc = []
    methods = []
    print('Calcul in progress...')
    for m in clfs:
        try:
            # binary target: event code 2 vs. the rest
            res = cross_val_score(clfs[m], X, y == 2,
                                  scoring='roc_auc', cv=cv, n_jobs=-1)
            auc.extend(res)
            methods.extend([m] * len(res))
        except Exception:
            # NOTE(review): kept the original best-effort behaviour, but
            # swallowing every exception hides failing pipelines.
            print("exception")

    results = pd.DataFrame(data=auc, columns=['AUC'])
    results['Method'] = methods

    # mean AUC per pipeline, in declaration order (NaN if a pipeline failed)
    auc_means = [results.loc[results['Method'] == method, 'AUC'].mean()
                 for method in methods_list]
    counter = sum(1 for mean_auc in auc_means if mean_auc >= 0.7)
    return counter > 0, counter
def create_mdm(raw, event_id):
    """Build and fit an MDM classifier from a raw recording.

    Epochs the stim-channel events, estimates trial covariances and fits
    a Riemannian minimum-distance-to-mean classifier on them.
    """
    tmin, tmax = -1., 4.
    events = find_events(raw, shortest_event=0, stim_channel='STI 014')
    epochs = Epochs(raw, events, event_id, tmin, tmax, proj=True,
                    baseline=None, preload=True, verbose=False)
    labels = epochs.events[:, -1]
    # drop the last channel before estimating covariances
    train_data = epochs.get_data()[:, :-1]
    train_covs = Covariances().transform(train_data)
    mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
    mdm.fit(train_covs, labels)
    return mdm
def test_MDM_init():
    """Test init of MDM."""
    mdm = MDM(metric='riemann')
    # a metric that is neither a string nor a dict must be rejected
    assert_raises(TypeError, MDM, metric=42)
    # a metric dict carrying unknown keys must be rejected
    assert_raises(KeyError, MDM, metric={'universe': 42})
    # a well-formed metric dict is accepted
    mdm = MDM(metric={'mean': 'riemann', 'distance': 'logeuclid'})
def erp_cov_vr_pc(X_training, labels_training, X_test, labels_test,
                  class_name, class_info):
    """Classify with extended ERP covariances + MDM and return the test AUC."""
    # estimate the extended ERP covariance matrices for the chosen class
    erpc = ERPCovariances(classes=[class_info[class_name]], estimator='lwf')
    erpc.fit(X_training, labels_training)
    covs_training = erpc.transform(X_training)
    covs_test = erpc.transform(X_test)
    # fit the MDM and score its hard predictions on the held-out set
    clf = MDM()
    clf.fit(covs_training, labels_training)
    return roc_auc_score(labels_test, clf.predict(covs_test))
def fit(self, X, y):
    """Fit."""
    # The MDM is used only as a distance container: its class means are
    # assigned directly below instead of calling mdm.fit().
    self.mdm = MDM(metric=self.metric_mean, n_jobs=self.n_jobs)
    # subsample the label stream to match the covariance epochs
    labels = y[::self.subsample]
    pCalcMeans = partial(mean_covariance, metric=self.metric_mean)
    # NOTE(review): pool size is hard-coded to 6 to match the 6 event
    # columns iterated below -- confirm if the event count ever changes.
    pool = Pool(processes=6)
    # mean covariance over the epochs where each of the 6 events is active
    mc1 = pool.map(pCalcMeans, [X[labels[:, i] == 1] for i in range(6)])
    pool.close()
    pool = Pool(processes=6)
    # mean covariance over the epochs where each event is inactive
    mc0 = pool.map(pCalcMeans, [X[labels[:, i] == 0] for i in range(6)])
    pool.close()
    # NOTE(review): recent pyriemann exposes the fitted means as
    # `covmeans_`; `covmeans` may target an older API -- verify.
    self.mdm.covmeans = mc1 + mc0
    return self
def ml_classifier(inputs, targets, classifier=None, pipeline=None):
    """Uses sklearn to fit a model given inputs and targets

    Args:
        inputs: list containing (N trials * M channels) data segments of
            length(number of features).
        targets: list containing (N trials * M channels) of marker data
            (0 or 1).
        classifier: pre-trained lda classifier; if None train from scratch
        pipeline: name of pipeline to create if classifier is None

    Returns:
        classifier: fitted classifier object

    Raises:
        ValueError: if neither a classifier nor a known pipeline name is
            given (the original crashed with AttributeError on None.fit,
            or a raw KeyError for an unknown pipeline name).
    """
    pipeline_dict = {
        'vect_lr': make_pipeline(Vectorizer(), StandardScaler(),
                                 LogisticRegression()),
        # NOTE: key kept as-is ('vecct' typo) for caller compatibility
        'vecct_reglda': make_pipeline(Vectorizer(),
                                      LDA(shrinkage='auto', solver='eigen')),
        'xdawn_reglda': make_pipeline(Xdawn(2, classes=[1]), Vectorizer(),
                                      LDA(shrinkage='auto', solver='eigen')),
        'erpcov_ts': make_pipeline(ERPCovariances(), TangentSpace(),
                                   LogisticRegression()),
        'erpcov_mdm': make_pipeline(ERPCovariances(), MDM()),
    }
    if classifier is None:
        if pipeline is None:
            raise ValueError(
                'either a classifier or a pipeline name is required')
        try:
            classifier = pipeline_dict[pipeline.lower()]
        except KeyError:
            raise ValueError('unknown pipeline name: %r' % pipeline)
    # fit (or refit, when a classifier was passed in) on the given data
    classifier.fit(inputs, targets)
    return classifier
def fit(self, X, y): """fit.""" # Create ERP and get cov mat self.ERP = ERP(self.window, self.nfilters, self.subsample) train_cov = self.ERP.fit_transform(X, y) labels_train = self.ERP.labels_train # Add rest epochs rest_cov = self._get_rest_cov(X, y) train_cov = np.concatenate((train_cov, rest_cov), axis=0) labels_train = np.concatenate((labels_train, [0] * len(rest_cov))) # fit MDM self.MDM = MDM(metric=self.metric, n_jobs=self.n_jobs) self.MDM.fit(train_cov, labels_train) self._fitted = True return self
def test_MDM_transform():
    """Test transform of MDM"""
    n_trials, n_channels = 100, 3
    covset = generate_cov(n_trials, n_channels)
    # first half class 0, second half class 1
    labels = np.repeat(np.array([0, 1]), n_trials // 2)
    mdm = MDM(metric='riemann')
    mdm.fit(covset, labels)
    mdm.transform(covset)
def test_MDM_predict():
    """Test prediction of MDM"""
    n_trials, n_channels = 100, 3
    covset = generate_cov(n_trials, n_channels)
    # balanced two-class labelling
    labels = np.repeat(np.array([0, 1]), n_trials // 2)
    mdm = MDM(metric='riemann')
    mdm.fit(covset, labels)
    mdm.predict(covset)
class FgMDM2(BaseEstimator, ClassifierMixin, TransformerMixin):
    """Geodesic-filtered MDM: FGDA filtering followed by MDM classification."""

    def __init__(self, metric='riemann', tsupdate=False, n_jobs=1):
        """Init."""
        self.metric = metric
        self.n_jobs = n_jobs
        self.tsupdate = tsupdate
        if isinstance(metric, str):
            self.metric_mean = metric
        elif isinstance(metric, dict):
            # both keys are required when a dict is supplied
            if not all(k in metric.keys() for k in ('mean', 'distance')):
                raise KeyError('metric must contain "mean" and "distance"')
            self.metric_mean = metric['mean']
        else:
            raise TypeError('metric must be dict or str')

    def fit(self, X, y):
        """Fit the FGDA filter, then the MDM on the filtered covariances."""
        self.classes_ = unique_labels(y)
        self._fgda = FGDA(metric=self.metric_mean, tsupdate=self.tsupdate)
        filtered = self._fgda.fit_transform(X, y)
        self._mdm = MDM(metric=self.metric, n_jobs=self.n_jobs)
        self._mdm.fit(filtered, y)
        return self

    def predict(self, X):
        """Predict class labels after geodesic filtering."""
        return self._mdm.predict(self._fgda.transform(X))

    def predict_proba(self, X):
        """Predict class probabilities after geodesic filtering."""
        return self._mdm.predict_proba(self._fgda.transform(X))

    def transform(self, X):
        """Return distances to class means after geodesic filtering."""
        return self._mdm.transform(self._fgda.transform(X))
def check_other_classifiers(train_X, train_y, test_X, test_y):
    """Benchmark Riemannian classifiers on the given train/test split.

    Cross-validates a tangent-space classifier and an MDM on the training
    covariances, prints the scores, and plots the two MDM class-mean
    covariance matrices to ``meancovmat.png``.

    Fix: ``KFold`` was built with a ``random_state`` but without
    ``shuffle=True``; modern scikit-learn rejects that combination.
    """
    from pyriemann.classification import MDM, TSclassifier
    from sklearn.linear_model import LogisticRegression
    from pyriemann.estimation import Covariances
    from sklearn.pipeline import Pipeline
    from mne.decoding import CSP
    import seaborn as sns
    import pandas as pd

    # one-hot label rows -> integer class indices
    train_y = [np.where(i == 1)[0][0] for i in train_y]
    test_y = [np.where(i == 1)[0][0] for i in test_y]
    cov_data_train = Covariances().transform(train_X)
    cov_data_test = Covariances().transform(test_X)
    # shuffle=True is required whenever a random_state is supplied
    cv = KFold(n_splits=10, shuffle=True, random_state=42)

    clf = TSclassifier()
    scores = cross_val_score(clf, cov_data_train, train_y, cv=cv, n_jobs=1)
    print("Tangent space Classification accuracy: ", np.mean(scores))
    clf = TSclassifier()
    clf.fit(cov_data_train, train_y)
    print(clf.score(cov_data_test, test_y))

    mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
    scores = cross_val_score(mdm, cov_data_train, train_y, cv=cv, n_jobs=1)
    print("MDM Classification accuracy: ", np.mean(scores))
    mdm = MDM()
    mdm.fit(cov_data_train, train_y)

    # heatmaps of the two fitted class-mean covariance matrices
    fig, axes = plt.subplots(1, 2)
    ch_names = [ch for ch in range(8)]
    df = pd.DataFrame(data=mdm.covmeans_[0], index=ch_names, columns=ch_names)
    g = sns.heatmap(df, ax=axes[0], square=True, cbar=False, xticklabels=2,
                    yticklabels=2)
    g.set_title('Mean covariance - feet')
    df = pd.DataFrame(data=mdm.covmeans_[1], index=ch_names, columns=ch_names)
    g = sns.heatmap(df, ax=axes[1], square=True, cbar=False, xticklabels=2,
                    yticklabels=2)
    plt.xticks(rotation='vertical')
    plt.yticks(rotation='horizontal')
    g.set_title('Mean covariance - hands')
    # dirty fix: re-apply tick rotation to the first axes
    plt.sca(axes[0])
    plt.xticks(rotation='vertical')
    plt.yticks(rotation='horizontal')
    plt.savefig("meancovmat.png")
    plt.show()
class DistanceCalculatorAlex(BaseEstimator, TransformerMixin):
    """Distance Calulator Based on MDM."""

    def __init__(self, metric_mean='logeuclid', metric_dist=['riemann'],
                 n_jobs=7, subsample=10):
        """Init."""
        self.metric_mean = metric_mean
        self.metric_dist = metric_dist
        self.n_jobs = n_jobs
        self.subsample = subsample

    def fit(self, X, y):
        """Fit an MDM on the subsampled sequence labels."""
        self.mdm = MDM(metric=self.metric_mean, n_jobs=self.n_jobs)
        labels = np.squeeze(create_sequence(y.T)[::self.subsample])
        self.mdm.fit(X, labels)
        return self

    def transform(self, X, y=None):
        """Return per-metric class distances expressed relative to class 0."""
        features = []
        for metric in self.metric_dist:
            self.mdm.metric_dist = metric
            dist = self.mdm.transform(X)
            # subtract the class-0 distance from every other class distance
            features.append(dist[:, 1:] - np.atleast_2d(dist[:, 0]).T)
        out = np.concatenate(features, axis=1)
        # zero out NaNs so downstream estimators never see them
        out[np.isnan(out)] = 0
        return out

    def fit_transform(self, X, y):
        """Fit and transform."""
        return self.fit(X, y).transform(X)
class wrapper_MDM(machine_learning_method):
    """wrapper for pyriemann MDM"""

    def __init__(self, method_name, method_args):
        super(wrapper_MDM, self).__init__(method_name, method_args)
        self.init_method()

    def init_method(self, n_jobs=1):
        # (re)build the underlying pyriemann classifier
        self.classifier = MDM(metric=self.method_args['metric'],
                              n_jobs=n_jobs)

    def set_parallel(self, is_parallel=False, n_jobs=8):
        """Toggle parallel fitting; rebuilding the classifier loses any fit."""
        logging.warning(
            'The call to this set_parallel method is reseting the class, and must be fitted again'
        )
        self.parallel, self.n_jobs = is_parallel, n_jobs
        if not self.parallel:
            return
        self.init_method(n_jobs)

    def fit(self, X, y):
        # delegate training to the wrapped MDM
        return self.classifier.fit(X, y)

    def predict(self, X):
        # delegate prediction to the wrapped MDM
        return self.classifier.predict(X)
class DistanceCalculatorRafal(BaseEstimator, TransformerMixin):
    """Distance Calulator Based on MDM Rafal style.

    For each metric in ``metric_dist``, emits the difference between the
    distances to the "active" and "inactive" mean covariances of each event.
    """

    def __init__(self, metric_mean='logeuclid', metric_dist=['riemann'],
                 n_jobs=12, subsample=10):
        """Init."""
        self.metric_mean = metric_mean
        self.metric_dist = metric_dist
        self.n_jobs = n_jobs
        self.subsample = subsample

    def fit(self, X, y):
        """Fit."""
        # MDM used as a distance container; class means are assigned
        # directly below instead of calling mdm.fit()
        self.mdm = MDM(metric=self.metric_mean, n_jobs=self.n_jobs)
        # subsample the labels to match the covariance epochs
        labels = y[::self.subsample]
        pCalcMeans = partial(mean_covariance, metric=self.metric_mean)
        # NOTE(review): pool size is hard-coded to 6 while the event count
        # is N_EVENTS -- confirm these stay in sync.
        pool = Pool(processes=6)
        # mean covariance over epochs where each event is active
        mc1 = pool.map(pCalcMeans,
                       [X[labels[:, i] == 1] for i in range(N_EVENTS)])
        pool.close()
        pool = Pool(processes=6)
        # mean covariance over epochs where each event is inactive
        mc0 = pool.map(pCalcMeans,
                       [X[labels[:, i] == 0] for i in range(N_EVENTS)])
        pool.close()
        # NOTE(review): recent pyriemann exposes fitted means as
        # `covmeans_`; `covmeans` may target an older API -- verify.
        self.mdm.covmeans = mc1 + mc0
        return self

    def transform(self, X, y=None):
        """Transform."""
        feattr = []
        for metric in self.metric_dist:
            self.mdm.metric_dist = metric
            feat = self.mdm.transform(X)
            # print 'feat', feat, feat.shape
            # substract distance of the class 0
            # (active-mean distances minus matching inactive-mean distances)
            feat = feat[:, 0:N_EVENTS] - feat[:, N_EVENTS:]
            feattr.append(feat)
        feattr = np.concatenate(feattr, axis=1)
        # zero out NaNs (possible when a class had no epochs)
        feattr[np.isnan(feattr)] = 0
        return feattr

    def fit_transform(self, X, y):
        """Fit and transform."""
        self.fit(X, y)
        return self.transform(X)
class DistanceCalculatorRafal(BaseEstimator, TransformerMixin):
    """Distance Calulator Based on MDM Rafal style."""

    def __init__(self, metric_mean='logeuclid', metric_dist=['riemann'],
                 n_jobs=12, subsample=10):
        """Init."""
        self.metric_mean = metric_mean
        self.metric_dist = metric_dist
        self.n_jobs = n_jobs
        self.subsample = subsample

    def fit(self, X, y):
        """Set the MDM class means from pooled per-event mean covariances."""
        self.mdm = MDM(metric=self.metric_mean, n_jobs=self.n_jobs)
        labels = y[::self.subsample]
        compute_mean = partial(mean_covariance, metric=self.metric_mean)
        per_state = []
        # state 1 = event active, state 0 = event inactive
        for state in (1, 0):
            pool = Pool(processes=6)
            per_state.append(pool.map(
                compute_mean,
                [X[labels[:, i] == state] for i in range(6)]))
            pool.close()
        # active means first, then inactive means
        self.mdm.covmeans = per_state[0] + per_state[1]
        return self

    def transform(self, X, y=None):
        """Per-metric differences between active and inactive distances."""
        blocks = []
        for metric in self.metric_dist:
            self.mdm.metric_dist = metric
            dist = self.mdm.transform(X)
            # distance to active mean minus distance to inactive mean
            blocks.append(dist[:, 0:6] - dist[:, 6:])
        out = np.concatenate(blocks, axis=1)
        # replace NaNs so downstream estimators never see them
        out[np.isnan(out)] = 0
        return out

    def fit_transform(self, X, y):
        """Fit and transform."""
        return self.fit(X, y).transform(X)
def get_score(subject=7, runs=[6, 10, 14], event_id=dict(hands=2, feet=3)):
    """Grid-search an MDM pipeline on one subject; return the CV results."""
    tmin, tmax = -1., 4.
    raw = get_raw(subject, runs)
    events = find_events(raw, shortest_event=0, stim_channel='STI 014')
    epochs = Epochs(raw, events, event_id, tmin, tmax, proj=True,
                    baseline=None, preload=True, verbose=False)
    labels = epochs.events[:, -1]
    # drop the last channel before covariance estimation
    train_data = epochs.get_data()[:, :-1]
    train_covs = Covariances().transform(train_data)
    # Classification with Minimum distance to mean
    mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
    pipeline = Pipeline([("mdm", mdm)])
    param_grid = {"mdm__metric": [dict(mean='riemann', distance='riemann')]}
    search = GridSearchCV(pipeline, param_grid, n_jobs=-1, cv=5,
                          return_train_score=True)
    search.fit(train_covs, labels)
    return pd.DataFrame(search.cv_results_)
class ERPDistance(BaseEstimator, TransformerMixin):
    """ERP distance cov estimator.

    This transformer estimates Riemannian distance for ERP covariance
    matrices. After estimation of special form ERP covariance matrices
    using the ERP transformer, a MDM [1] algorithm is used to compute
    Riemannian distance.

    References:
    [1] A. Barachant, S. Bonnet, M. Congedo and C. Jutten, "Multiclass
    Brain-Computer Interface Classification by Riemannian Geometry," in
    IEEE Transactions on Biomedical Engineering, vol. 59, no. 4,
    p. 920-928, 2012
    """

    def __init__(self, window=500, nfilters=3, subsample=1,
                 metric='riemann', n_jobs=1):
        """Init."""
        self.window = window        # epoch length (samples) for ERP and rest windows
        self.nfilters = nfilters    # spatial filter count for the ERP transformer
        self.subsample = subsample  # subsampling factor passed to ERP
        self.metric = metric        # Riemannian metric used by the MDM
        self.n_jobs = n_jobs
        self._fitted = False        # guards update_subsample before fit

    def fit(self, X, y):
        """fit."""
        # Create ERP and get cov mat
        self.ERP = ERP(self.window, self.nfilters, self.subsample)
        train_cov = self.ERP.fit_transform(X, y)
        labels_train = self.ERP.labels_train
        # Add rest epochs labelled 0 so the MDM learns a "no ERP" class
        rest_cov = self._get_rest_cov(X, y)
        train_cov = np.concatenate((train_cov, rest_cov), axis=0)
        labels_train = np.concatenate((labels_train, [0] * len(rest_cov)))
        # fit MDM
        self.MDM = MDM(metric=self.metric, n_jobs=self.n_jobs)
        self.MDM.fit(train_cov, labels_train)
        self._fitted = True
        return self

    def transform(self, X, y=None):
        """Transform."""
        test_cov = self.ERP.transform(X)
        dist = self.MDM.transform(test_cov)
        # express each class distance relative to the distance to class 0
        # (the rest class added during fit)
        dist = dist[:, 1:] - np.atleast_2d(dist[:, 0]).T
        return dist

    def update_subsample(self, old_sub, new_sub):
        """update subsampling."""
        if self._fitted:
            self.ERP.update_subsample(old_sub, new_sub)

    def _get_rest_cov(self, X, y):
        """Sample rest epochs from data and compute the cov mat."""
        # rising edges of the first label column mark event onsets
        ix = np.where(np.diff(y[:, 0]) == 1)[0]
        rest = []
        offset = -self.window
        for i in ix:
            # take the window ending one full window before each onset
            start = i + offset - self.window
            stop = i + offset
            # NOTE(review): if an onset occurs within 2*window samples of
            # the start of X, `start` goes negative and the slice wraps --
            # confirm upstream data guarantees this cannot happen.
            rest.append(self.ERP.erp_cov(X[slice(start, stop)].T))
        return np.array(rest)
# Compare an MDM classifier against a power-means "mean field" classifier on
# one session of BNCI2014001 motor-imagery data, with stratified K-fold CV.
import moabb
from moabb.datasets import BNCI2014001
from moabb.paradigms import LeftRightImagery

# load the data
subject = 1
dataset = BNCI2014001()
paradigm = LeftRightImagery()
X, labels, meta = paradigm.get_data(dataset, subjects=[subject])
# keep only the evaluation session
X = X[meta['session'] == 'session_E']
covs = Covariances(estimator='oas').fit_transform(X)
labs = labels[meta['session'] == 'session_E']

# define the pipelines for classification -- MDM and MeansField classifier
pipelines = {}
pipelines['MDM'] = MDM()
# exponents of the power means used by the mean-field classifier
plist = [1.00, 0.75, 0.50, 0.25, 0.10, 0.01,
         -0.01, -0.10, -0.25, -0.50, -0.75, -1.00]
pipelines['MeansField'] = power_means.MeanFieldClassifier(plist=plist)

# perform the KFold cross-validation procedure with stratified segments
# (same proportion of labels form each class on every fold)
n_splits = 5
kf = StratifiedKFold(n_splits)
scores = {}
for pipeline_name in pipelines.keys():
    scores[pipeline_name] = 0
for train_idx, test_idx in tqdm(kf.split(covs, labs), total=n_splits):
    covs_train, labs_train = covs[train_idx], labs[train_idx]
    covs_test, labs_test = covs[test_idx], labs[test_idx]
    for pipeline_name in pipelines.keys():
        pipelines[pipeline_name].fit(covs_train, labs_train)
        # NOTE(review): snippet appears truncated here -- `scores` is
        # initialised but never updated in the visible code.
# epoching summary: fraction of events that did not survive epoch rejection
print('sample drop %: ', (1 - len(epochs.events) / len(events)) * 100)
epochs

###################################################################################################
# Run classification
# ----------------------------
# candidate ERP decoding pipelines, from simple vectorised models to
# Riemannian-geometry ones
clfs = OrderedDict()
clfs['Vect + LR'] = make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression())
clfs['Vect + RegLDA'] = make_pipeline(Vectorizer(), LDA(shrinkage='auto', solver='eigen'))
clfs['ERPCov + TS'] = make_pipeline(ERPCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
clfs['ERPCov + MDM'] = make_pipeline(ERPCovariances(estimator='oas'), MDM())
clfs['XdawnCov + TS'] = make_pipeline(XdawnCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
clfs['XdawnCov + MDM'] = make_pipeline(XdawnCovariances(estimator='oas'), MDM())

# format data
epochs.pick_types(eeg=True)
X = epochs.get_data() * 1e6  # scale to microvolts
times = epochs.times
y = epochs.events[:, -1]

# define cross validation
cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, random_state=42)

# run cross validation for each pipeline
# band-pass filter the offline recording and rebuild a Raw object from it
filtered_offline_signal = _bandpass_filter(offline_raw, frequencies, frequency_range)
offline_raw = createRaw(filtered_offline_signal, offline_raw, filtered=True)
offline_epochs = Epochs(offline_raw, offline_events, event_id, tmin=0, tmax=5,
                        baseline=None)
offline_epochs_data = offline_epochs.get_data()

# Creating ML model: trial covariances (Ledoit-Wolf) + Riemannian MDM
offline_cov_matrix = Covariances(
    estimator='lwf').transform(offline_epochs_data)
mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
mdm.fit(offline_cov_matrix, labels)

# Evoking trials to simulate online input
iter_evoked = epochs.iter_evoked()
epochs_data = offline_epochs_data
time_array = []
# sanity check: predictions on the training covariances themselves
pre_predict = mdm.predict(offline_cov_matrix)
print("Labels: ")
print(labels)
for i, evoked in enumerate(iter_evoked):
    # rebuild a Raw object from each evoked trial to mimic a live stream
    evoked_raw = createRaw(evoked.data, raw, filtered=False)
def get_score(subject=7, runs=[6, 10, 14], event_id=dict(hands=2, feet=3)):
    """Transfer-learning sweep: gradually append other subjects' recordings
    with a range of sample weights, cross-validate an MDM on the target
    subject's folds, and write the scores to one Excel file per weight.
    """
    if subject in EXCLUDE_SUBJECTS:
        return
    tmin, tmax = -1., 4.
    # relative weight given to the target subject's epochs
    weights = np.arange(0.1, 1.0, 0.1)
    for weight in weights:
        # never start the transfer pool with the target subject itself
        first_sub = 2 if subject == 1 else 1
        raw = get_raw(subject, runs)
        scores = []
        for i in range(first_sub, TRANS_SUBJECT_COUNT):
            print(i)
            if i == subject or (i in EXCLUDE_SUBJECTS):
                continue
            # raw grows cumulatively: each step trains on all previously
            # appended subjects as well
            raw.append(get_raw(i, runs))
            events = find_events(raw, shortest_event=0,
                                 stim_channel='STI 014')
            epochs = Epochs(raw, events, event_id, tmin, tmax, proj=True,
                            baseline=None, preload=True, verbose=False)
            labels = epochs.events[:, -1]
            # scale to microvolts and drop the last channel
            epochs_data_train = 1e6*epochs.get_data()[:, :-1]
            cov_data_train = Covariances().transform(epochs_data_train)
            # target epochs weighted by `weight`, the rest by (1 - weight)
            target_sample_weight_base = np.ones(EPOCH_COUNT)*weight
            others_sample_weight_base = np.ones(
                len(epochs)-EPOCH_COUNT)*(1.-weight)
            sample_weight = np.hstack(
                (target_sample_weight_base, others_sample_weight_base))
            # NOTE(review): sample_weight puts the target epochs FIRST,
            # while the index arithmetic below places the target epochs
            # AFTER the transfer epochs -- confirm the intended ordering.
            others_size = others_sample_weight_base.size
            others_index = np.arange(others_size)
            mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
            cv = KFold(n_splits=5, shuffle=True, random_state=42)
            train_scores = []
            test_scores = []
            # CV folds are drawn over the target subject's epochs only
            dumy_array = np.ones(EPOCH_COUNT)
            for train_index, test_index in cv.split(dumy_array):
                # train on all transfer epochs plus the target training fold
                train_index = np.hstack(
                    (others_index, train_index+others_size))
                x = cov_data_train[train_index]
                y = labels[train_index]
                mdm.fit(x, y, sample_weight=sample_weight[train_index])
                # training accuracy on the same fold
                score = (mdm.predict(x) == y).sum()/len(train_index)
                train_scores.append(score)
                # evaluate on the held-out target fold
                test_index = test_index + others_size
                y = mdm.predict(cov_data_train[test_index])
                score = (y == labels[test_index]).sum()/len(test_index)
                test_scores.append(score)
            train_score = np.mean(train_scores)
            test_score = np.mean(test_scores)
            scores.append([subject, i, train_score, test_score])
        df = pd.DataFrame(
            scores,
            columns=["subject", "transfer_count",
                     "train_score", "test_score"])
        df.to_excel("data/riemann/gradually/test_subject_%d_weight_%e.xlsx"
                    % (subject, weight), index=False)
class ERPDistance(BaseEstimator, TransformerMixin):
    """ERP distance cov estimator.

    This transformer estimates Riemannian distance for ERP covariance
    matrices. After estimation of special form ERP covariance matrices
    using the ERP transformer, a MDM [1] algorithm is used to compute
    Riemannian distance.

    References:
    [1] A. Barachant, S. Bonnet, M. Congedo and C. Jutten, "Multiclass
    Brain-Computer Interface Classification by Riemannian Geometry," in
    IEEE Transactions on Biomedical Engineering, vol. 59, no. 4,
    p. 920-928, 2012
    """

    def __init__(self, window=500, nfilters=3, subsample=1,
                 metric='riemann', n_jobs=1):
        """Init."""
        # window: sample length of both ERP and rest epochs
        self.window = window
        # nfilters: spatial filter count forwarded to the ERP transformer
        self.nfilters = nfilters
        self.subsample = subsample
        self.metric = metric
        self.n_jobs = n_jobs
        # set to True once fit() completes; checked by update_subsample
        self._fitted = False

    def fit(self, X, y):
        """fit."""
        # Create ERP and get cov mat
        self.ERP = ERP(self.window, self.nfilters, self.subsample)
        train_cov = self.ERP.fit_transform(X, y)
        labels_train = self.ERP.labels_train
        # Add rest epochs under label 0 to give the MDM a baseline class
        rest_cov = self._get_rest_cov(X, y)
        train_cov = np.concatenate((train_cov, rest_cov), axis=0)
        labels_train = np.concatenate((labels_train, [0] * len(rest_cov)))
        # fit MDM
        self.MDM = MDM(metric=self.metric, n_jobs=self.n_jobs)
        self.MDM.fit(train_cov, labels_train)
        self._fitted = True
        return self

    def transform(self, X, y=None):
        """Transform."""
        test_cov = self.ERP.transform(X)
        dist = self.MDM.transform(test_cov)
        # report distances relative to the baseline (class 0) distance
        dist = dist[:, 1:] - np.atleast_2d(dist[:, 0]).T
        return dist

    def update_subsample(self, old_sub, new_sub):
        """update subsampling."""
        if self._fitted:
            self.ERP.update_subsample(old_sub, new_sub)

    def _get_rest_cov(self, X, y):
        """Sample rest epochs from data and compute the cov mat."""
        # onsets = rising edges of the first label column
        ix = np.where(np.diff(y[:, 0]) == 1)[0]
        rest = []
        offset = -self.window
        for i in ix:
            # rest window ends one full window before the onset
            start = i + offset - self.window
            stop = i + offset
            # NOTE(review): `start` can be negative for early onsets, which
            # makes the slice wrap around -- verify onsets never occur in
            # the first 2*window samples.
            rest.append(self.ERP.erp_cov(X[slice(start, stop)].T))
        return np.array(rest)
def on_initialize(self):
    """Read box settings (save/load paths, test-set share, classifier
    choice) and build ``self.clf`` -- as a pipeline for the Riemannian
    options, or a bare estimator otherwise."""
    # where to persist the trained model, if anywhere
    try:
        self.model_path = self.setting['Filename to save model to']
    except KeyError:
        self.model_path = ''
    if self.model_path == '':
        print(
            'No correct file location has been given for saving the model, thus it won\'t be saved'
        )
    # share of the data held out for testing; 0 disables prediction
    try:
        self.test_set_share = float(self.setting['Test set share'])
        # wrong value
        diff = (1 - self.test_set_share)
        if diff <= 0 or diff > 1:
            self.test_set_share = 0
            print(
                'The value of the test set share must be between 0 and 1 (1 not included), no prediction will be performed'
            )
    except KeyError:
        self.test_set_share = 0
        print(
            'The value of the test set share must be between 0 and 1 (1 not included), no prediction will be performed'
        )
    # try to load an existing model; otherwise a fresh one is built below
    try:
        self.save_path = self.setting['Filename to load model from']
        self.model = pickle.load(open(self.save_path, 'rb'))
    except KeyError:
        self.save_path = ''
        print(
            'No correct location has been given to load the model from, thus a new model will be created.'
        )
    # if model doesn't exist we will init a new one with params from the box
    # FileNotFoundError doesn't exist in Python 2.7
    except IOError:
        print(
            'No correct location has been given to load the model from, thus a new model will be created.'
        )
    # special case for Riemannian Geometry because it needs a pipeline
    clf = self.setting['Classifier']
    try:
        discriminator, _ = self.map_clf(self.setting['Discriminator'])
    except KeyError:
        discriminator = None
    if clf == 'Riemann Tangent Space':
        if discriminator is not None:
            self.clf = make_pipeline(Covariances(),
                                     TangentSpace(metric='riemann'),
                                     discriminator())
        else:
            self.clf = make_pipeline(Covariances(),
                                     TangentSpace(metric='riemann'),
                                     LinearDiscriminantAnalysis())
    elif clf == 'Riemann Minimum Distance to Mean':
        if discriminator is not None:
            self.clf = make_pipeline(
                Covariances(),
                MDM(metric=dict(mean='riemann', distance='riemann')),
                discriminator())
        else:
            self.clf = make_pipeline(
                Covariances(),
                MDM(metric=dict(mean='riemann', distance='riemann')))
    else:
        # non-Riemannian classifiers are looked up and instantiated here
        # NOTE(review): indentation reconstructed from a collapsed source --
        # confirm init_params/instantiation belongs only to this branch
        # (the Riemann branches assign an already-instantiated pipeline).
        self.clf, _ = self.map_clf(clf)
        self.init_params()
        try:
            self.clf = self.clf(**self.clf_dependant_settings)
        except TypeError:
            self.clf = self.clf()
# - CSP + RegLDA : Common Spatial Patterns + Regularized Linear Discriminat Analysis. This is a very common EEG analysis pipeline. # - Cov + TS : Covariance + Tangent space mapping. One of the most reliable Riemannian geometry-based pipelines. # - Cov + MDM: Covariance + MDM. A very simple, yet effective (for low channel count), Riemannian geometry classifier. # - CSP + Cov + TS: Common Spatial Patterns + Covariance + Tangent spacem mapping. Riemannian pipeline with the standard CSP procedure beforehand # Evaluation is done through cross-validation, with area-under-the-curve (AUC) as metric (AUC is probably the best metric for binary and unbalanced classification problem) # Note: because we're doing machine learning here, the following cell may take a while to complete clfs = OrderedDict() clfs['CSP + RegLDA'] = make_pipeline(Covariances(), CSP(4), LDA(shrinkage='auto', solver='eigen')) clfs['Cov + TS'] = make_pipeline(Covariances(), TangentSpace(), LogisticRegression()) clfs['Cov + MDM'] = make_pipeline(Covariances(), MDM()) clfs['CSP + Cov + TS'] = make_pipeline(Covariances(), CSP(4, log=False), TangentSpace(), LogisticRegression()) # define cross validation cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, random_state=42) # run cross validation for each pipeline auc = [] methods = [] for m in clfs: print(m) try: res = cross_val_score(clfs[m], X, y == 2,
# event codes, one per epoch
labels = epochs.events[:, -1]
evoked = epochs.average()

###############################################################################
# Decoding with Xdawn + MDM

n_components = 3  # pick some components

# Define a monte-carlo cross-validation generator (reduce variance):
cv = KFold(n_splits=10, shuffle=True, random_state=42)
# out-of-fold predictions collected across all splits
pr = np.zeros(len(labels))
epochs_data = epochs.get_data()

print('Multiclass classification with XDAWN + MDM')

clf = make_pipeline(XdawnCovariances(n_components), MDM())

for train_idx, test_idx in cv.split(epochs_data):
    y_train, y_test = labels[train_idx], labels[test_idx]
    clf.fit(epochs_data[train_idx], y_train)
    pr[test_idx] = clf.predict(epochs_data[test_idx])

print(classification_report(labels, pr))

###############################################################################
# plot the spatial patterns
xd = XdawnCovariances(n_components)
xd.fit(epochs_data, labels)
# display the fitted Xdawn spatial patterns via the evoked container
evoked.data = xd.Xd_.patterns_.T
def fit(self, X, y):
    """Grid-search over (epoch window, bandpass filter) settings.

    For every combination of ``self.epoch_bounds`` and
    ``self.bandpass_filters`` this fits three classifiers on a fresh
    train/test split — CSP+LDA, MDM on covariance matrices, and a tangent
    space classifier — and keeps the ``self.num_votes`` best of each family
    in sorted "top-N" lists (scores, options, fitted estimators).

    NOTE(review): this is Python 2 code (print statements, xrange). One
    print line below contains a ``******`` redaction artifact and is not
    valid syntax as-is.

    Parameters
    ----------
    X : ndarray, n-dimensional epoch data (allow_nd validated)
    y : array of labels, one per epoch

    Returns
    -------
    self
    """
    # validate
    X, y = check_X_y(X, y, allow_nd=True)
    X = check_array(X, allow_nd=True)

    # set internal vars
    self.classes_ = unique_labels(y)
    self.X_ = X
    self.y_ = y

    ##################################################
    # split X into train and test sets, so that
    # grid search can be performed on train set only
    seed = 7
    np.random.seed(seed)
    #X_TRAIN, X_TEST, y_TRAIN, y_TEST = train_test_split(X, y, test_size=0.25, random_state=seed)

    for epoch_trim in self.epoch_bounds:
        for bandpass in self.bandpass_filters:
            # NOTE(review): the split is re-done with the same fixed seed on
            # every iteration, so every parameter combination sees the same
            # train/test partition
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.25, random_state=seed)
            # X_train = np.copy(X_TRAIN)
            # X_test = np.copy(X_TEST)
            # y_train = np.copy(y_TRAIN)
            # y_test = np.copy(y_TEST)

            # separate out inputs that are tuples
            bandpass_start, bandpass_end = bandpass
            epoch_trim_start, epoch_trim_end = epoch_trim

            # bandpass filter coefficients (5th-order Butterworth, critical
            # frequencies normalized by the Nyquist rate self.sfreq / 2)
            b, a = butter(
                5,
                np.array([bandpass_start, bandpass_end]) / (self.sfreq * 0.5),
                'bandpass')

            # filter and crop TRAINING SET
            X_train = self.preprocess_X(X_train, b, a, epoch_trim_start,
                                        epoch_trim_end)
            # validate
            X_train, y_train = check_X_y(X_train, y_train, allow_nd=True)
            X_train = check_array(X_train, allow_nd=True)

            # filter and crop TEST SET
            X_test = self.preprocess_X(X_test, b, a, epoch_trim_start,
                                       epoch_trim_end)
            # validate
            X_test, y_test = check_X_y(X_test, y_test, allow_nd=True)
            X_test = check_array(X_test, allow_nd=True)

            ###########################################################################
            # self-tune CSP to find optimal number of filters to use at these settings
            #[best_num_filters, best_num_filters_score] = self.self_tune(X_train, y_train)
            best_num_filters = 5  # self-tuning disabled; fixed filter count

            # as an option, we could tune optimal CSP filter num against complete train set
            #X_tune = self.preprocess_X(X, b, a, epoch_trim_start, epoch_trim_end)
            #[best_num_filters, best_num_filters_score] = self.self_tune(X_tune, y)

            # now use this insight to really fit with optimal CSP spatial filters
            """
            reg : float | str | None (default None)
                if not None, allow regularization for covariance estimation
                if float, shrinkage covariance is used (0 <= shrinkage <= 1).
                if str, optimal shrinkage using Ledoit-Wolf Shrinkage
                ('ledoit_wolf') or Oracle Approximating Shrinkage ('oas').
            """
            transformer = CSP(n_components=best_num_filters, reg='ledoit_wolf')
            transformer.fit(X_train, y_train)

            # use these CSP spatial filters to transform train and test
            spatial_filters_train = transformer.transform(X_train)
            spatial_filters_test = transformer.transform(X_test)

            # put this back in as failsafe if NaN or inf starts cropping up
            # spatial_filters_train = np.nan_to_num(spatial_filters_train)
            # check_X_y(spatial_filters_train, y_train)
            # spatial_filters_test = np.nan_to_num(spatial_filters_test)
            # check_X_y(spatial_filters_test, y_test)

            # train LDA
            classifier = LinearDiscriminantAnalysis()
            classifier.fit(spatial_filters_train, y_train)
            score = classifier.score(spatial_filters_test, y_test)

            #print "current score",score
            # NOTE(review): the ****** below is a redaction artifact in the
            # original source — the bandpass values were printed here
            print "bandpass:"******"epoch window:", epoch_trim_start, epoch_trim_end
            #print best_num_filters,"filters chosen"

            # put in ranked order Top 10 list (lists stay sorted ascending;
            # worst entry is dropped from the front once over num_votes)
            idx = bisect(self.ranked_scores, score)
            self.ranked_scores.insert(idx, score)
            self.ranked_scores_opts.insert(
                idx,
                dict(bandpass=bandpass,
                     epoch_trim=epoch_trim,
                     filters=best_num_filters))
            self.ranked_classifiers.insert(idx, classifier)
            self.ranked_transformers.insert(idx, transformer)

            if len(self.ranked_scores) > self.num_votes:
                self.ranked_scores.pop(0)
            if len(self.ranked_scores_opts) > self.num_votes:
                self.ranked_scores_opts.pop(0)
            if len(self.ranked_classifiers) > self.num_votes:
                self.ranked_classifiers.pop(0)
            if len(self.ranked_transformers) > self.num_votes:
                self.ranked_transformers.pop(0)

            """
            Covariance computation
            """
            # compute covariance matrices
            cov_data_train = covariances(X=X_train)
            cov_data_test = covariances(X=X_test)

            # Riemannian minimum-distance-to-mean classifier on covariances
            clf_mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
            clf_mdm.fit(cov_data_train, y_train)
            score_mdm = clf_mdm.score(cov_data_test, y_test)
            # print "MDM prediction score:",score_mdm

            # put in ranked order Top 10 list
            idx = bisect(self.ranked_scores_mdm, score_mdm)
            self.ranked_scores_mdm.insert(idx, score_mdm)
            self.ranked_scores_opts_mdm.insert(
                idx,
                dict(bandpass=bandpass,
                     epoch_trim=epoch_trim,
                     filters=best_num_filters))
            self.ranked_classifiers_mdm.insert(idx, clf_mdm)

            if len(self.ranked_scores_mdm) > self.num_votes:
                self.ranked_scores_mdm.pop(0)
            if len(self.ranked_scores_opts_mdm) > self.num_votes:
                self.ranked_scores_opts_mdm.pop(0)
            if len(self.ranked_classifiers_mdm) > self.num_votes:
                self.ranked_classifiers_mdm.pop(0)

            # tangent space classifier on the same covariance matrices
            clf_ts = TSclassifier()
            clf_ts.fit(cov_data_train, y_train)
            score_ts = clf_ts.score(cov_data_test, y_test)

            # put in ranked order Top 10 list
            idx = bisect(self.ranked_scores_ts, score_ts)
            self.ranked_scores_ts.insert(idx, score_ts)
            self.ranked_scores_opts_ts.insert(
                idx,
                dict(bandpass=bandpass,
                     epoch_trim=epoch_trim,
                     filters=best_num_filters))
            self.ranked_classifiers_ts.insert(idx, clf_ts)

            if len(self.ranked_scores_ts) > self.num_votes:
                self.ranked_scores_ts.pop(0)
            if len(self.ranked_scores_opts_ts) > self.num_votes:
                self.ranked_scores_opts_ts.pop(0)
            if len(self.ranked_classifiers_ts) > self.num_votes:
                self.ranked_classifiers_ts.pop(0)

            print "CSP+LDA score:", score, "Tangent space w/LR score:", score_ts
            print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"

    # dump the final leaderboards for all three classifier families
    print "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
    print " T O P ", self.num_votes, " C L A S S I F I E R S"
    print
    #j=1
    for i in xrange(len(self.ranked_scores)):
        print i, ",", round(self.ranked_scores[i], 4), ",",
        print self.ranked_scores_opts[i]
    print "-------------------------------------"
    for i in xrange(len(self.ranked_scores_ts)):
        print i, ",", round(self.ranked_scores_ts[i], 4), ",",
        print self.ranked_scores_opts_ts[i]
    print "-------------------------------------"
    for i in xrange(len(self.ranked_scores_mdm)):
        print i, ",", round(self.ranked_scores_mdm[i], 4), ",",
        print self.ranked_scores_opts_mdm[i]

    # finish up, set the flag to indicate "fitted" state
    self.fit_ = True

    # Return the classifier
    return self
# NOTE(review): script fragment — raw, events, event_id, tmin, tmax, picks
# come from earlier in the file. add_eeg_ref and the positional
# KFold(n, n_folds) signature are legacy MNE / scikit-learn APIs — confirm
# the pinned library versions before running.
epochs = Epochs(raw, events, event_id, tmin, tmax, proj=True,
                picks=picks, baseline=None, preload=True,
                add_eeg_ref=False, verbose=False)
labels = epochs.events[:, -1] - 2  # shift event codes so labels start at 0

# cross validation
cv = KFold(len(labels), 10, shuffle=True, random_state=42)
# get epochs (scaled to microvolts)
epochs_data_train = 1e6*epochs.get_data()

# compute covariance matrices
cov_data_train = covariances(epochs_data_train)

###############################################################################
# Classification with Minimum distance to mean
mdm = MDM(metric=dict(mean='riemann', distance='riemann'))

# Use scikit-learn Pipeline with cross_val_score function
scores = cross_val_score(mdm, cov_data_train, labels, cv=cv, n_jobs=1)

# Printing the results
class_balance = np.mean(labels == labels[0])
class_balance = max(class_balance, 1. - class_balance)
print("MDM Classification accuracy: %f / Chance level: %f" %
      (np.mean(scores), class_balance))

###############################################################################
# Classification with Tangent Space Logistic Regression
clf = TSclassifier()
# Use scikit-learn Pipeline with cross_val_score function
scores = cross_val_score(clf, cov_data_train, labels, cv=cv, n_jobs=1)
# NOTE(review): script fragment — raw, events, event_ids, tmin, wl, picks,
# estimators, cv, sc, dfa, n_splits are defined earlier in the file; the
# sns.lineplot call at the end is truncated in this chunk.
epochs = Epochs(
    raw,
    events,
    event_ids,
    tmin,
    tmin + wl,  # window of length wl seconds starting at tmin
    proj=True,
    picks=picks,
    preload=True,
    baseline=None,
    verbose=False,
)
X = epochs.get_data()
# binarize: event code 2 -> class 0, everything else -> class 1
y = np.array([0 if ev == 2 else 1 for ev in epochs.events[:, -1]])

for est in estimators:
    clf = make_pipeline(Covariances(estimator=est), MDM())
    try:
        score = cross_val_score(clf, X, y, cv=cv, scoring=sc)
        # NOTE(review): the comprehension variable `sc` shadows the scoring
        # name `sc` above; harmless in Python 3 (comprehension scope) but
        # confusing to read
        dfa += [dict(estimator=est, wlen=wl, accuracy=sc) for sc in score]
    except ValueError:
        # window too short for this covariance estimator to produce an SPD
        # matrix — record NaNs so the DataFrame stays rectangular
        print(f"{est}: {wl} is not sufficent to estimate a SPD matrix")
        dfa += [dict(estimator=est, wlen=wl, accuracy=np.nan)] * n_splits

dfa = pd.DataFrame(dfa)

###############################################################################

fig, ax = plt.subplots(figsize=(6, 4))
sns.lineplot(
    data=dfa,
    x="wlen",
    y="accuracy",
def fit(self, X, y):
    """Fit an internal MDM classifier on subsampled sequence labels.

    Labels are derived by running ``create_sequence`` over ``y.T`` and
    keeping every ``self.subsample``-th entry; the resulting MDM instance
    is stored on ``self.mdm``.
    """
    targets = np.squeeze(create_sequence(y.T)[::self.subsample])
    self.mdm = MDM(metric=self.metric_mean, n_jobs=self.n_jobs)
    self.mdm.fit(X, targets)
    return self
# NOTE(review): script fragment — paradigm, dataset, subject_source,
# subject_target, ncovs_target_train, paradigm_name, TL come from earlier in
# the file; the TL.RPA_recenter call at the end is truncated in this chunk.
data_source = {}
data_target = {}
# covariance sets for source and target subjects (Ledoit-Wolf estimator)
X, labels, meta = paradigm.get_data(dataset, subjects=[subject_source])
data_source['covs'] = Covariances(estimator='lwf').fit_transform(X)
data_source['labels'] = labels
X, labels, meta = paradigm.get_data(dataset, subjects=[subject_target])
data_target['covs'] = Covariances(estimator='lwf').fit_transform(X)
data_target['labels'] = labels

# setup the scores dictionary — one score list per alignment method
scores = {}
for meth in ['org', 'rct', 'str', 'rot', 'clb']:
    scores[meth] = []

# apply RPA to multiple random partitions for the training dataset
clf = MDM()
nrzt = 5  # number of random re-partitions
for _ in tqdm(range(nrzt)):

    # split the target dataset into training and testing
    source = {}
    target_train = {}
    target_test = {}
    source['org'], target_train['org'], target_test['org'] = \
        TL.get_sourcetarget_split(data_source, data_target,
                                  ncovs_target_train, paradigm=paradigm_name)

    # apply RPA
    source['rct'], target_train['rct'], target_test['rct'] = TL.RPA_recenter(
def test_MDM_predict():
    """Test prediction of MDM"""
    n_matrices, n_channels = 100, 3
    covset = generate_cov(n_matrices, n_channels)
    labels = np.array([0, 1]).repeat(n_matrices // 2)

    # plain fit followed by predict
    clf = MDM(metric='riemann')
    clf.fit(covset, labels)
    clf.predict(covset)

    # fit_predict shortcut
    clf = MDM(metric='riemann')
    clf.fit_predict(covset, labels)

    # distances to the class means
    clf.transform(covset)

    # posterior probabilities
    clf.predict_proba(covset)

    # parallel prediction with n_jobs
    clf = MDM(metric='riemann', n_jobs=2)
    clf.fit(covset, labels)
    clf.predict(covset)
# NOTE(review): script fragment — the first statements continue a per-class
# plotting loop whose header (defining i, l, ax, cov_centers, info) precedes
# this chunk; cov_ext_trials and events also come from earlier in the file.
plt.imshow(cov_centers[i], cmap=plt.get_cmap('RdBu_r'))
plt.title('Cov mean for class: '+l)
plt.xticks([])
# channel names only on the left-hand column of the subplot grid
if i == 0 or i == 2:
    plt.yticks(np.arange(len(info['ch_names'])), info['ch_names'])
    ax.tick_params(axis='both', which='major', labelsize=7)
else:
    plt.yticks([])
plt.show()

###############################################################################
# Minimum distance to mean is a simple and robust algorithm for BCI decoding.
# It reproduces results of [2] for the first session of subject 12.

cv = RepeatedKFold(n_splits=2, n_repeats=10, random_state=42)
mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
scores = cross_val_score(mdm, cov_ext_trials, events[:, 2], cv=cv, n_jobs=1)
print("MDM accuracy: {:.2f}% +/- {:.2f}".format(np.mean(scores)*100,
                                                np.std(scores)*100))
# The obtained results are 80.62% +/- 16.29 for this session, with a repeated
# k-fold validation.

###############################################################################
# References
# ----------
# [1] M. Congedo, A. Barachant, A. Andreev ,"A New generation of Brain-Computer
# Interface Based on Riemannian Geometry", arXiv: 1310.8115, 2013.
#
# [2] E. K. Kalunga, S. Chevallier, Q. Barthélemy, E. Monacelli,
# "Review of Riemannian distances and divergences, applied to SSVEP-based BCI",
# Neuroinformatics, 2020.
# NOTE(review): script fragment — this chunk starts inside an Epochs(...)
# call whose opening arguments precede it. KFold(n, n_folds) is the legacy
# scikit-learn signature; confirm the pinned library version.
baseline=None, preload=True, verbose=False)
labels = epochs.events[:, -1] - 2  # shift event codes so labels start at 0

# cross validation
cv = KFold(len(labels), 10, shuffle=True, random_state=42)
# get epochs (scaled to microvolts)
epochs_data_train = 1e6 * epochs.get_data()

# compute covariance matrices
cov_data_train = Covariances().transform(epochs_data_train)

###############################################################################
# Classification with Minimum distance to mean
mdm = MDM(metric=dict(mean='riemann', distance='riemann'))

# Use scikit-learn Pipeline with cross_val_score function
scores = cross_val_score(mdm, cov_data_train, labels, cv=cv, n_jobs=1)

# Printing the results
class_balance = np.mean(labels == labels[0])
class_balance = max(class_balance, 1. - class_balance)
print("MDM Classification accuracy: %f / Chance level: %f" %
      (np.mean(scores), class_balance))

###############################################################################
# Classification with Tangent Space Logistic Regression
clf = TSclassifier()
# Use scikit-learn Pipeline with cross_val_score function
scores = cross_val_score(clf, cov_data_train, labels, cv=cv, n_jobs=1)
from pyriemann.estimation import Covariances
from pyriemann.classification import MDM
from utilities import dimensionality_reduction as DR
from sklearn.model_selection import KFold
from tqdm import tqdm

# NOTE(review): script fragment — PhysionetMI / MotorImagery are MOABB names
# imported earlier in the file; the SELg_names list is truncated in this chunk.

# setup which dataset to consider from MOABB
dataset = PhysionetMI()
paradigm = MotorImagery()
paradigm_name = 'MI'

# choose dimension to reduce
pred = 12

# set up the classifier
clf = MDM()

# which subject to consider
subject = 7

# load data
X, labels, meta = paradigm.get_data(dataset, subjects=[subject])
covs = Covariances(estimator='lwf').fit_transform(X)

# get the indices for the electrodes chosen in SELg and SELb
# NOTE(review): _get_single_subject_data is a private MOABB API — fragile
raw = dataset._get_single_subject_data(subject)['session_0']['run_4']
chnames_dict = {}
for i, chi in enumerate(raw.ch_names):
    chnames_dict[chi.upper()] = i

SELg_names = [
    'F3', 'FZ', 'F4', 'FC1', 'FC2', 'C3', 'CZ', 'C4', 'CP1', 'CP2', 'P3', 'P4'
# NOTE(review): script fragment — cv, labels, clf, epochs_data, scores,
# n_components, evoked come from earlier in the file. Iterating `cv` directly
# (rather than cv.split(...)) is the legacy scikit-learn CV API.
for train_idx, test_idx in cv:
    y_train, y_test = labels[train_idx], labels[test_idx]
    clf.fit(epochs_data[train_idx], y_train)
    scores.append(clf.score(epochs_data[test_idx], y_test))

# Printing the results
class_balance = np.mean(labels == labels[0])
class_balance = max(class_balance, 1. - class_balance)
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))

# spatial patterns
xd = XdawnCovariances(n_components)
Cov = xd.fit_transform(epochs_data, labels)

# FIX: the fitted Xdawn instance is stored on the attribute `Xd_` and its
# spatial patterns on `patterns_` (the original `xd.Xd._patterns` raises
# AttributeError; compare the identical example earlier in this file which
# uses `xd.Xd_.patterns_`).
evoked.data = xd.Xd_.patterns_.T
evoked.times = np.arange(evoked.data.shape[0])
evoked.plot_topomap(times=[0, 1, n_components, n_components + 1],
                    ch_type='grad', colorbar=False, size=1.5)

# prototyped covariance matrices (one Riemannian mean per class)
mdm = MDM()
mdm.fit(Cov, labels)
fig, axe = plt.subplots(1, 2)
axe[0].matshow(mdm.covmeans[0])
axe[0].set_title('Class 1 covariance matrix')
axe[1].matshow(mdm.covmeans[1])
axe[1].set_title('Class 2 covariance matrix')
plt.show()