def ml_classifier(inputs, targets, classifier=None, pipeline=None):
    """Use sklearn to fit a model given inputs and targets.

    Args:
        inputs: list containing (N trials * M channels) data segments of length (number of features).
        targets: list containing (N trials * M channels) of marker data (0 or 1).
        classifier: pre-trained LDA classifier; if None, train from scratch.
        pipeline: name of the pipeline to create if classifier is None.

    Returns:
        classifier: fitted classifier object.
    """
    pipeline_dict = {
        'vect_lr': make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression()),
        'vect_reglda': make_pipeline(Vectorizer(), LDA(shrinkage='auto', solver='eigen')),
        'xdawn_reglda': make_pipeline(Xdawn(2, classes=[1]), Vectorizer(),
                                      LDA(shrinkage='auto', solver='eigen')),
        'erpcov_ts': make_pipeline(ERPCovariances(), TangentSpace(), LogisticRegression()),
        'erpcov_mdm': make_pipeline(ERPCovariances(), MDM()),
    }
    if not classifier and pipeline:
        classifier = pipeline_dict[pipeline.lower()]
    classifier.fit(inputs, targets)
    return classifier
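# Usage sketch (illustrative, not from the original source): calling ml_classifier with
# synthetic epoched data shaped (n_trials, n_channels, n_times) and binary labels.
# The pipeline name must be one of the keys in pipeline_dict above ('vect_reglda' here).
import numpy as np

X = np.random.randn(40, 8, 128)       # hypothetical: 40 trials, 8 channels, 128 samples
y = np.random.randint(0, 2, size=40)  # hypothetical binary markers

# no pre-trained classifier, so a Vectorizer + regularized-LDA pipeline is built and fitted
clf = ml_classifier(X, y, classifier=None, pipeline='vect_reglda')
print(clf.predict(X[:5]))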
def N170_test(session_data):
    markers = N170_MARKERS
    epochs = get_session_erp_epochs(session_data, markers)
    conditions = OrderedDict()
    for i in range(len(markers)):
        conditions[markers[i]] = [i + 1]

    clfs = OrderedDict()
    clfs['Vect + LR'] = make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression())
    clfs['Vect + RegLDA'] = make_pipeline(Vectorizer(), LDA(shrinkage='auto', solver='eigen'))
    clfs['ERPCov + TS'] = make_pipeline(ERPCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
    clfs['ERPCov + MDM'] = make_pipeline(ERPCovariances(estimator='oas'), MDM())
    clfs['XdawnCov + TS'] = make_pipeline(XdawnCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
    clfs['XdawnCov + MDM'] = make_pipeline(XdawnCovariances(estimator='oas'), MDM())
    methods_list = ['Vect + LR', 'Vect + RegLDA', 'ERPCov + TS', 'ERPCov + MDM',
                    'XdawnCov + TS', 'XdawnCov + MDM']

    # format data
    epochs.pick_types(eeg=True)
    X = epochs.get_data() * 1e6
    times = epochs.times
    y = epochs.events[:, -1]

    # define cross validation
    cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, random_state=42)

    # run cross validation for each pipeline
    auc = []
    methods = []
    print('Calculation in progress...')
    for m in clfs:
        try:
            res = cross_val_score(clfs[m], X, y == 2, scoring='roc_auc', cv=cv, n_jobs=-1)
            auc.extend(res)
            methods.extend([m] * len(res))
        except Exception as ex:
            print(f'Exception while scoring {m}: {ex}')

    # Collect decoding results
    results = pd.DataFrame(data=auc, columns=['AUC'])
    results['Method'] = methods
    n_row, n_column = results.shape

    auc_means = []
    for method in methods_list:
        method_auc = []
        for i in range(n_row):
            if results.loc[i, 'Method'] == method:
                method_auc.append(results.loc[i, 'AUC'])
        auc_means.append(np.mean(method_auc))

    counter = 0
    for i in range(len(methods_list)):
        color = 'green' if auc_means[i] >= 0.7 else 'red'
        counter = counter + 1 if auc_means[i] >= 0.7 else counter

    return counter > 0, counter
def test_xdawn_decoding_performance():
    """Test decoding performance and extracted pattern on synthetic data."""
    from sklearn.model_selection import KFold
    from sklearn.pipeline import make_pipeline
    from sklearn.linear_model import LogisticRegression
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.metrics import accuracy_score

    n_xdawn_comps = 3
    expected_accuracy = 0.98

    epochs, mixing_mat = _simulate_erplike_mixed_data(n_epochs=100)
    y = epochs.events[:, 2]

    # results of Xdawn and _XdawnTransformer should match
    xdawn_pipe = make_pipeline(
        Xdawn(n_components=n_xdawn_comps),
        Vectorizer(),
        MinMaxScaler(),
        LogisticRegression(solver='liblinear'))
    xdawn_trans_pipe = make_pipeline(
        _XdawnTransformer(n_components=n_xdawn_comps),
        Vectorizer(),
        MinMaxScaler(),
        LogisticRegression(solver='liblinear'))
    cv = KFold(n_splits=3, shuffle=False)
    for pipe, X in (
            (xdawn_pipe, epochs), (xdawn_trans_pipe, epochs.get_data())):
        predictions = np.empty_like(y, dtype=float)
        for train, test in cv.split(X, y):
            pipe.fit(X[train], y[train])
            predictions[test] = pipe.predict(X[test])

        cv_accuracy_xdawn = accuracy_score(y, predictions)
        assert_allclose(cv_accuracy_xdawn, expected_accuracy, atol=0.01)

        # for both event types, the first component should "match" the mixing
        fitted_xdawn = pipe.steps[0][1]
        if isinstance(fitted_xdawn, Xdawn):
            relev_patterns = np.concatenate(
                [comps[[0]] for comps in fitted_xdawn.patterns_.values()])
        else:
            relev_patterns = fitted_xdawn.patterns_[::n_xdawn_comps]

        for i in range(len(relev_patterns)):
            r, _ = stats.pearsonr(relev_patterns[i, :], mixing_mat[0, :])
            assert np.abs(r) > 0.99
def svm_proba(train_xx6, train_y, test_xx6):
    # collect each target epoch (label 1) together with its two neighbours on each side
    selects = []
    for j, y in enumerate(train_y):
        if y == 1:
            selects.extend(j - e for e in [-2, -1, 0, 1, 2])

    train_xx6 = train_xx6[selects]
    train_xx6 = train_xx6[:, :, 40:80]
    test_xx6 = test_xx6[:, :, 40:80]
    train_y = train_y[selects]
    train_y[train_y != 1] = 0

    # s = train_xx6.shape
    # train_xx6 = train_xx6.reshape((s[0], s[1] * s[2]))
    # s = test_xx6.shape
    # test_xx6 = test_xx6.reshape((s[0], s[1] * s[2]))
    # return test_xx6

    clf = make_pipeline(
        Vectorizer(),
        StandardScaler(),
        pca.PCA(n_components=.95),
        svm.SVC(gamma='scale',
                kernel='rbf',
                class_weight={0: 1, 1: 2},
                probability=True))

    clf.fit(train_xx6, train_y)
    return clf.predict_proba(test_xx6)
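# Usage sketch (illustrative, not from the original source): svm_proba expects epoch arrays
# with at least 80 samples on the time axis (it slices samples 40:80) and integer labels
# where the target class is coded as 1. All data below are synthetic placeholders.
import numpy as np

train_x = np.random.randn(60, 16, 100)  # 60 training epochs, 16 channels, 100 samples
train_y = np.zeros(60, dtype=int)
train_y[5:55:6] = 1                     # mark some epochs as targets (class 1)
test_x = np.random.randn(20, 16, 100)

proba = svm_proba(train_x, train_y, test_x)
print(proba.shape)                      # (20, 2): per-epoch probability of class 0 and class 1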
def test_get_coef_multiclass_full(n_classes, n_channels, n_times):
    """Test a full example with pattern extraction."""
    from sklearn.pipeline import make_pipeline
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import StratifiedKFold

    data = np.zeros((10 * n_classes, n_channels, n_times))
    # Make only the first channel informative
    for ii in range(n_classes):
        data[ii * 10:(ii + 1) * 10, 0] = ii
    events = np.zeros((len(data), 3), int)
    events[:, 0] = np.arange(len(events))
    events[:, 2] = data[:, 0, 0]
    info = create_info(n_channels, 1000., 'eeg')
    epochs = EpochsArray(data, info, events, tmin=0)
    clf = make_pipeline(
        Scaler(epochs.info), Vectorizer(),
        LinearModel(LogisticRegression(random_state=0, multi_class='ovr')),
    )
    scorer = 'roc_auc_ovr_weighted'
    time_gen = GeneralizingEstimator(clf, scorer, verbose=True)
    X = epochs.get_data()
    y = epochs.events[:, 2]
    n_splits = 3
    cv = StratifiedKFold(n_splits=n_splits)
    scores = cross_val_multiscore(time_gen, X, y, cv=cv, verbose=True)
    want = (n_splits,)
    if n_times > 1:
        want += (n_times, n_times)
    assert scores.shape == want
    assert_array_less(0.8, scores)
    clf.fit(X, y)
    patterns = get_coef(clf, 'patterns_', inverse_transform=True)
    assert patterns.shape == (n_classes, n_channels, n_times)
    assert_allclose(patterns[:, 1:], 0., atol=1e-7)  # no other channels useful
def __init__(self, subject=None, date=None, mode='train', **kwargs):
    if subject is None:
        subject = cfg.subj_info.subjname
    self._subj_path = os.path.dirname(__file__) + '/../data/' + subject

    if date is None:
        self._date = utils.find_nearest_time(self._subj_path)
    else:
        if isinstance(date, datetime):
            # convert datetime to str
            self._date = date.strftime("%Y-%m-%d-%H-%M-%S")
        else:
            self._date = date

    self.mode = mode.lower()
    assert self.mode in ['train', 'test']
    if self.mode == 'test':
        # loading trained coefficient
        self.data_dict = np.load(os.path.join(self._subj_path, self._date, 'coef.npz'))
        # loading trained model
        self.__cls = joblib.load(os.path.join(self._subj_path, self._date, 'model.pkl'))
        self._ch_ind = self.data_dict['ind_ch_scores']
    else:
        self.data_dict = {}
        C = kwargs.pop('C', 1)
        n_components = kwargs.pop('n_components', 3)
        self.__cls = make_pipeline(
            _XdawnTransformer(n_components=n_components),
            ChannelScaler(),
            Vectorizer(),
            LogisticRegression(C=C, class_weight='balanced', solver='liblinear',
                               multi_class='ovr')
        )
def mvpa(name):
    # Perform MVPA
    # Settings
    BASELINE = (None, 0)
    CROP = (0, 0.8)
    EVENTS = ['1', '2']

    # Load epochs
    loader = FileLoader(name)
    loader.load_epochs(recompute=False)
    print(loader.epochs_list)

    # Prepare [predicts] for results
    predicts = []

    # Cross validation
    num_epochs = len(loader.epochs_list)
    for exclude in range(num_epochs):
        # Start on separate training and testing dataset
        print(f'---- {name}: {exclude} | {num_epochs} ----------------------')
        includes = [
            e for e in range(len(loader.epochs_list)) if not e == exclude
        ]
        excludes = [exclude]
        train_epochs, test_epochs = loader.leave_one_session_out(
            includes, excludes)
        print(train_epochs, test_epochs)

        def prepare_epochs(epochs):
            # A tool for preparing epochs
            epochs = epochs['1', '2']
            epochs.apply_baseline(BASELINE)
            return epochs.crop(CROP[0], CROP[1])

        # print('Xdawn --------------------------------')
        # enhancer = Enhancer(train_epochs=train_epochs,
        #                     test_epochs=test_epochs)
        # train_epochs, test_epochs = enhancer.fit_apply()

        # Prepare epochs
        train_epochs = prepare_epochs(train_epochs)
        test_epochs = prepare_epochs(test_epochs)
        X_train, y_train = get_X_y(train_epochs)
        X_test, y_test = get_X_y(test_epochs)

        print('Training -----------------------------')
        clf = svm.SVC(gamma='scale', kernel='rbf', class_weight='balanced')
        pipeline = make_pipeline(Vectorizer(), StandardScaler(), clf)
        pipeline.fit(X_train, y_train)
        y_pred = pipeline.predict(X_test)
        # y_pred = y_test

        print('Testing ------------------------------')
        predicts.append(dict(y_test=y_test, y_pred=y_pred))

        # note: contents are pickled even though the file has a .json extension
        with open(os.path.join(results_dir, f'{name}.json'), 'wb') as f:
            pickle.dump(predicts, f)
def UpdateModel():
    try:
        SetPathsVars()
        userlist = np.load(USER_LIST, allow_pickle='TRUE').item()
        if len(userlist) < 2:
            return True
        database = np.load(DB_PATH, allow_pickle='TRUE')
        print(database)
        database = database[1:]
        rows, cols = len(database), len(database[0])
        print(rows, cols)

        X = []
        y = []
        for r in database:
            y.append(r[-1])
            arrr = r[:cols - 1]
            arrr = np.reshape(arrr, (15, 113))
            X.append(arrr)

        from sklearn.pipeline import make_pipeline
        from sklearn.preprocessing import StandardScaler
        from sklearn.svm import LinearSVC
        from mne.decoding import Vectorizer

        clf = make_pipeline(Vectorizer(), StandardScaler(), LinearSVC())
        clf.fit(X, y)
        with open(getPath('Res/model.pkl'), 'wb') as f:
            pkl.dump(clf, f)
        print('Successfully updated model')
        return True
    except Exception as e:
        print(e)
        print("Failed to update model")
        return False
def train_test_lda(self, X_train, y_train, X_test, y_test):
    """Regularized LDA.

    Parameters
    ----------
    X_train : instance of numpy.ndarray
        The training data.
    y_train : instance of numpy.ndarray
        The training target values.
    X_test : instance of numpy.ndarray
        The testing data.
    y_test : instance of numpy.ndarray
        The testing target values.

    Returns
    -------
    model : instance of sklearn.pipeline.Pipeline
        The final model.
    auc : float
        The AUC score.
    """
    model = make_pipeline(Vectorizer(),
                          LDA(shrinkage='auto', solver='eigen'))
    model.fit(X_train, y_train)
    auc = roc_auc_score(y_test, model.predict(X_test))
    return model, auc
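# Usage sketch (illustrative, not from the original source): train_test_lda is a method,
# so `decoder` below stands for a hypothetical instance of the class that defines it.
import numpy as np
from sklearn.model_selection import train_test_split

X = np.random.randn(80, 32, 200)        # hypothetical: 80 epochs, 32 channels, 200 samples
y = np.random.randint(0, 2, size=80)    # hypothetical binary targets

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=0)

model, auc = decoder.train_test_lda(X_train, y_train, X_test, y_test)
print(f'AUC: {auc:.3f}')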
def make_clf(pattern=False, vectorized=False):
    clf = []
    from sklearn.svm import SVC
    clf.append(('vectorizer', Vectorizer()))
    # use linear SVM as the estimator
    estimator = SVC(max_iter=-1, kernel='linear', random_state=12345,
                    class_weight='balanced', probability=True)
    clf.append(('estimator', estimator))
    clf = Pipeline(clf)
    return clf
def mvpa(name):
    # Perform MVPA
    # Load epochs
    loader = FileLoader(name)
    loader.load_epochs(recompute=False)
    print(loader.epochs_list)

    # Prepare [predicts] for results
    predicts = []

    # Cross validation
    num_epochs = len(loader.epochs_list)
    for exclude in range(num_epochs):
        # Start on separate training and testing dataset
        print(f'---- {name}: {exclude} | {num_epochs} ----------------------')
        includes = [
            e for e in range(len(loader.epochs_list)) if not e == exclude
        ]
        excludes = [exclude]
        train_epochs, test_epochs = loader.leave_one_session_out(
            includes, excludes)
        print(train_epochs, test_epochs)

        print('Xdawn --------------------------------')
        enhancer = Enhancer(train_epochs=train_epochs,
                            test_epochs=test_epochs)
        train_epochs, test_epochs = enhancer.fit_apply()

        # Prepare epochs
        train_epochs = prepare_epochs(train_epochs)
        test_epochs = prepare_epochs(test_epochs)
        X_train, y_train = get_X_y(train_epochs)
        X_test, y_test = get_X_y(test_epochs)

        print('Preprocess ---------------------------')
        # fit the scaler on the training data only, then apply it to both sets
        pipeline = make_pipeline(Vectorizer(), StandardScaler())
        X_train = pipeline.fit_transform(X_train)
        X_test = pipeline.transform(X_test)
        # binarize the labels: everything that is not class 1 becomes 0
        y_train[y_train != 1] = 0
        y_test[y_test != 1] = 0

        print('Training -----------------------------')
        eegnet = EEGNet_classifier()
        eegnet.fit(X_train, y_train, quiet=False)
        y_pred = eegnet.predict(X_test)
        # y_pred = y_test

        print('Testing ------------------------------')
        predicts.append(dict(y_test=y_test, y_pred=y_pred))

        with open(os.path.join(results_dir, f'{name}.json'), 'wb') as f:
            pickle.dump(predicts, f)
def make_clf():
    clf = []
    clf.append(('vectorizer', Vectorizer()))
    # hyperparameters were optimized beforehand; the values are used directly
    # in the random forest model
    clf.append(('estimator', RandomForestClassifier(
        n_estimators=190,       # number of trees
        max_depth=None,         # no need to specify the depth (i.e. feature depth)
        random_state=12345,
        class_weight='balanced',
        max_features=10,        # dimension reduction
        min_samples_leaf=4,     # minimum feature span
        min_samples_split=4)))  # minimum feature split
    clf = Pipeline(clf)
    return clf
def svm_fit_predict(train_xraw, train_y, test_xraw):
    clf = svm.SVC(gamma='scale',
                  kernel='rbf',
                  class_weight='balanced',
                  probability=True)

    # keep each target epoch (label 1) and its immediate neighbours
    selects = []
    for j, y in enumerate(train_y):
        if y == 1:
            selects.extend(j - e for e in [-1, 0, 1])

    pipeline = make_pipeline(Vectorizer(), clf)
    pipeline.fit(train_xraw[selects, :, 40:80], train_y[selects])
    pred = pipeline.predict(test_xraw[:, :, 40:80])
    prob = pipeline.predict_proba(test_xraw[:, :, 40:80])
    return pred, prob
def make_clf(pattern=False, vectorized=False):
    clf = []
    if vectorized:
        clf.append(('vectorizer', Vectorizer()))
    clf.append(('scaler', MinMaxScaler()))
    # use linear SVM as the estimator
    estimator = SVC(max_iter=-1, kernel='linear', random_state=12345,
                    class_weight='balanced', probability=True)
    if pattern:
        estimator = LinearModel(estimator)
    clf.append(('estimator', estimator))
    clf = Pipeline(clf)
    return clf
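# Usage sketch (illustrative, not from the original source): the Pipeline returned by
# make_clf plugs directly into scikit-learn model selection. Data below are synthetic.
import numpy as np
from sklearn.model_selection import StratifiedKFold, cross_val_score

X = np.random.randn(60, 16, 100)        # hypothetical: 60 epochs, 16 channels, 100 samples
y = np.random.randint(0, 2, size=60)    # hypothetical binary labels

clf = make_clf(pattern=False, vectorized=True)
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=12345)
scores = cross_val_score(clf, X, y, scoring='roc_auc', cv=cv)
print(scores.mean())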
def test_vectorizer():
    """Test Vectorizer."""
    data = np.random.rand(150, 18, 6)
    vect = Vectorizer()
    result = vect.fit_transform(data)
    assert_equal(result.ndim, 2)

    # check inverse_transform
    orig_data = vect.inverse_transform(result)
    assert_equal(orig_data.ndim, 3)
    assert_array_equal(orig_data, data)
    assert_array_equal(vect.inverse_transform(result[1:]), data[1:])

    # check with different shape
    assert_equal(vect.fit_transform(np.random.rand(150, 18, 6, 3)).shape,
                 (150, 324))
    assert_equal(vect.fit_transform(data[1:]).shape, (149, 108))

    # check if raised errors are working correctly
    vect.fit(np.random.rand(105, 12, 3))
    assert_raises(ValueError, vect.transform, np.random.rand(105, 12, 3, 1))
    assert_raises(ValueError, vect.inverse_transform,
                  np.random.rand(102, 12, 12))
def test_get_coef_multiclass(n_features, n_targets):
    """Test get_coef on multiclass problems."""
    # Check patterns with more than 1 regressor
    from sklearn.linear_model import LinearRegression, Ridge
    from sklearn.pipeline import make_pipeline
    X, Y, A = _make_data(n_samples=30000, n_features=n_features,
                         n_targets=n_targets)
    lm = LinearModel(LinearRegression()).fit(X, Y)
    assert_array_equal(lm.filters_.shape, lm.patterns_.shape)
    if n_targets == 1:
        want_shape = (n_features,)
    else:
        want_shape = (n_targets, n_features)
    assert_array_equal(lm.filters_.shape, want_shape)
    if n_features > 1 and n_targets > 1:
        assert_array_almost_equal(A, lm.patterns_.T, decimal=2)
    lm = LinearModel(Ridge(alpha=0))
    clf = make_pipeline(lm)
    clf.fit(X, Y)
    if n_features > 1 and n_targets > 1:
        assert_allclose(A, lm.patterns_.T, atol=2e-2)
    coef = get_coef(clf, 'patterns_', inverse_transform=True)
    assert_allclose(lm.patterns_, coef, atol=1e-5)

    # With epochs, scaler, and vectorizer (typical use case)
    X_epo = X.reshape(X.shape + (1,))
    info = create_info(n_features, 1000., 'eeg')
    lm = LinearModel(Ridge(alpha=1))
    clf = make_pipeline(
        Scaler(info, scalings=dict(eeg=1.)),  # XXX adding this step breaks
        Vectorizer(),
        lm,
    )
    clf.fit(X_epo, Y)
    if n_features > 1 and n_targets > 1:
        assert_allclose(A, lm.patterns_.T, atol=2e-2)
    coef = get_coef(clf, 'patterns_', inverse_transform=True)
    lm_patterns_ = lm.patterns_[..., np.newaxis]
    assert_allclose(lm_patterns_, coef, atol=1e-5)

    # Check can pass fitting parameters
    lm.fit(X, Y, sample_weight=np.ones(len(Y)))
tmin = i * 1.0
tmax = (i + 1) * 1.0

# Create :class:`Epochs <mne.Epochs>` object
epochs = mne.Epochs(raw, events=events, event_id=event_id, tmin=tmin,
                    tmax=tmax, baseline=None, verbose=True, preload=True)
for i in range(0, len(epochs.events)):
    if i % 2 == 0:
        epochs.events[i, 2] = 3
# epochs.plot(scalings='auto', block=True, n_epochs=10)

epochs.pick_types(meg=False, eeg=True)
X = epochs.get_data()
y = epochs.events[:, -1]

# Define a unique pipeline to sequentially:
clf = make_pipeline(
    Vectorizer(),                       # 1) vectorize across time and channels
    StandardScaler(),                   # 2) normalize features across trials
    LinearModel(LogisticRegression()))  # 3) fit a logistic regression
clf.fit(X, y)

coef = get_coef(clf, 'patterns_', inverse_transform=True)
evoked = EvokedArray(coef, epochs.info, tmin=epochs.tmin)
fig = evoked.plot_topomap(title='EEG Patterns', size=3, show=False)
fig.savefig(title + "_ti_" + str(tmin) + "_tf_" + str(tmax) + '.png')
    return X, y


# %%
for idx in range(1, 11):
    # Loading data ------------------------------------------
    running_name = f'MEG_S{idx:02d}'
    band_name = 'U07'

    worker = MEG_Worker(running_name=running_name)
    worker.pipeline(band_name=band_name)

    # MVPA ----------------------------------------------------------------
    # Prepare classifiers
    _svm = svm.SVC(gamma='scale', kernel='rbf', class_weight='balanced')
    clf = make_pipeline(Vectorizer(), StandardScaler(), _svm)

    # Prepare paired X and y
    # Set crop
    crops = dict(a=(0.2, 0.4),
                 b=(0.4, 0.6),
                 c=(0.6, 0.8),
                 d=(0.2, 0.8),
                 e=(0.0, 1.0))

    output_dict = dict()
    for crop_key in crops:
        crop = crops[crop_key]

        # Get X and y for class 1
        X1, y1 = pair_X_y(worker.clean_epochs, 1, crop)
# This approach classifies the data within, rather than across, subjects.
for chroma in ['hbo', 'hbr']:
    st_scores = []
    for sub in subjects:
        bids_path = dataset.update(subject=sub)
        raw_haemo, epochs = epoch_preprocessing(bids_path)

        epochs.pick(chroma)
        X = epochs.get_data()
        y = epochs.events[:, 2]

        clf = make_pipeline(Scaler(epochs.info),
                            Vectorizer(),
                            LogisticRegression(solver='liblinear'))

        scores = 100 * cross_val_multiscore(
            clf, X, y, cv=5, n_jobs=1, scoring='roc_auc')

        st_scores.append(np.mean(scores, axis=0))

    print(f"Average spatio-temporal ROC-AUC performance ({chroma}) = "
          f"{np.round(np.mean(st_scores))} % ({np.round(np.std(st_scores))})")

# %%
# Conclusion
# ----------
#
# Data were epoched then decoding was performed on the hbo signal and the hbr
def SVM_decoding_on_full_epochs(X, y, plot_conf_matrix=0, class_names=None,
                                test_size=0.2, n_splits=5):
    """Decode on the full epoch using a standard SVM.

    Parameters
    ----------
    X : data extracted from the epochs provided to the decoder
    y : categorical variable (i.e. discrete, but it can be more than 2 categories)
    plot_conf_matrix : set to 1 if you want to see the confusion matrix
    class_names : needed for the legend if confusion matrices are plotted,
        e.g. ['cat1', 'cat2', 'cat3']
    test_size : proportion of the data on which you want to test the decoder
    n_splits : number of cross-validation folds used when calculating the score

    Returns
    -------
    scores, y_test, y_pred (plus the confusion matrix when it is plotted)
    """
    # ------- define the classifier -------
    scaler = preprocessing.StandardScaler()
    vectorizer = Vectorizer()
    clf = SVC(C=1, kernel='linear', decision_function_shape='ovr')
    concat_classifier = Pipeline([('vector', vectorizer),
                                  ('scaler', scaler),
                                  ('svm', clf)])

    # This returns the scores calculated for each fold
    kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    y = np.asarray(y)
    scores = []
    for train_index, test_index in kf.split(X, y):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        # Train on X_train, y_train, then test on X_test and score
        concat_classifier.fit(X_train, y_train)
        y_pred = concat_classifier.predict(X_test)
        scores.append(accuracy_score(y_true=y_test, y_pred=y_pred))
    scores = np.asarray(scores)

    if plot_conf_matrix == 1:
        print('you chose to plot the confusion matrix')
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=7, stratify=y)
        y_pred = concat_classifier.fit(X_train, y_train).predict(X_test)
        # Compute confusion matrix
        cnf_matrix = confusion_matrix(y_test, y_pred)
        np.set_printoptions(precision=3)
        print(cnf_matrix)
        # Plot non-normalized confusion matrix
        plt.figure()
        plot_confusion_matrix(cnf_matrix, classes=class_names,
                              title='Confusion matrix, without normalization')
        # Plot normalized confusion matrix
        plt.figure()
        plot_confusion_matrix(cnf_matrix, classes=class_names, normalize=True,
                              title='Normalized confusion matrix')
        plt.show()
        return scores, y_test, y_pred, cnf_matrix

    return scores, y_test, y_pred
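# Usage sketch (illustrative, not from the original source): calling the decoder on a
# synthetic three-class problem without plotting the confusion matrix; this uses the
# non-plotting return path (scores, y_test, y_pred) described in the docstring above.
import numpy as np

X = np.random.randn(90, 20, 150)        # hypothetical: 90 epochs, 20 channels, 150 samples
y = np.repeat(['cat1', 'cat2', 'cat3'], 30)

scores, y_test, y_pred = SVM_decoding_on_full_epochs(
    X, y, plot_conf_matrix=0, class_names=['cat1', 'cat2', 'cat3'], n_splits=5)
print(scores.mean())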
def run_ica_experiment(_run, method_idx):
    # filepaths = Path(r"C:\Users\paull\Documents\GIT\BCI_MsC\notebooks\BCI_Comp_IV_2a\BCICIV_2a_gdf/").glob("*T.gdf")
    dataset, metadata = BCI_IV_Comp_Dataset.load_dataset(filepaths,
                                                         as_epochs=True,
                                                         concatenate=False,
                                                         drop_bad=True,
                                                         return_metadata=True,
                                                         tmin=-1.,
                                                         tmax=3.)
    all_methods = get_all_methods()
    methods = all_methods if method_idx is None else [all_methods[method_idx]]
    name = "" if method_idx is None else "_{}".format(all_methods[method_idx])
    print("Using methods", methods)
    results = dict()
    for method in methods:
        print("Running for method", method)
        clf = make_pipeline(
            CSP(n_components=CSP_N_COMPONENTS),
            Vectorizer(),
            MinMaxScaler(),
            LogisticRegression(penalty='l2', multi_class='auto'))
        results[method] = list()
        for i, (epochs, mdata) in enumerate(zip(dataset, metadata)):
            print("\t", i, mdata["id"])
            ICA = get_ica_instance(method, n_components=ICA_N_COMPONENTS)
            start = time.time()
            epochs = epochs.copy().load_data().filter(l_freq=None, h_freq=40).resample(90.)
            transformed_epochs = ICA.fit(epochs).get_sources(epochs)
            duration = time.time() - start
            scores = dict()
            signal = np.hstack(transformed_epochs.get_data())
            for fn_name in SCORING_FN_DICT:
                score = apply_pairwise_parallel(signal, SCORING_FN_DICT[fn_name])
                scores[fn_name] = score
            X, Y = transformed_epochs.get_data(), transformed_epochs.events[:, 2]
            del epochs, transformed_epochs
            try:
                clf.fit(X, Y)
            except Exception:
                print("\t\tFailed during fit")
                results[method].append({
                    "id": mdata["id"],
                    "score": None,
                    "bas": None,
                    "duration": duration
                })
                continue
            pred = clf.predict(X)
            bas = balanced_accuracy_score(Y, pred)
            results[method].append({
                "id": mdata["id"],
                "score": scores,
                "bas": bas,
                "duration": duration
            })
    results_filepath = f"./results{name}.json"
    with open(results_filepath, "w") as json_file:
        json.dump(results, json_file, indent=4)
    _run.add_artifact(results_filepath, content_type="json")
if subject_id in exclude:
    continue
subject = 'S%02d' % subject_id
data_path = os.path.join('/home/claire/DATA/Data_Face_House/' + subject +
                         '/EEG/Evoked_Lowpass')
fname_in = os.path.join(data_path, '%s-epo.fif' % subject)

epochs = mne.read_epochs(fname_in)
epochs.interpolate_bads()

# all_epochs.append(epochs)
# epochs = mne.concatenate_epochs(all_epochs)

epochs.pick_types(eeg=True)

# Create classification pipeline
clf = make_pipeline(Xdawn(n_components=3, reg='oas'),
                    Vectorizer(),
                    MinMaxScaler(),
                    LogisticRegression(penalty='l1', solver='liblinear'))

le = LabelEncoder()
labels = le.fit_transform(epochs.events[:, 2])

# Cross validator
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Do cross-validation
preds = np.empty(len(labels))
for train, test in cv.split(epochs, labels):
    clf.fit(epochs[train], labels[train])
    preds[test] = clf.predict(epochs[test])

# Classification report
                       exclude='bads')
epochs = Epochs(raw, events, event_id, tmin, tmax, proj=False,
                picks=picks, baseline=None, preload=True,
                verbose=False)

# Create classification pipeline
clf = make_pipeline(
    Xdawn(n_components=n_filter),
    Vectorizer(),
    MinMaxScaler(),
    LogisticRegression(penalty='l1', solver='liblinear', multi_class='auto'))

# Get the labels
labels = epochs.events[:, -1]

# Cross validator
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Do cross-validation
preds = np.empty(len(labels))
for train, test in cv.split(epochs, labels):
    clf.fit(epochs[train], labels[train])
    preds[test] = clf.predict(epochs[test])

# Classification report
                    tmax=0.8,
                    baseline=None,
                    reject={'eeg': 75e-6},
                    preload=True,
                    verbose=False,
                    picks=[0, 1, 2, 3])
print('sample drop %: ', (1 - len(epochs.events) / len(events)) * 100)
epochs

###################################################################################################
# Run classification
# ----------------------------

clfs = OrderedDict()
clfs['Vect + LR'] = make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression())
clfs['Vect + RegLDA'] = make_pipeline(Vectorizer(), LDA(shrinkage='auto', solver='eigen'))
clfs['ERPCov + TS'] = make_pipeline(ERPCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
clfs['ERPCov + MDM'] = make_pipeline(ERPCovariances(estimator='oas'), MDM())
clfs['XdawnCov + TS'] = make_pipeline(XdawnCovariances(estimator='oas'), TangentSpace(), LogisticRegression())
clfs['XdawnCov + MDM'] = make_pipeline(XdawnCovariances(estimator='oas'), MDM())

# format data
epochs.pick_types(eeg=True)
X = epochs.get_data() * 1e6
times = epochs.times
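# Illustrative continuation (not from the original source, which is truncated here): one
# plausible next step, mirroring the N170_test routine earlier in this section, is to
# cross-validate every pipeline in `clfs` and collect ROC-AUC scores. It assumes the usual
# imports (StratifiedShuffleSplit, cross_val_score, pandas as pd) and that the target
# condition is coded as event id 2, as in that routine.
y = epochs.events[:, -1]
cv = StratifiedShuffleSplit(n_splits=10, test_size=0.25, random_state=42)

auc, methods = [], []
for name, pipe in clfs.items():
    res = cross_val_score(pipe, X, y == 2, scoring='roc_auc', cv=cv, n_jobs=-1)
    auc.extend(res)
    methods.extend([name] * len(res))

results = pd.DataFrame({'AUC': auc, 'Method': methods})
print(results.groupby('Method').mean())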
                   eog=False, exclude='bads')
epochs = Epochs(raw, events, event_id, tmin, tmax, proj=False,
                picks=picks, baseline=None, preload=True,
                verbose=False)

# Create classification pipeline
clf = make_pipeline(Xdawn(n_components=3),
                    Vectorizer(),
                    MinMaxScaler(),
                    LogisticRegression(penalty='l1', solver='liblinear'))

# Get the labels
labels = epochs.events[:, -1]

# Cross validator
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Do cross-validation
preds = np.empty(len(labels))
for train, test in cv.split(epochs, labels):
    clf.fit(epochs[train], labels[train])
    preds[test] = clf.predict(epochs[test])

# Classification report
epochs = Epochs(raw, events, event_id, tmin, tmax, proj=False,
                picks=picks, baseline=None, preload=True,
                verbose=False)

X = epochs.get_data()
y = label_binarize(epochs.events[:, 2], classes=[1, 3]).ravel()

clf = make_pipeline(XdawnTransformer(n_components=2),
                    Vectorizer(),
                    StandardScaler(),
                    LogisticRegression())

# Define a monte-carlo cross-validation generator (reduce variance):
cv = ShuffleSplit(n_splits=10, test_size=0.2, random_state=42)
scores = cross_val_score(clf, X, y, cv=cv)

class_balance = np.mean(y == y[0])
class_balance = max(class_balance, 1. - class_balance)
print("Classification accuracy: %f / Chance level: %f" % (np.mean(scores),
                                                          class_balance))

###############################################################################
# plot Xdawn patterns estimated on full data for visualization
# Decoding in sensor space using a linear SVM
n_times = len(rt_epochs.times)

from sklearn import preprocessing  # noqa
from sklearn.svm import SVC  # noqa
from sklearn.pipeline import Pipeline  # noqa
from sklearn.model_selection import cross_val_score, ShuffleSplit  # noqa
from mne.decoding import Vectorizer, FilterEstimator  # noqa

scores_x, scores, std_scores = [], [], []

# don't highpass filter because it's epoched data and the signal length
# is small
filt = FilterEstimator(rt_epochs.info, None, 40, fir_design='firwin')
scaler = preprocessing.StandardScaler()
vectorizer = Vectorizer()
clf = SVC(C=1, kernel='linear')

concat_classifier = Pipeline([('filter', filt), ('vector', vectorizer),
                              ('scaler', scaler), ('svm', clf)])

data_picks = mne.pick_types(rt_epochs.info, meg='grad', eeg=False, eog=True,
                            stim=False, exclude=raw.info['bads'])
ax = plt.subplot(111)
ax.set_xlabel('Trials')
ax.set_ylabel('Classification score (% correct)')
ax.set_title('Real-time decoding')
test_epochs = test_epochs.crop(0.2, 0.8)

# Select epochs in [train_epochs]
selects = select_events(train_epochs)
selected_train_epochs = train_epochs[selects]
display(train_epochs, selected_train_epochs, test_epochs)

# Get X and y
train_X, train_y = get_X_y(selected_train_epochs)
test_X, test_y = get_X_y(test_epochs)

# Fit and pred ---------------------------------------------
# Init
clf = make_pipeline(
    Vectorizer(),
    StandardScaler(),
    pca.PCA(n_components=.95),
    svm.SVC(
        gamma='scale',
        kernel='rbf',
        class_weight='balanced',
    ))

# Fit
print('Fitting')
clf.fit(X=train_X, y=train_y)

# Predict
print('Predicting')
crop_pred_y = clf.predict(X=test_X)

break
design = metadata[predictors]
# # dummy code cue variable
# dummies = pd.get_dummies(design[predictors], drop_first=True)
# design = pd.concat([design.drop(predictors, axis=1), dummies], axis=1)
# design.cue_B = design.cue_B - design.cue_B.unique().mean()

# create design matrix
design = patsy.dmatrix("cue", design, return_type='dataframe')
design = design[['cue[T.B]']]

# 4.2) vectorise channel data for linear regression
# data to be analysed
dat = cues[subj].get_data()
dat = dat[:, :, times_to_use]
Y = Vectorizer().fit_transform(dat)

# 4.3) fit linear model with sklearn's LinearRegression
weights = compute_sample_weight(class_weight='balanced',
                                y=metadata.cue.to_numpy())
linear_model = LinearRegression(n_jobs=n_jobs, fit_intercept=True)
linear_model.fit(design, Y, sample_weight=weights)

# 4.4) extract the resulting coefficients (i.e., betas)
coefs = get_coef(linear_model, 'coef_')
inter = linear_model.intercept_

# 4.5) extract model r_squared
r2 = r2_score(Y, linear_model.predict(design), multioutput='raw_values')
# save model R-squared
evokeds = []
for sbj in sbjs:
    print(sbj)
    if sbj == 'VP12':  # No REM here
        continue
    if os.path.exists(os.path.join(save_path, sbj + '.p')):
        continue
    # sleep_epochs = myload(base_path_data, typ='epoch_preprocessed', sbj=sbj, preload=True)  # WAKE!
    sleep_epochs = myload(sleep_path_data, typ='epoch_preprocessed', sbj=sbj, preload=True)  # SLEEP!
    sleep_epochs.event_id = sleep_event_id  # event_id remapping. For wake this step works during preprocessing  # SLEEP!
    sleep_epochs = sleep_epochs.crop(tmin=tmin, tmax=tmax)

    X1, y1 = get_Xy_balanced(sleep_epochs, contrast1)

    clf = make_pipeline(Vectorizer(),
                        StandardScaler(),
                        LinearModel(LogisticRegression(max_iter=4000)))
    cv = StratifiedKFold(n_splits=2, shuffle=True)

    coef_folds = []
    for train_idx, test_idx in cv.split(X1, y1):
        clf.fit(X1[train_idx], y=y1[train_idx])
        # scores1.append(clf.score(X1[test_idx], y=y1[test_idx]))
        coef_folds.append(get_coef(clf, attr='patterns_', inverse_transform=True))

    coef = np.asarray(coef_folds).mean(0).reshape([173, -1])  # mean folds and reshape
    evoked = EvokedArray(coef, sleep_epochs.info, tmin=tmin)
    evokeds.append(evoked)

ga = mne.grand_average(evokeds)  # SLEEP