def calculate_generalization_across_time_decoding(epochs, picked_channels, y,
                                                  fold_nr, resamplefreq,
                                                  outputfile):
    """Fit and score a cross-validated GeneralizationAcrossTime decoder.

    Parameters
    ----------
    epochs : object
        Epochs to decode. ``pick_channels`` and ``resample(copy=False)`` are
        called on it directly, so the caller's object is presumably modified
        in place -- TODO confirm against the MNE version in use.
    picked_channels : sequence | None
        Channels passed to ``epochs.pick_channels``. When falsy, no channel
        selection is done and a message is printed.
    y : array-like
        Labels, used both to stratify the folds and as the fit target.
    fold_nr : int
        Number of folds handed to ``StratifiedKFold``.
    resamplefreq : float | None
        Target frequency passed to ``epochs.resample``; skipped when falsy.
    outputfile : str | None
        When truthy, the scores are written there with ``scipy.io.savemat``
        under the key ``'scores'``.

    Returns
    -------
    scores
        Whatever ``gat.score(epochs)`` returns (``score_mode='fold-wise'``).
    """
    if picked_channels:
        epochs.pick_channels(picked_channels)
    else:
        # print() call form, like the final status message below; the bare
        # print statement is a syntax error on Python 3.
        print('no channels to pick')

    if resamplefreq:
        # resample
        epochs.resample(resamplefreq, npad=100, window='boxcar', n_jobs=1,
                        copy=False, verbose=None)

    cv = StratifiedKFold(y=y, n_folds=fold_nr)  # stratified cross-validation
    gat = GeneralizationAcrossTime(predict_mode='cross-validation', n_jobs=1,
                                   cv=cv, scorer=roc_auc_score,
                                   score_mode='fold-wise')

    # fit and score
    gat.fit(epochs, y=y)
    scores = gat.score(epochs)

    if outputfile:
        scipy.io.savemat(outputfile, {'scores': scores})
    print('GAT DONE')
    return scores
def _get_data():
    """Return a fitted and scored GAT object for the visualisation tests.

    Reads the module-level ``raw_fname`` / ``event_name`` files and epochs
    them with the module-level ``event_id``, ``tmin`` and ``tmax``.
    """
    # A throw-away instance is created first, as before; it is not used.
    GeneralizationAcrossTime()

    recording = io.Raw(raw_fname, preload=False)
    event_array = read_events(event_name)
    mag_channels = pick_types(recording.info, meg='mag', stim=False,
                              ecg=False, eog=False, exclude='bads')
    # Keep a small subset of the magnetometers and decimate heavily.
    epoch_kwargs = dict(picks=mag_channels[1:13:3], baseline=(None, 0),
                        preload=True, decim=30)

    # Time generalization within one condition
    with warnings.catch_warnings(record=True):
        trials = Epochs(recording, event_array, event_id, tmin, tmax,
                        **epoch_kwargs)

    # Default decoder settings
    decoder = GeneralizationAcrossTime()
    decoder.fit(trials)
    decoder.score(trials)
    return decoder
def quick_score(X, y, clf=None, scorer=None):
    """Fit and score a GAT on array data with default estimator settings.

    Parameters
    ----------
    X : array
        Data indexed as ``X[trials, :, :]`` and converted with ``mat2mne``.
    y : array
        Target per trial. Trials whose target is NaN are dropped.
    clf : estimator | None
        Defaults to ``RidgeCV`` for regression targets, otherwise a
        ``LogisticRegression`` wrapped in ``force_predict``. In every case it
        is placed behind a ``StandardScaler`` in a pipeline.
    scorer : callable | None
        Defaults to ``scorer_spearman`` (regression) or ``scorer_auc``.

    Returns
    -------
    gat : GeneralizationAcrossTime
        The fitted and scored object.
    """
    from sklearn.cross_validation import KFold

    # Regression when there are more than two distinct targets AND the first
    # target is a Python float (or a subclass of it).
    more_than_two = len(np.unique(y)) > 2
    first_is_float = isinstance(y[0], float)
    regression = more_than_two & first_is_float

    if scorer is None:
        if regression:
            scorer = scorer_spearman
        else:
            scorer = scorer_auc

    if clf is None:
        if regression:
            clf = RidgeCV(alphas=[(2 * C) ** -1 for C in [1e-4, 1e-2, 1]])
        else:
            clf = force_predict(LogisticRegression(), axis=1)

    # Drop trials without a target value.
    valid = np.where(~np.isnan(y))[0]
    X = X[valid, :, :]
    y = y[valid]

    epochs = mat2mne(X, sfreq=100)
    pipeline = make_pipeline(StandardScaler(), clf)

    if regression:
        cv = KFold(len(y), 5)
    else:
        cv = None  # let the GAT pick its default scheme

    gat = GeneralizationAcrossTime(clf=pipeline, n_jobs=-1, scorer=scorer,
                                   cv=cv)
    gat.fit(epochs, y)
    gat.score(epochs, y)
    return gat
def _run(epochs, events, analysis):
    """Run temporal generalization for one analysis and save the results.

    ``subject`` is not a parameter: it is read from the enclosing/module
    scope and used for progress messages and for the ``save`` calls.

    Parameters
    ----------
    epochs : object
        Epochs to decode; indexed with the selected trials and read for
        ``epochs.times``.
    events : object
        Per-trial table supporting ``len``, ``events.query(...)`` and
        ``events[condition]`` (presumably a pandas DataFrame -- confirm
        against the caller).
    analysis : dict
        Must provide ``'name'``, ``'query'``, ``'condition'``, ``'clf'``,
        ``'cv'`` and ``'scorer'``.

    Returns
    -------
    None
        Results are only written through ``save``. Nothing is fitted or saved
        when no trial is selected.
    """
    print(subject, analysis['name'])

    # subselect the trials (e.g. exclude absent trials) with a
    # dataframe query defined in conditions.py
    query, condition = analysis['query'], analysis['condition']
    sel = range(len(events)) if query is None \
        else events.query(query).index
    # Look the condition values up once rather than once per trial, then keep
    # only the trials whose value is defined.
    condition_values = events[condition][sel]
    sel = [ii for ii in sel if not np.isnan(condition_values[ii])]

    # The to-be-predicted value, for each trial:
    y = np.array(events[condition], dtype=np.float32)
    # print() call form, like every other message in this function; the bare
    # print statement is a syntax error on Python 3.
    print(analysis['name'], np.unique(y[sel]), len(sel))

    # Abort if there is no trial
    if not sel:
        return

    # Apply analysis
    gat = GeneralizationAcrossTime(clf=analysis['clf'], cv=analysis['cv'],
                                   scorer=analysis['scorer'], n_jobs=-1)
    print(subject, analysis['name'], 'fit')
    gat.fit(epochs[sel], y=y[sel])
    print(subject, analysis['name'], 'score')
    score = gat.score(epochs[sel], y=y[sel])
    print(subject, analysis['name'], 'save')

    # save space
    if analysis['name'] not in ['probe_phase', 'target_circAngle']:
        # we'll need the estimator trained on the probe_phase and to generalize
        # to the target phase and prove that there is a significant signal.
        gat.estimators_ = None
    if analysis['name'] not in ['target_present', 'target_circAngle',
                                'probe_circAngle']:
        # We need these individual prediction to control for the correlation
        # between target and probe angle.
        gat.y_pred_ = None

    # Save analysis
    save([gat, analysis, sel, events], 'decod', subject=subject,
         analysis=analysis['name'], overwrite=True, upload=True)
    save([score, epochs.times], 'score', subject=subject,
         analysis=analysis['name'], overwrite=True, upload=True)
    return
def _get_data(tmin=-0.2, tmax=0.5, event_id=dict(aud_l=1, vis_l=3),
              event_id_gen=dict(aud_l=2, vis_l=4), test_times=None):
    """Return a fitted and scored GAT object for the visualisation tests.

    Epochs are built from the module-level ``raw_fname`` / ``event_name``
    files, split per entry of ``event_id``, equalized in count and
    concatenated again before decoding. ``event_id_gen`` is accepted but not
    used here.
    """
    with warnings.catch_warnings(record=True):  # deprecated
        # Throw-away instance, created as before; it is not used.
        GeneralizationAcrossTime()

    recording = read_raw_fif(raw_fname)
    recording.add_proj([], remove_existing=True)
    event_array = read_events(event_name)
    mag_channels = pick_types(recording.info, meg='mag', stim=False,
                              ecg=False, eog=False, exclude='bads')
    # Keep a small subset of the magnetometers and decimate heavily.
    epoch_kwargs = dict(picks=mag_channels[1:13:3], preload=True, decim=30)

    # Time generalization within one condition
    with warnings.catch_warnings(record=True):
        all_trials = Epochs(recording, event_array, event_id, tmin, tmax,
                            **epoch_kwargs)

    per_condition = []
    for name in event_id:
        per_condition.append(all_trials[name])
    equalize_epoch_counts(per_condition)
    balanced = concatenate_epochs(per_condition)

    # Default decoder settings (only test_times is forwarded)
    with warnings.catch_warnings(record=True):  # deprecated
        decoder = GeneralizationAcrossTime(test_times=test_times)
    decoder.fit(balanced)
    decoder.score(balanced)
    return decoder
def decoding_analysis(c1, c2):
    """Decode condition ``c1`` (label 0) against ``c2`` (label 1) with a GAT.

    Both inputs are relabelled in place: the third events column is
    overwritten with the class label and an ``event_id`` entry is added
    (``'exp_sup_lon'`` on ``c1``, ``'exp_sup_sho'`` on ``c2``).

    Returns
    -------
    gat : GeneralizationAcrossTime
        Fitted and scored with stratified cross-validation and ROC AUC.
        Plotting is left to the caller.
    """
    relabelling = ((c1, 0, 'exp_sup_lon'), (c2, 1, 'exp_sup_sho'))
    for condition_epochs, label, key in relabelling:
        condition_epochs.events[:, 2] = label
        condition_epochs.event_id[key] = label

    merged = mne.concatenate_epochs([c1, c2])

    # GAT: the class labels are the event codes written above
    labels = merged.events[:, 2]
    folds = StratifiedKFold(y=labels)  # stratified cross-validation
    decoder = GeneralizationAcrossTime(predict_mode='cross-validation',
                                       n_jobs=1, cv=folds,
                                       scorer=roc_auc_score)

    # fit and score
    decoder.fit(merged, y=labels)
    decoder.score(merged)
    return decoder
def _decod(subject, analysis):
    """Compute, or load from disk, the '_vhp' control decoding scores.

    Parameters
    ----------
    subject : object
        Subject identifier forwarded to ``paths``, ``load``, ``save`` and
        ``_get_epochs``.
    analysis : dict
        Must provide ``'name'``, ``'query'``, ``'condition'``, ``'clf'``,
        ``'cv'`` and ``'scorer'``.

    Returns
    -------
    The result of ``load('score', ...)`` when the score file already exists;
    otherwise the tuple ``(score, epochs.times)``, or ``None`` when no trial
    is selected.
    """
    from mne.decoding import GeneralizationAcrossTime

    # if already computed let's just load it from disk
    fname_kwargs = dict(subject=subject, analysis=analysis['name'] + '_vhp')
    score_fname = paths('score', **fname_kwargs)
    if op.exists(score_fname):
        return load('score', **fname_kwargs)

    epochs = _get_epochs(subject)
    events = load('behavior', subject=subject)

    # Let's not recompute everything, this is just a control analysis
    print(subject, analysis['name'])
    # The data are materialised by hand and the object is flagged as
    # preloaded before it is cropped and decimated.
    epochs._data = epochs.get_data()
    epochs.preload = True
    epochs.crop(0., .900)
    epochs.decimate(2)

    query, condition = analysis['query'], analysis['condition']
    sel = range(len(events)) if query is None else events.query(query).index
    # Look the condition values up once rather than once per trial, then keep
    # only the trials whose value is defined.
    condition_values = events[condition][sel]
    sel = [ii for ii in sel if not np.isnan(condition_values[ii])]
    y = np.array(events[condition], dtype=np.float32)
    # print() call form, like every other message in this function; the bare
    # print statement is a syntax error on Python 3.
    print(analysis['name'], np.unique(y[sel]), len(sel))

    if not sel:
        return

    # Apply analysis
    gat = GeneralizationAcrossTime(clf=analysis['clf'], cv=analysis['cv'],
                                   scorer=analysis['scorer'], n_jobs=-1)
    print(subject, analysis['name'], 'fit')
    gat.fit(epochs[sel], y=y[sel])
    print(subject, analysis['name'], 'score')
    score = gat.score(epochs[sel], y=y[sel])
    print(subject, analysis['name'], 'save')

    # save space
    gat.estimators_ = None
    gat.y_pred_ = None

    # Save analysis
    save([score, epochs.times], 'score', overwrite=True, upload=True,
         **fname_kwargs)
    return score, epochs.times
def _run(epochs, events, analysis):
    """Run temporal generalization for one analysis and save the results.

    ``subject`` is not a parameter: it is read from the enclosing/module
    scope and used for progress messages and for the ``save`` calls.

    Parameters
    ----------
    epochs : object
        Epochs to decode; indexed with the selected trials and read for
        ``epochs.times``.
    events : object
        Per-trial table supporting ``len``, ``events.query(...)`` and
        ``events[condition]`` (presumably a pandas DataFrame -- confirm
        against the caller).
    analysis : dict
        Must provide ``'name'``, ``'query'``, ``'condition'``, ``'clf'``,
        ``'cv'`` and ``'scorer'``.

    Returns
    -------
    None
        Results are only written through ``save``. Nothing is fitted or saved
        when no trial is selected.
    """
    print(subject, analysis['name'])

    # subselect the trials (e.g. exclude absent trials) with a
    # dataframe query defined in conditions.py
    query, condition = analysis['query'], analysis['condition']
    sel = range(len(events)) if query is None \
        else events.query(query).index
    # Look the condition values up once rather than once per trial, then keep
    # only the trials whose value is defined.
    condition_values = events[condition][sel]
    sel = [ii for ii in sel if not np.isnan(condition_values[ii])]

    # The to-be-predicted value, for each trial:
    y = np.array(events[condition], dtype=np.float32)
    # print() call form, like every other message in this function; the bare
    # print statement is a syntax error on Python 3.
    print(analysis['name'], np.unique(y[sel]), len(sel))

    # Abort if there is no trial
    if not sel:
        return

    # Apply analysis
    gat = GeneralizationAcrossTime(clf=analysis['clf'], cv=analysis['cv'],
                                   scorer=analysis['scorer'], n_jobs=-1)
    print(subject, analysis['name'], 'fit')
    gat.fit(epochs[sel], y=y[sel])
    print(subject, analysis['name'], 'score')
    score = gat.score(epochs[sel], y=y[sel])
    print(subject, analysis['name'], 'save')

    # save space
    if analysis['name'] not in ['probe_phase', 'target_circAngle']:
        # we'll need the estimator trained on the probe_phase and to generalize
        # to the target phase and prove that there is a significant signal.
        gat.estimators_ = None
    if analysis['name'] not in ['target_present', 'target_circAngle',
                                'probe_circAngle']:
        # We need these individual prediction to control for the correlation
        # between target and probe angle.
        gat.y_pred_ = None

    # Save analysis
    save([gat, analysis, sel, events], 'decod', subject=subject,
         analysis=analysis['name'], overwrite=True, upload=True)
    save([score, epochs.times], 'score', subject=subject,
         analysis=analysis['name'], overwrite=True, upload=True)
    return
def _get_data(tmin=-0.2, tmax=0.5, event_id=dict(aud_l=1, vis_l=3),
              event_id_gen=dict(aud_l=2, vis_l=4), test_times=None):
    """Return a fitted and scored GAT object for the visualisation tests.

    Epochs are built from the module-level ``raw_fname`` / ``event_name``
    files, split per entry of ``event_id``, equalized in count and
    concatenated again before decoding. ``event_id_gen`` is accepted but not
    used here.
    """
    # Throw-away instance, created as before; it is not used.
    GeneralizationAcrossTime()

    recording = read_raw_fif(raw_fname, preload=False, add_eeg_ref=False)
    recording.add_proj([], remove_existing=True)
    event_array = read_events(event_name)
    mag_channels = pick_types(recording.info, meg='mag', stim=False,
                              ecg=False, eog=False, exclude='bads')
    # Keep a small subset of the magnetometers and decimate heavily.
    epoch_kwargs = dict(picks=mag_channels[1:13:3], baseline=(None, 0),
                        preload=True, decim=30, add_eeg_ref=False)

    # Time generalization within one condition
    with warnings.catch_warnings(record=True):
        all_trials = Epochs(recording, event_array, event_id, tmin, tmax,
                            **epoch_kwargs)

    per_condition = []
    for name in event_id:
        per_condition.append(all_trials[name])
    equalize_epoch_counts(per_condition)
    balanced = concatenate_epochs(per_condition)

    # Default decoder settings (only test_times is forwarded)
    decoder = GeneralizationAcrossTime(test_times=test_times)
    decoder.fit(balanced)
    decoder.score(balanced)
    return decoder
# Wrap the data array in an EpochsArray (chan_names, chan_types, n_trial and
# data are defined earlier in the script, outside this excerpt).
info = create_info(chan_names, 1, chan_types)
# One events row per trial: a running counter followed by two zero columns.
events = np.c_[np.cumsum(np.ones(n_trial)), np.zeros(n_trial), np.zeros(n_trial)]
epochs = EpochsArray(data, info, events)

# RUN GAT ======================================================================
# SVR
# --- fit & predict separately
# The angle is decoded through two regressions: one on its cosine and one on
# its sine (after angle2circle), each with its own GAT.
cos = lambda angles: np.cos(angle2circle(angles))
sin = lambda angles: np.sin(angle2circle(angles))
gats = list()
for transform in [cos, sin]:
    scaler = StandardScaler()
    svr = SVR(C=1, kernel='linear')
    clf = Pipeline([('scaler', scaler), ('svr', svr)])
    gat = GeneralizationAcrossTime(n_jobs=-1, clf=clf)
    gat.fit(epochs, y=transform(trial_angles))
    gat.predict(epochs)
    gats.append(gat)
# --- recombine
# gats[0] is the cosine decoder, gats[1] the sine decoder (loop order above).
predict_angles, true_angles = recombine_svr_prediction(gats[0], gats[1])
# --- score
angle_errors_svr = compute_error_svr(predict_angles, true_angles)
# Show the error averaged over axis 2 as a matrix.
plt.matshow(np.mean(angle_errors_svr,axis=2)), plt.colorbar(), plt.show()

# SVC Gat
# Same pipeline layout with a probabilistic SVC; only the object is built
# here, it is not fitted within this excerpt.
scaler = StandardScaler()
svc = SVC(C=1, kernel='linear', probability=True)
clf = Pipeline([('scaler', scaler), ('svc', svc)])
gat = GeneralizationAcrossTime(n_jobs=-1, clf=clf, predict_type='predict_proba')
def test_circular_classifiers():
    """Check the circular estimators inside a GeneralizationAcrossTime.

    Every estimator in ``clf_list`` is fitted on the output of
    ``make_circular_data`` and scored with ``scorer_angle``. The expected
    pattern is chance level at the first time point, decoding at the second
    and third, and a negative score when generalizing from the second to the
    third. ``SVR_polar`` and ``SVR_angle`` take no ``independent`` argument,
    so they are only exercised once.
    """
    from mne.decoding import GeneralizationAcrossTime
    from ..scorers import scorer_angle
    from sklearn.linear_model import Ridge, RidgeCV

    epochs, angles = make_circular_data()
    # XXX will be deprecated
    clf_list = [PolarRegression, AngularRegression, SVR_polar, SVR_angle]
    for clf_init in clf_list:
        for independent in [False, True]:
            if clf_init in [SVR_polar, SVR_angle]:
                # No `independent` option: run these a single time.
                if not independent:
                    continue
                clf = clf_init(clf=Ridge(random_state=0))
            else:
                clf = clf_init(clf=Ridge(random_state=0),
                               independent=independent)
            # print() call form; the bare print statement is a syntax error
            # on Python 3.
            print(clf_init, independent)
            gat = GeneralizationAcrossTime(clf=clf, scorer=scorer_angle)
            gat.fit(epochs, y=angles)
            gat.predict(epochs)
            gat.score(y=angles)
            assert_true(np.abs(gat.scores_[0][0]) < .5)  # chance level
            assert_true(gat.scores_[1][1] > 1.)  # decode
            assert_true(gat.scores_[2][2] > 1.)  # decode
            assert_true(gat.scores_[1][2] < -1.)  # anti-generalize

    # Test args
    gat = GeneralizationAcrossTime(clf=RidgeCV(alphas=[1., 2.]),
                                   scorer=scorer_angle)
    gat.fit(epochs, y=angles)
    gat = GeneralizationAcrossTime(clf=RidgeCV(), scorer=scorer_angle)
    gat.fit(epochs, y=angles)
# ## We define the epochs and the labels n_cond1 = len(epochs_clas[event_id[0]]) n_cond2 = len(epochs_clas[event_id[1]]) y = np.r_[np.ones((n_cond1, )), np.zeros((n_cond2, ))] #------------------Start of Decoding Script -------------- cv = StratifiedKFold(y=y) # do a stratified cross-validation # define the GeneralizationAcrossTime object train_times = {'start': -0.1, 'stop': 2} gat = GeneralizationAcrossTime(predict_mode='cross-validation', train_times=train_times, n_jobs=6, cv=cv, scorer=roc_auc_score) # fit and score print("Fitting") gat.fit(epochs_clas, y=y) print("Scoring") gat.score(epochs_clas) # let's visualize now gat.plot() gat.plot_diagonal() # ------------------------------ # Time Decoding Generalization
# Balance the two conditions before merging them.
mne.epochs.equalize_epoch_counts([epochs_classic, epochs_plan])
# Dirty hack
# TODO: Check this from the Maxfilter side
# epochs_classic.info['dev_head_t'] = epochs_plan.info['dev_head_t']
epochs = mne.concatenate_epochs([epochs_classic, epochs_plan])
# Crop and downsmample to make it faster
epochs.crop(tmin=-3.5, tmax=0)
epochs.resample(250)
# Setup the y vector and GAT
# Zeros for the "press" trials followed by ones for the "plan" trials.
# NOTE(review): this assumes the concatenated epochs are ordered the same
# way -- confirm.
y = np.concatenate(
    (np.zeros(len(epochs["press"])), np.ones(len(epochs["plan"]))))
gat = GeneralizationAcrossTime(predict_mode='mean-prediction',
                               scorer="roc_auc", n_jobs=1)
# Fit model
# The estimators have to be fitted before they can be scored; this call was
# missing, which left gat.score() running on an unfitted object.
gat.fit(epochs, y=y)
# Scoring and visualise result
gat.score(epochs, y=y)
# Save model
joblib.dump(gat, data_path + "decode_time_gen/%s_gat_2.jl" % subject)
fig = gat.plot(
    title="Temporal Gen (Classic vs planning): left to right sub: %s" %
    subject)
fig.savefig(data_path + "decode_time_gen/%s_gat_matrix_2.png" % subject)
query, condition = analysis['query'], analysis['condition'] sel = range(len(events)) if query is None \ else events.query(query).index sel = [ii for ii in sel if ~np.isnan(events[condition][sel][ii])] y = np.array(events[condition], dtype=np.float32) print analysis['name'], np.unique(y[sel]), len(sel) if len(sel) == 0: logger.warning('%s: no epoch in %s for %s.' % ( subject, data_type, analysis['name'])) continue # Apply analysis gat = GeneralizationAcrossTime(clf=analysis['clf'], cv=analysis['cv'], scorer=analysis['scorer'], n_jobs=-1) gat.fit(epochs[sel], y=y[sel]) gat.score(epochs[sel], y=y[sel]) # Save analysis pkl_fname = paths('decod', subject=subject, data_type=data_type, analysis=analysis['name'], log=True) # Save classifier results with open(pkl_fname, 'wb') as f: pickle.dump([gat, analysis, sel, events], f) # Plot fig = gat.plot_diagonal(show=False) report.add_figs_to_section(fig, ('%s %s %s: (diagonal)' %
# Read the events from the 'UPPT001' stimulus channel and epoch the two
# conditions (raw and picks are defined earlier, outside this excerpt).
events = mne.find_events(raw, stim_channel='UPPT001')
event_id = {"faces": 1, "scrambled": 2}
tmin, tmax = -0.1, 0.5
decim = 4  # decimate to make the example faster to run
# No baseline correction; epochs exceeding the magnetometer threshold given
# in `reject` are dropped.
epochs = mne.Epochs(raw, events, event_id, tmin, tmax, proj=True, picks=picks, baseline=None, preload=True, reject=dict(mag=1.5e-12), decim=decim, verbose=False)
# Define decoder. The decision_function is employed to use AUC for scoring
gat = GeneralizationAcrossTime(predict_mode='cross-validation', predict_type='decision_function', n_jobs=2)
# fit and score
# No y is passed: the labels are left to the GAT defaults.
gat.fit(epochs)
gat.score(epochs)
gat.plot(vmin=0.1, vmax=0.9, title="Generalization Across Time (faces vs. scrambled)")
gat.plot_diagonal()  # plot decoding across time (correspond to GAT diagonal)
preload=True, reject=dict(mag=5e-12), decim=decim, verbose=False) # We will train the classifier on all left visual vs auditory trials # and test on all right visual vs auditory trials. # In this case, because the test data is independent from the train data, # we test the classifier of each fold and average the respective predictions. # Define events of interest triggers = epochs.events[:, 2] viz_vs_auditory = np.in1d(triggers, (1, 2)).astype(int) gat = GeneralizationAcrossTime(predict_mode='mean-prediction', n_jobs=1) # For our left events, which ones are visual? viz_vs_auditory_l = (triggers[np.in1d(triggers, (1, 3))] == 3).astype(int) # To make scikit-learn happy, we converted the bool array to integers # in the same line. This results in an array of zeros and ones: print("The unique classes' labels are: %s" % np.unique(viz_vs_auditory_l)) gat.fit(epochs[('AudL', 'VisL')], y=viz_vs_auditory_l) # For our right events, which ones are visual? viz_vs_auditory_r = (triggers[np.in1d(triggers, (2, 4))] == 4).astype(int) gat.score(epochs[('AudR', 'VisR')], y=viz_vs_auditory_r) gat.plot(title="Temporal Generalization (visual vs auditory): left to right")
############################################################################### # Generalization Across Time # -------------------------- # # This runs the analysis used in [1]_ and further detailed in [2]_ # # Here we'll use a stratified cross-validation scheme. # make response vector y = np.zeros(len(epochs.events), dtype=int) y[epochs.events[:, 2] == 3] = 1 cv = StratifiedKFold(y=y) # do a stratified cross-validation # define the GeneralizationAcrossTime object gat = GeneralizationAcrossTime(predict_mode='cross-validation', n_jobs=1, cv=cv, scorer=roc_auc_score) # fit and score gat.fit(epochs, y=y) gat.score(epochs) # let's visualize now gat.plot() gat.plot_diagonal() ############################################################################### # Exercise # -------- # - Can you improve the performance using full epochs and a common spatial # pattern (CSP) used by most BCI systems?
def test_generalization_across_time():
    """Test time generalization decoding.

    Exercises GeneralizationAcrossTime on the output of ``make_epochs``:
    string representations before and after fit / predict / score, the
    supported ``predict_method`` / ``predict_mode`` / ``score_mode`` values
    and the errors raised for unsupported ones, training and testing time
    parameters (window length, step, start/stop, explicit slices, diagonal),
    custom and degenerate cross-validation schemes, and the automatic choice
    of cv and scorer for classifiers versus regressors.

    The checks are order dependent: several sections reuse and mutate the
    ``gat`` object left over by the previous section.
    """
    from sklearn.svm import SVC
    from sklearn.base import is_classifier
    # KernelRidge is used for testing 1) regression analyses 2) n-dimensional
    # predictions.
    from sklearn.kernel_ridge import KernelRidge
    from sklearn.preprocessing import LabelEncoder
    from sklearn.metrics import roc_auc_score, mean_squared_error

    epochs = make_epochs()
    # Four labels: the second part of the trials gets its event code shifted
    # by one.
    y_4classes = np.hstack((epochs.events[:7, 2], epochs.events[7:, 2] + 1))
    if check_version('sklearn', '0.18'):
        from sklearn.model_selection import (KFold, StratifiedKFold,
                                             ShuffleSplit, LeaveOneLabelOut)
        cv_shuffle = ShuffleSplit()
        cv = LeaveOneLabelOut()
        # XXX we cannot pass any other parameters than X and y to cv.split
        # so we have to build it before hand
        cv_lolo = [(train, test) for train, test in cv.split(
            X=y_4classes, y=y_4classes, labels=y_4classes)]
        # With sklearn >= 0.17, `clf` can be identified as a regressor, and
        # the scoring metrics can therefore be automatically assigned.
        # NOTE(review): this branch is gated on sklearn 0.18 while the
        # comments refer to 0.17 -- confirm which version is intended.
        scorer_regress = None
    else:
        from sklearn.cross_validation import (KFold, StratifiedKFold,
                                              ShuffleSplit, LeaveOneLabelOut)
        cv_shuffle = ShuffleSplit(len(epochs))
        cv_lolo = LeaveOneLabelOut(y_4classes)
        # With sklearn < 0.17, `clf` cannot be identified as a regressor, and
        # therefore the scoring metrics cannot be automatically assigned.
        scorer_regress = mean_squared_error

    # Test default running
    gat = GeneralizationAcrossTime(picks='foo')
    assert_equal("<GAT | no fit, no prediction, no score>", "%s" % gat)
    assert_raises(ValueError, gat.fit, epochs)
    with warnings.catch_warnings(record=True):
        # check classic fit + check manual picks
        gat.picks = [0]
        gat.fit(epochs)
        # check optional y as array
        gat.picks = None
        gat.fit(epochs, y=epochs.events[:, 2])
        # check optional y as list
        gat.fit(epochs, y=epochs.events[:, 2].tolist())
    # NOTE(review): if assert_equal is the nose/unittest helper, the third
    # positional argument is the failure message, so `1` is not compared.
    assert_equal(len(gat.picks_), len(gat.ch_names), 1)
    assert_equal(
        "<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), no "
        "prediction, no score>", '%s' % gat)
    assert_equal(gat.ch_names, epochs.ch_names)

    # test different predict function:
    gat = GeneralizationAcrossTime(predict_method='decision_function')
    gat.fit(epochs)
    # With classifier, the default cv is StratifiedKFold
    assert_true(gat.cv_.__class__ == StratifiedKFold)
    gat.predict(epochs)
    assert_array_equal(np.shape(gat.y_pred_), (15, 15, 14, 1))
    gat.predict_method = 'predict_proba'
    gat.predict(epochs)
    assert_array_equal(np.shape(gat.y_pred_), (15, 15, 14, 2))
    gat.predict_method = 'foo'
    assert_raises(NotImplementedError, gat.predict, epochs)
    gat.predict_method = 'predict'
    gat.predict(epochs)
    assert_array_equal(np.shape(gat.y_pred_), (15, 15, 14, 1))
    assert_equal(
        "<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), "
        "predicted 14 epochs, no score>", "%s" % gat)
    gat.score(epochs)
    assert_true(gat.scorer_.__name__ == 'accuracy_score')
    # check clf / predict_method combinations for which the scoring metrics
    # cannot be inferred.
    gat.scorer = None
    gat.predict_method = 'decision_function'
    assert_raises(ValueError, gat.score, epochs)
    # Check specifying y manually
    gat.predict_method = 'predict'
    gat.score(epochs, y=epochs.events[:, 2])
    gat.score(epochs, y=epochs.events[:, 2].tolist())
    assert_equal(
        "<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), "
        "predicted 14 epochs,\n scored "
        "(accuracy_score)>", "%s" % gat)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs, y=epochs.events[:, 2])

    # An unknown predict_mode must be rejected; restore the mode afterwards.
    old_mode = gat.predict_mode
    gat.predict_mode = 'super-foo-mode'
    assert_raises(ValueError, gat.predict, epochs)
    gat.predict_mode = old_mode

    gat.score(epochs, y=epochs.events[:, 2])
    assert_true("accuracy_score" in '%s' % gat.scorer_)
    epochs2 = epochs.copy()

    # check _DecodingTime class
    assert_equal(
        "<DecodingTime | start: -0.200 (s), stop: 0.499 (s), step: "
        "0.050 (s), length: 0.050 (s), n_time_windows: 15>",
        "%s" % gat.train_times_)
    assert_equal(
        "<DecodingTime | start: -0.200 (s), stop: 0.499 (s), step: "
        "0.050 (s), length: 0.050 (s), n_time_windows: 15 x 15>",
        "%s" % gat.test_times_)

    # the y-check
    # Scoring epochs whose event codes were all shifted must fail.
    gat.predict_mode = 'mean-prediction'
    epochs2.events[:, 2] += 10
    gat_ = copy.deepcopy(gat)
    with use_log_level('error'):
        assert_raises(ValueError, gat_.score, epochs2)
    gat.predict_mode = 'cross-validation'

    # Test basics
    # --- number of trials
    assert_true(gat.y_train_.shape[0] == gat.y_true_.shape[0] == len(
        gat.y_pred_[0][0]) == 14)
    # --- number of folds
    assert_true(np.shape(gat.estimators_)[1] == gat.cv)
    # --- length training size
    assert_true(
        len(gat.train_times_['slices']) == 15 == np.shape(gat.estimators_)[0])
    # --- length testing sizes
    assert_true(
        len(gat.test_times_['slices']) == 15 == np.shape(gat.scores_)[0])
    assert_true(
        len(gat.test_times_['slices'][0]) == 15 == np.shape(gat.scores_)[1])

    # Test score_mode
    gat.score_mode = 'foo'
    assert_raises(ValueError, gat.score, epochs)
    gat.score_mode = 'fold-wise'
    scores = gat.score(epochs)
    assert_array_equal(np.shape(scores), [15, 15, 5])
    gat.score_mode = 'mean-sample-wise'
    scores = gat.score(epochs)
    assert_array_equal(np.shape(scores), [15, 15])
    gat.score_mode = 'mean-fold-wise'
    scores = gat.score(epochs)
    assert_array_equal(np.shape(scores), [15, 15])
    # With 'mean-prediction' a warning about the changed score_mode is
    # expected.
    gat.predict_mode = 'mean-prediction'
    with warnings.catch_warnings(record=True) as w:
        gat.score(epochs)
        assert_true(
            any("score_mode changed from " in str(ww.message) for ww in w))

    # Test longer time window
    gat = GeneralizationAcrossTime(train_times={'length': .100})
    with warnings.catch_warnings(record=True):
        gat2 = gat.fit(epochs)
    assert_true(gat is gat2)  # return self
    assert_true(hasattr(gat2, 'cv_'))
    assert_true(gat2.cv_ != gat.cv)
    with warnings.catch_warnings(record=True):  # not vectorizing
        scores = gat.score(epochs)
    assert_true(isinstance(scores, np.ndarray))  # type check
    assert_equal(len(scores[0]), len(scores))  # shape check
    assert_equal(len(gat.test_times_['slices'][0][0]), 2)

    # Decim training steps
    gat = GeneralizationAcrossTime(train_times={'step': .100})
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    gat.score(epochs)
    assert_true(len(gat.scores_) == len(gat.estimators_) == 8)  # training time
    assert_equal(len(gat.scores_[0]), 15)  # testing time

    # Test start stop training & test cv without n_fold params
    y_4classes = np.hstack((epochs.events[:7, 2], epochs.events[7:, 2] + 1))
    train_times = dict(start=0.090, stop=0.250)
    gat = GeneralizationAcrossTime(cv=cv_lolo, train_times=train_times)
    # predict without fit
    assert_raises(RuntimeError, gat.predict, epochs)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs, y=y_4classes)
    gat.score(epochs)
    assert_equal(len(gat.scores_), 4)
    assert_equal(gat.train_times_['times'][0], epochs.times[6])
    assert_equal(gat.train_times_['times'][-1], epochs.times[9])

    # Test score without passing epochs & Test diagonal decoding
    gat = GeneralizationAcrossTime(test_times='diagonal')
    with warnings.catch_warnings(record=True):  # not vectorizing
        gat.fit(epochs)
    # Scoring without epochs requires an earlier predict() call.
    assert_raises(RuntimeError, gat.score)
    with warnings.catch_warnings(record=True):  # not vectorizing
        gat.predict(epochs)
    scores = gat.score()
    assert_true(scores is gat.scores_)
    assert_equal(np.shape(gat.scores_), (15, 1))
    assert_array_equal(
        [tim for ttime in gat.test_times_['times'] for tim in ttime],
        gat.train_times_['times'])

    # Test generalization across conditions
    gat = GeneralizationAcrossTime(predict_mode='mean-prediction', cv=2)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs[0:6])
    with warnings.catch_warnings(record=True):
        # There are some empty test folds because of n_trials
        gat.predict(epochs[7:])
        gat.score(epochs[7:])

    # Test training time parameters
    gat_ = copy.deepcopy(gat)
    # --- start stop outside time range
    gat_.train_times = dict(start=-999.)
    with use_log_level('error'):
        assert_raises(ValueError, gat_.fit, epochs)
    gat_.train_times = dict(start=999.)
    assert_raises(ValueError, gat_.fit, epochs)
    # --- impossible slices
    gat_.train_times = dict(step=.000001)
    assert_raises(ValueError, gat_.fit, epochs)
    gat_.train_times = dict(length=.000001)
    assert_raises(ValueError, gat_.fit, epochs)
    gat_.train_times = dict(length=999.)
    assert_raises(ValueError, gat_.fit, epochs)

    # Test testing time parameters
    # --- outside time range
    gat.test_times = dict(start=-999.)
    with warnings.catch_warnings(record=True):  # no epochs in fold
        assert_raises(ValueError, gat.predict, epochs)
    gat.test_times = dict(start=999.)
    with warnings.catch_warnings(record=True):  # no test epochs
        assert_raises(ValueError, gat.predict, epochs)
    # --- impossible slices
    gat.test_times = dict(step=.000001)
    with warnings.catch_warnings(record=True):  # no test epochs
        assert_raises(ValueError, gat.predict, epochs)
    gat_ = copy.deepcopy(gat)
    gat_.train_times_['length'] = .000001
    gat_.test_times = dict(length=.000001)
    with warnings.catch_warnings(record=True):  # no test epochs
        assert_raises(ValueError, gat_.predict, epochs)
    # --- test time region of interest
    gat.test_times = dict(step=.150)
    with warnings.catch_warnings(record=True):  # not vectorizing
        gat.predict(epochs)
    assert_array_equal(np.shape(gat.y_pred_), (15, 5, 14, 1))
    # --- silly value
    gat.test_times = 'foo'
    with warnings.catch_warnings(record=True):  # no test epochs
        assert_raises(ValueError, gat.predict, epochs)
    assert_raises(RuntimeError, gat.score)
    # --- unmatched length between training and testing time
    gat.test_times = dict(length=.150)
    assert_raises(ValueError, gat.predict, epochs)
    # --- irregular length training and testing times
    # 2 estimators, the first one is trained on two successive time samples
    # whereas the second one is trained on a single time sample.
    train_times = dict(slices=[[0, 1], [1]])
    # The first estimator is tested once, the second estimator is tested on
    # two successive time samples.
    test_times = dict(slices=[[[0, 1]], [[0], [1]]])
    gat = GeneralizationAcrossTime(train_times=train_times,
                                   test_times=test_times)
    gat.fit(epochs)
    with warnings.catch_warnings(record=True):  # not vectorizing
        gat.score(epochs)
    assert_array_equal(np.shape(gat.y_pred_[0]), [1, len(epochs), 1])
    assert_array_equal(np.shape(gat.y_pred_[1]), [2, len(epochs), 1])
    # check cannot Automatically infer testing times for adhoc training times
    gat.test_times = None
    assert_raises(ValueError, gat.predict, epochs)

    svc = SVC(C=1, kernel='linear', probability=True)
    gat = GeneralizationAcrossTime(clf=svc, predict_mode='mean-prediction')
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)

    # sklearn needs it: c.f.
    # https://github.com/scikit-learn/scikit-learn/issues/2723
    # and http://bit.ly/1u7t8UT
    with use_log_level('error'):
        assert_raises(ValueError, gat.score, epochs2)
        gat.score(epochs)
    # NOTE(review): `scores` was last assigned in the diagonal-decoding
    # section above, not by the gat.score(epochs) call just before these
    # checks -- confirm this is what is meant to be tested.
    assert_true(0.0 <= np.min(scores) <= 1.0)
    assert_true(0.0 <= np.max(scores) <= 1.0)

    # Test that gets error if train on one dataset, test on another, and don't
    # specify appropriate cv:
    gat = GeneralizationAcrossTime(cv=cv_shuffle)
    gat.fit(epochs)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)

    gat.predict(epochs)
    assert_raises(ValueError, gat.predict, epochs[:10])

    # Make CV with some empty train and test folds:
    # --- empty test fold(s) should warn when gat.predict()
    gat._cv_splits[0] = [gat._cv_splits[0][0], np.empty(0)]
    with warnings.catch_warnings(record=True) as w:
        gat.predict(epochs)
        assert_true(len(w) > 0)
        assert_true(
            any('do not have any test epochs' in str(ww.message) for ww in w))
    # --- empty train fold(s) should raise when gat.fit()
    gat = GeneralizationAcrossTime(cv=[([0], [1]), ([], [0])])
    assert_raises(ValueError, gat.fit, epochs[:2])

    # Check that still works with classifier that output y_pred with
    # shape = (n_trials, 1) instead of (n_trials,)
    if check_version('sklearn', '0.17'):  # no is_regressor before v0.17
        gat = GeneralizationAcrossTime(clf=KernelRidge(), cv=2)
        # The epochs are cropped in place here; everything below runs on the
        # shortened epochs when this branch is taken.
        epochs.crop(None, epochs.times[2])
        gat.fit(epochs)
        # With regression the default cv is KFold and not StratifiedKFold
        assert_true(gat.cv_.__class__ == KFold)
        gat.score(epochs)
        # with regression the default scoring metrics is mean squared error
        assert_true(gat.scorer_.__name__ == 'mean_squared_error')

    # Test combinations of complex scenarios
    # 2 or more distinct classes
    n_classes = [2, 4]  # 4 tested
    # nicely ordered labels or not
    le = LabelEncoder()
    y = le.fit_transform(epochs.events[:, 2])
    y[len(y) // 2:] += 2
    ys = (y, y + 1000)
    # Univariate and multivariate prediction
    svc = SVC(C=1, kernel='linear', probability=True)
    reg = KernelRidge()

    def scorer_proba(y_true, y_pred):
        # AUC computed on the first column of the predictions.
        return roc_auc_score(y_true, y_pred[:, 0])

    # We re testing 3 scenario: default, classifier + predict_proba, regressor
    scorers = [None, scorer_proba, scorer_regress]
    predict_methods = [None, 'predict_proba', None]
    clfs = [svc, svc, reg]
    # Test all combinations
    for clf, predict_method, scorer in zip(clfs, predict_methods, scorers):
        for y in ys:
            for n_class in n_classes:
                for predict_mode in ['cross-validation', 'mean-prediction']:
                    # Cannot use AUC for n_class > 2
                    if (predict_method == 'predict_proba' and n_class != 2):
                        continue

                    # Fold the labels down to n_class distinct values.
                    y_ = y % n_class

                    with warnings.catch_warnings(record=True):
                        gat = GeneralizationAcrossTime(
                            cv=2, clf=clf, scorer=scorer,
                            predict_mode=predict_mode)
                        gat.fit(epochs, y=y_)
                        gat.score(epochs, y=y_)

                    # Check that scorer is correctly defined manually and
                    # automatically.
                    scorer_name = gat.scorer_.__name__
                    if scorer is None:
                        if is_classifier(clf):
                            assert_equal(scorer_name, 'accuracy_score')
                        else:
                            assert_equal(scorer_name, 'mean_squared_error')
                    else:
                        assert_equal(scorer_name, scorer.__name__)
def test_generalization_across_time():
    """Test time generalization decoding.

    Exercises GeneralizationAcrossTime end to end on a small two-channel
    epochs object: string representation at each stage, invalid
    ``predict_type`` / ``predict_mode`` values, training-time windows
    (length, step, start/stop), diagonal decoding, generalization across
    conditions and the different prediction types.
    """
    from sklearn.svm import SVC

    raw = io.Raw(raw_fname, preload=False)
    events = read_events(event_name)
    picks = pick_types(raw.info, meg='mag', stim=False, ecg=False,
                       eog=False, exclude='bads')
    picks = picks[0:2]
    decim = 30

    # Test on time generalization within one condition
    with warnings.catch_warnings(record=True):
        epochs = Epochs(raw, events, event_id, tmin, tmax, picks=picks,
                        baseline=(None, 0), preload=True, decim=decim)

    # Test default running
    gat = GeneralizationAcrossTime()
    assert_equal("<GAT | no fit, no prediction, no score>", "%s" % gat)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    assert_equal("<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), no "
                 "prediction, no score>", '%s' % gat)
    gat.predict(epochs)
    assert_equal("<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), "
                 "predict_type : 'predict' on 15 epochs, no score>",
                 "%s" % gat)
    gat.score(epochs)
    assert_equal("<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), "
                 "predict_type : 'predict' on 15 epochs,\n scored "
                 "(accuracy_score)>", "%s" % gat)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs, y=epochs.events[:, 2])

    # Invalid predict_type must be rejected; restore the valid one afterwards
    old_type = gat.predict_type
    gat.predict_type = 'foo'
    assert_raises(ValueError, gat.predict, epochs)
    gat.predict_type = old_type

    # Invalid predict_mode must be rejected; restore the valid one afterwards
    old_mode = gat.predict_mode
    gat.predict_mode = 'super-foo-mode'
    assert_raises(ValueError, gat.predict, epochs)
    gat.predict_mode = old_mode

    gat.score(epochs, y=epochs.events[:, 2])
    assert_true("accuracy_score" in '%s' % gat.scorer_)
    epochs2 = epochs.copy()

    # check _DecodingTime class
    assert_equal("<DecodingTime | start: -0.200 (s), stop: 0.499 (s), step: "
                 "0.047 (s), length: 0.047 (s), n_time_windows: 15>",
                 "%s" % gat.train_times)
    assert_equal("<DecodingTime | start: -0.200 (s), stop: 0.499 (s), step: "
                 "0.047 (s), length: 0.047 (s), n_time_windows: 15 x 15>",
                 "%s" % gat.test_times_)

    # the y-check
    gat.predict_mode = 'mean-prediction'
    epochs2.events[:, 2] += 10
    assert_raises(ValueError, gat.score, epochs2)
    gat.predict_mode = 'cross-validation'

    # Test basics
    # --- number of trials
    assert_true(gat.y_train_.shape[0] ==
                gat.y_true_.shape[0] ==
                gat.y_pred_.shape[2] == 14)
    # --- number of folds
    assert_true(np.shape(gat.estimators_)[1] == gat.cv)
    # --- length training size
    assert_true(len(gat.train_times['slices']) == 15 ==
                np.shape(gat.estimators_)[0])
    # --- length testing sizes
    assert_true(len(gat.test_times_['slices']) == 15 ==
                np.shape(gat.scores_)[0])
    assert_true(len(gat.test_times_['slices'][0]) == 15 ==
                np.shape(gat.scores_)[1])

    # Test longer time window
    gat = GeneralizationAcrossTime(train_times={'length': .100})
    with warnings.catch_warnings(record=True):
        gat2 = gat.fit(epochs)
    assert_true(gat is gat2)  # return self
    assert_true(hasattr(gat2, 'cv_'))
    assert_true(gat2.cv_ != gat.cv)
    scores = gat.score(epochs)
    assert_true(isinstance(scores, list))  # type check
    assert_equal(len(scores[0]), len(scores))  # shape check

    assert_equal(len(gat.test_times_['slices'][0][0]), 2)

    # Decim training steps
    gat = GeneralizationAcrossTime(train_times={'step': .100})
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    gat.score(epochs)
    assert_equal(len(gat.scores_), 8)

    # Test start stop training
    gat = GeneralizationAcrossTime(train_times={'start': 0.090,
                                                'stop': 0.250})
    # predict without fit
    assert_raises(RuntimeError, gat.predict, epochs)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    gat.score(epochs)
    assert_equal(len(gat.scores_), 4)
    assert_equal(gat.train_times['times_'][0], epochs.times[6])
    assert_equal(gat.train_times['times_'][-1], epochs.times[9])

    # Test score without passing epochs
    gat = GeneralizationAcrossTime()
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    assert_raises(RuntimeError, gat.score)
    gat.predict(epochs, test_times='diagonal')  # Test diagonal decoding
    scores = gat.score()
    assert_true(scores is gat.scores_)
    assert_equal(np.shape(gat.scores_), (15, 1))

    # Test generalization across conditions
    gat = GeneralizationAcrossTime(predict_mode='mean-prediction')
    with warnings.catch_warnings(record=True):
        gat.fit(epochs[0:6])
    gat.predict(epochs[7:])
    assert_raises(ValueError, gat.predict, epochs, test_times='hahahaha')
    assert_raises(RuntimeError, gat.score)
    gat.score(epochs[7:])

    svc = SVC(C=1, kernel='linear', probability=True)
    gat = GeneralizationAcrossTime(clf=svc, predict_type='predict_proba',
                                   predict_mode='mean-prediction')
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)

    # sklearn needs it: c.f.
    # https://github.com/scikit-learn/scikit-learn/issues/2723
    # and http://bit.ly/1u7t8UT
    assert_raises(ValueError, gat.score, epochs2)
    # Capture the scores of *this* estimator: the range checks below used to
    # inspect the stale diagonal-decoding scores computed further up.
    scores = gat.score(epochs)
    scores = sum(scores, [])  # flatten
    assert_true(0.0 <= np.min(scores) <= 1.0)
    assert_true(0.0 <= np.max(scores) <= 1.0)

    # test various predict_type
    gat = GeneralizationAcrossTime(clf=svc, predict_type="predict_proba")
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    gat.predict(epochs)
    # check that 2 class probabilistic estimates are [p, 1-p]
    assert_true(gat.y_pred_.shape[3] == 2)
    gat.score(epochs)
    # check that continuous prediction leads to AUC rather than accuracy
    assert_true("roc_auc_score" in '%s' % gat.scorer_)

    gat = GeneralizationAcrossTime(predict_type="decision_function")
    # XXX Sklearn doesn't like non-binary inputs. We could binarize the data,
    # or change Sklearn default behavior
    epochs.events[:, 2][epochs.events[:, 2] == 3] = 0
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    gat.predict(epochs)
    # check that 2 class non-probabilistic continuous estimates are [distance]
    assert_true(gat.y_pred_.shape[3] == 1)
    gat.score(epochs)
    # check that continuous prediction leads to AUC rather than accuracy
    assert_true("roc_auc_score" in '%s' % gat.scorer_)

    # Test that gets error if train on one dataset, test on another, and don't
    # specify appropriate cv:
    gat = GeneralizationAcrossTime()
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)

    gat.predict(epochs)
    assert_raises(ValueError, gat.predict, epochs[:10])

    # Test combinations of complex scenarios
    # 2 or more distinct classes
    n_classes = [2]  # 4 tested
    # nicely ordered labels or not
    # NOTE: y is a view on epochs.events, so the in-place += below also
    # modifies the event codes stored in `epochs`.
    y = epochs.events[:, 2]
    y[len(y) // 2:] += 2
    ys = (y, y + 1000)
    # Classifier and regressor
    svc = SVC(C=1, kernel='linear', probability=True)
    clfs = [svc]  # SVR tested
    # Continuous, and probabilistic estimate
    predict_types = ['predict_proba', 'decision_function']
    # Test all combinations
    for clf in clfs:
        for y in ys:
            for n_class in n_classes:
                for pt in predict_types:
                    y_ = y % n_class
                    with warnings.catch_warnings(record=True):
                        gat = GeneralizationAcrossTime(
                            cv=2, clf=clf, predict_type=pt)
                        gat.fit(epochs, y=y_)
                        gat.score(epochs, y=y_)
###############################################################################
# Generalization Across Time
# --------------------------
#
# This runs the analysis used in [1]_ and further detailed in [2]_
#
# Here we'll use a stratified cross-validation scheme.

# binary response vector: 1 where the event code is 3, 0 everywhere else
y = (epochs.events[:, 2] == 3).astype(int)

# stratified folds built from the labels
cv = StratifiedKFold(y=y)

# the GeneralizationAcrossTime object, scored with the ROC AUC
gat = GeneralizationAcrossTime(
    predict_mode="cross-validation",
    n_jobs=1,
    cv=cv,
    scorer=roc_auc_score,
)

# train on every time sample, then score across all test time samples
gat.fit(epochs, y=y)
gat.score(epochs)

# show the full generalization matrix and its diagonal
gat.plot()
gat.plot_diagonal()

###############################################################################
# Exercise
# --------
# - Can you improve the performance using full epochs and a common spatial
#   pattern (CSP) used by most BCI systems?
# - Explore other datasets from MNE (e.g. Face dataset from SPM to predict
# Bring both conditions onto the same channel set and the same trial count
mne.equalize_channels([epochs_classic, epochs_plan])
mne.epochs.equalize_epoch_counts([epochs_classic, epochs_plan])

# Dirty hack
# TODO: Check this from the Maxfilter side
# epochs_classic.info['dev_head_t'] = epochs_plan.info['dev_head_t']

epochs = mne.concatenate_epochs([epochs_classic, epochs_plan])

# Crop and downsample to make it faster
epochs.crop(tmin=-3.5, tmax=0)
epochs.resample(250)

# Setup the y vector and GAT: 0 for "press" trials, 1 for "plan" trials
# NOTE(review): assumes the concatenated epochs are ordered "press" then
# "plan" -- confirm against how epochs_classic / epochs_plan are built.
y = np.concatenate(
    (np.zeros(len(epochs["press"])), np.ones(len(epochs["plan"]))))
gat = GeneralizationAcrossTime(
    predict_mode='mean-prediction', scorer="roc_auc", n_jobs=1)

# Fit model (the estimators must be fitted before they can be scored)
gat.fit(epochs, y=y)

# Scoring and visualise result
gat.score(epochs, y=y)

# Save model
joblib.dump(gat, data_path + "decode_time_gen/%s_gat_2.jl" % subject)

fig = gat.plot(
    title="Temporal Gen (Classic vs planning): left to right sub: %s" %
    subject)
fig.savefig(data_path + "decode_time_gen/%s_gat_matrix_2.png" % subject)
# decoding
epochs = epochs.decimate(decim)
n_tr = len(epochs.times)

# keep only the trials with a strictly positive stimulus code
X = epochs[stim_list > 0]
y = stim_list[stim_list > 0]

# SVM parameters
# the scaler centers the data (removes the mean) and scales to unit variance
scaler = StandardScaler()
# model = svm.SVC(C=1, kernel='linear', class_weight='auto')
model = svm.LinearSVC(C=1, multi_class='ovr', class_weight='auto')
clf = make_pipeline(scaler, model)

cv = StratifiedKFold(y, n_fold)
gat = GeneralizationAcrossTime(
    clf=clf,
    cv=cv,
    predict_mode='cross-validation',
    n_jobs=n_jobs,
    # train_times=dict(step=step, length=length)
)
gat.fit(X, y)
# score = gat.score(X, y)
# gat.plot(vmin=.2, vmax=.3)
# gat.plot_diagonal(chance=.25)

###############################################################################
# now for the rsvp data
fname = op.join(data_path, 'abse_' + subject + '_main.mat')
epochs = sm_fieldtrip2mne(fname)
n_trial = len(epochs)
# one evoked response (trial average) per event type
evokeds = {key: epochs[key].average() for key in event_id}

# equal-weight combination of the three 'np_*' conditions, joint plot
np468 = mne.combine_evoked(
    [evokeds[name] for name in ('np_4', 'np_6', 'np_8')], weights='equal')
f1 = np468.plot_joint([.18, .3, .45, .6], title='Non Pop')
axes = f1.get_axes()
axes[0].set_ylim([-110, 110])

# same for the three 'p_*' conditions
p468 = mne.combine_evoked(
    [evokeds[name] for name in ('p_4', 'p_6', 'p_8')], weights='equal')
f2 = p468.plot_joint([.18, .3, .45, .6], title='Pop')
axes = f2.get_axes()
axes[0].set_ylim([-110, 110])

triggers = epochs.events[:, 2]

gat = GeneralizationAcrossTime(predict_mode='cross-validation', n_jobs=12)
#gat = GeneralizationAcrossTime(predict_mode='mean-prediction', n_jobs=12)

# binary label: 1 when the trigger code is 4, 5 or 6, otherwise 0
# NOTE(review): `ind` has one entry per epoch in `epochs`, while fit/score
# receive a condition-based selection -- lengths only agree if `epochs`
# holds nothing but these six conditions; confirm.
ind = np.in1d(triggers, (4, 5, 6)).astype(int)
gat.fit(
    epochs[('np_4', 'np_6', 'np_8', 'p_4', 'p_6', 'p_8')],
    y=ind)
gat.score(
    epochs[('np_4', 'np_6', 'np_8', 'p_4', 'p_6', 'p_8')],
    y=ind)
gat.plot(vmin=.55, vmax=.7)
gat.plot_diagonal()

###
# pairwise binary labels, each restricted to the two trigger codes compared
np8_vs_p4 = (triggers[np.in1d(triggers, (3, 4))] == 4).astype(int)
p8_vs_np4 = (triggers[np.in1d(triggers, (6, 1))] == 1).astype(int)
p8_vs_np8 = (triggers[np.in1d(triggers, (6, 3))] == 3).astype(int)
p6_vs_np6 = (triggers[np.in1d(triggers, (5, 2))] == 2).astype(int)
def test_generalization_across_time():
    """Test time generalization decoding.

    Exercises GeneralizationAcrossTime end to end on the epochs returned by
    ``make_epochs()``: channel picking, optional ``y`` as array or list,
    string representation at each stage, training/testing time parameters
    (including invalid ones), diagonal decoding, generalization across
    conditions, and classifiers/regressors with custom scorers.
    """
    from sklearn.svm import SVC
    from sklearn.linear_model import RANSACRegressor, LinearRegression
    from sklearn.preprocessing import LabelEncoder
    from sklearn.metrics import mean_squared_error
    from sklearn.cross_validation import LeaveOneLabelOut

    epochs = make_epochs()

    # Test default running
    gat = GeneralizationAcrossTime(picks='foo')
    assert_equal("<GAT | no fit, no prediction, no score>", "%s" % gat)
    assert_raises(ValueError, gat.fit, epochs)
    with warnings.catch_warnings(record=True):
        # check classic fit + check manual picks
        gat.picks = [0]
        gat.fit(epochs)
        # check optional y as array
        gat.picks = None
        gat.fit(epochs, y=epochs.events[:, 2])
        # check optional y as list
        gat.fit(epochs, y=epochs.events[:, 2].tolist())
    # NOTE(review): if assert_equal follows the unittest signature
    # (first, second, msg), the trailing 1 is a failure message and is not
    # compared against the two lengths -- confirm the intent.
    assert_equal(len(gat.picks_), len(gat.ch_names), 1)
    assert_equal("<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), no "
                 "prediction, no score>", '%s' % gat)
    assert_equal(gat.ch_names, epochs.ch_names)
    gat.predict(epochs)
    assert_equal("<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), "
                 "predicted 14 epochs, no score>", "%s" % gat)
    gat.score(epochs)
    gat.score(epochs, y=epochs.events[:, 2])
    gat.score(epochs, y=epochs.events[:, 2].tolist())
    assert_equal("<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), "
                 "predicted 14 epochs,\n scored "
                 "(accuracy_score)>", "%s" % gat)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs, y=epochs.events[:, 2])

    # Invalid predict_mode must be rejected; restore the valid one afterwards
    old_mode = gat.predict_mode
    gat.predict_mode = 'super-foo-mode'
    assert_raises(ValueError, gat.predict, epochs)
    gat.predict_mode = old_mode

    gat.score(epochs, y=epochs.events[:, 2])
    assert_true("accuracy_score" in '%s' % gat.scorer_)
    epochs2 = epochs.copy()

    # check _DecodingTime class
    assert_equal("<DecodingTime | start: -0.200 (s), stop: 0.499 (s), step: "
                 "0.047 (s), length: 0.047 (s), n_time_windows: 15>",
                 "%s" % gat.train_times_)
    assert_equal("<DecodingTime | start: -0.200 (s), stop: 0.499 (s), step: "
                 "0.047 (s), length: 0.047 (s), n_time_windows: 15 x 15>",
                 "%s" % gat.test_times_)

    # the y-check
    gat.predict_mode = 'mean-prediction'
    epochs2.events[:, 2] += 10
    gat_ = copy.deepcopy(gat)
    assert_raises(ValueError, gat_.score, epochs2)
    gat.predict_mode = 'cross-validation'

    # Test basics
    # --- number of trials
    assert_true(gat.y_train_.shape[0] ==
                gat.y_true_.shape[0] ==
                len(gat.y_pred_[0][0]) == 14)
    # --- number of folds
    assert_true(np.shape(gat.estimators_)[1] == gat.cv)
    # --- length training size
    assert_true(len(gat.train_times_['slices']) == 15 ==
                np.shape(gat.estimators_)[0])
    # --- length testing sizes
    assert_true(len(gat.test_times_['slices']) == 15 ==
                np.shape(gat.scores_)[0])
    assert_true(len(gat.test_times_['slices'][0]) == 15 ==
                np.shape(gat.scores_)[1])

    # Test longer time window
    gat = GeneralizationAcrossTime(train_times={'length': .100})
    with warnings.catch_warnings(record=True):
        gat2 = gat.fit(epochs)
    assert_true(gat is gat2)  # return self
    assert_true(hasattr(gat2, 'cv_'))
    assert_true(gat2.cv_ != gat.cv)
    scores = gat.score(epochs)
    assert_true(isinstance(scores, list))  # type check
    assert_equal(len(scores[0]), len(scores))  # shape check

    assert_equal(len(gat.test_times_['slices'][0][0]), 2)

    # Decim training steps
    gat = GeneralizationAcrossTime(train_times={'step': .100})
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)

    gat.score(epochs)
    assert_true(len(gat.scores_) == len(gat.estimators_) == 8)  # training time
    assert_equal(len(gat.scores_[0]), 15)  # testing time

    # Test start stop training & test cv without n_fold params
    y_4classes = np.hstack((epochs.events[:7, 2], epochs.events[7:, 2] + 1))
    gat = GeneralizationAcrossTime(cv=LeaveOneLabelOut(y_4classes),
                                   train_times={'start': 0.090,
                                                'stop': 0.250})
    # predict without fit
    assert_raises(RuntimeError, gat.predict, epochs)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs, y=y_4classes)
    gat.score(epochs)
    assert_equal(len(gat.scores_), 4)
    assert_equal(gat.train_times_['times'][0], epochs.times[6])
    assert_equal(gat.train_times_['times'][-1], epochs.times[9])

    # Test score without passing epochs & Test diagonal decoding
    gat = GeneralizationAcrossTime(test_times='diagonal')
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    assert_raises(RuntimeError, gat.score)
    gat.predict(epochs)
    scores = gat.score()
    assert_true(scores is gat.scores_)
    assert_equal(np.shape(gat.scores_), (15, 1))
    assert_array_equal([tim for ttime in gat.test_times_['times']
                        for tim in ttime], gat.train_times_['times'])

    # Test generalization across conditions
    gat = GeneralizationAcrossTime(predict_mode='mean-prediction')
    with warnings.catch_warnings(record=True):
        gat.fit(epochs[0:6])
    gat.predict(epochs[7:])
    gat.score(epochs[7:])

    # Test training time parameters
    gat_ = copy.deepcopy(gat)
    # --- start stop outside time range
    gat_.train_times = dict(start=-999.)
    assert_raises(ValueError, gat_.fit, epochs)
    gat_.train_times = dict(start=999.)
    assert_raises(ValueError, gat_.fit, epochs)
    # --- impossible slices
    gat_.train_times = dict(step=.000001)
    assert_raises(ValueError, gat_.fit, epochs)
    gat_.train_times = dict(length=.000001)
    assert_raises(ValueError, gat_.fit, epochs)
    gat_.train_times = dict(length=999.)
    assert_raises(ValueError, gat_.fit, epochs)

    # Test testing time parameters
    # --- outside time range
    gat.test_times = dict(start=-999.)
    assert_raises(ValueError, gat.predict, epochs)
    gat.test_times = dict(start=999.)
    assert_raises(ValueError, gat.predict, epochs)
    # --- impossible slices
    gat.test_times = dict(step=.000001)
    assert_raises(ValueError, gat.predict, epochs)
    gat_ = copy.deepcopy(gat)
    gat_.train_times_['length'] = .000001
    gat_.test_times = dict(length=.000001)
    assert_raises(ValueError, gat_.predict, epochs)
    # --- test time region of interest
    gat.test_times = dict(step=.150)
    gat.predict(epochs)
    assert_array_equal(np.shape(gat.y_pred_), (15, 5, 14, 1))
    # --- silly value
    gat.test_times = 'foo'
    assert_raises(ValueError, gat.predict, epochs)
    assert_raises(RuntimeError, gat.score)
    # --- unmatched length between training and testing time
    gat.test_times = dict(length=.150)
    assert_raises(ValueError, gat.predict, epochs)

    svc = SVC(C=1, kernel='linear', probability=True)
    gat = GeneralizationAcrossTime(clf=svc, predict_mode='mean-prediction')
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)

    # sklearn needs it: c.f.
    # https://github.com/scikit-learn/scikit-learn/issues/2723
    # and http://bit.ly/1u7t8UT
    assert_raises(ValueError, gat.score, epochs2)
    # Capture the scores of *this* estimator: the range checks below used to
    # inspect the stale diagonal-decoding scores computed further up.
    scores = gat.score(epochs)
    scores = sum(scores, [])  # flatten
    assert_true(0.0 <= np.min(scores) <= 1.0)
    assert_true(0.0 <= np.max(scores) <= 1.0)

    # Test that gets error if train on one dataset, test on another, and don't
    # specify appropriate cv:
    gat = GeneralizationAcrossTime()
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)

    gat.predict(epochs)
    assert_raises(ValueError, gat.predict, epochs[:10])

    # Check that still works with classifier that output y_pred with
    # shape = (n_trials, 1) instead of (n_trials,)
    gat = GeneralizationAcrossTime(clf=RANSACRegressor(LinearRegression()),
                                   cv=2)
    # keep only the first time samples so the remaining fits stay cheap
    epochs.crop(None, epochs.times[2])
    gat.fit(epochs)
    gat.predict(epochs)

    # Test combinations of complex scenarios
    # 2 or more distinct classes
    n_classes = [2, 4]  # 4 tested
    # nicely ordered labels or not
    le = LabelEncoder()
    y = le.fit_transform(epochs.events[:, 2])
    y[len(y) // 2:] += 2
    ys = (y, y + 1000)
    # Univariate and multivariate prediction
    svc = SVC(C=1, kernel='linear')

    class SVC_proba(SVC):
        """SVC whose ``predict`` returns the first predict_proba column."""

        def predict(self, x):
            probas = super(SVC_proba, self).predict_proba(x)
            return probas[:, 0]

    svcp = SVC_proba(C=1, kernel='linear', probability=True)
    clfs = [svc, svcp]
    scorers = [None, mean_squared_error]
    # Test all combinations
    for clf, scorer in zip(clfs, scorers):
        for y in ys:
            for n_class in n_classes:
                y_ = y % n_class
                with warnings.catch_warnings(record=True):
                    gat = GeneralizationAcrossTime(cv=2, clf=clf,
                                                   scorer=scorer)
                    gat.fit(epochs, y=y_)
                    gat.score(epochs, y=y_)
def test_generalization_across_time():
    """Test time generalization decoding.

    Exercises GeneralizationAcrossTime end to end on the epochs returned by
    ``make_epochs()``: channel picking, prediction methods, score modes,
    training/testing time parameters (including invalid and irregular
    ones), diagonal decoding, generalization across conditions, empty
    cross-validation folds, and automatic versus manual scorer selection
    for classifiers and regressors.
    """
    from sklearn.svm import SVC
    from sklearn.base import is_classifier
    # KernelRidge is used for testing 1) regression analyses 2) n-dimensional
    # predictions.
    from sklearn.kernel_ridge import KernelRidge
    from sklearn.preprocessing import LabelEncoder
    from sklearn.metrics import roc_auc_score, mean_squared_error

    epochs = make_epochs()
    y_4classes = np.hstack((epochs.events[:7, 2], epochs.events[7:, 2] + 1))
    if check_version('sklearn', '0.18'):
        from sklearn.model_selection import (KFold, StratifiedKFold,
                                             ShuffleSplit, LeaveOneLabelOut)
        cv_shuffle = ShuffleSplit()
        cv = LeaveOneLabelOut()
        # XXX we cannot pass any other parameters than X and y to cv.split
        # so we have to build it before hand
        cv_lolo = [(train, test) for train, test in cv.split(
                   X=y_4classes, y=y_4classes, labels=y_4classes)]

        # With sklearn >= 0.17, `clf` can be identified as a regressor, and
        # the scoring metrics can therefore be automatically assigned.
        scorer_regress = None
    else:
        from sklearn.cross_validation import (KFold, StratifiedKFold,
                                              ShuffleSplit, LeaveOneLabelOut)
        cv_shuffle = ShuffleSplit(len(epochs))
        cv_lolo = LeaveOneLabelOut(y_4classes)

        # With sklearn < 0.17, `clf` cannot be identified as a regressor, and
        # therefore the scoring metrics cannot be automatically assigned.
        scorer_regress = mean_squared_error

    # Test default running
    gat = GeneralizationAcrossTime(picks='foo')
    assert_equal("<GAT | no fit, no prediction, no score>", "%s" % gat)
    assert_raises(ValueError, gat.fit, epochs)
    with warnings.catch_warnings(record=True):
        # check classic fit + check manual picks
        gat.picks = [0]
        gat.fit(epochs)
        # check optional y as array
        gat.picks = None
        gat.fit(epochs, y=epochs.events[:, 2])
        # check optional y as list
        gat.fit(epochs, y=epochs.events[:, 2].tolist())
    # NOTE(review): if assert_equal follows the unittest signature
    # (first, second, msg), the trailing 1 is a failure message and is not
    # compared against the two lengths -- confirm the intent.
    assert_equal(len(gat.picks_), len(gat.ch_names), 1)
    assert_equal("<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), no "
                 "prediction, no score>", '%s' % gat)
    assert_equal(gat.ch_names, epochs.ch_names)

    # test different predict function:
    gat = GeneralizationAcrossTime(predict_method='decision_function')
    gat.fit(epochs)
    # With classifier, the default cv is StratifiedKFold
    assert_true(gat.cv_.__class__ == StratifiedKFold)
    gat.predict(epochs)
    assert_array_equal(np.shape(gat.y_pred_), (15, 15, 14, 1))
    gat.predict_method = 'predict_proba'
    gat.predict(epochs)
    assert_array_equal(np.shape(gat.y_pred_), (15, 15, 14, 2))
    gat.predict_method = 'foo'
    assert_raises(NotImplementedError, gat.predict, epochs)
    gat.predict_method = 'predict'
    gat.predict(epochs)
    assert_array_equal(np.shape(gat.y_pred_), (15, 15, 14, 1))

    assert_equal("<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), "
                 "predicted 14 epochs, no score>", "%s" % gat)
    gat.score(epochs)
    assert_true(gat.scorer_.__name__ == 'accuracy_score')
    # check clf / predict_method combinations for which the scoring metrics
    # cannot be inferred.
    gat.scorer = None
    gat.predict_method = 'decision_function'
    assert_raises(ValueError, gat.score, epochs)
    # Check specifying y manually
    gat.predict_method = 'predict'
    gat.score(epochs, y=epochs.events[:, 2])
    gat.score(epochs, y=epochs.events[:, 2].tolist())
    assert_equal("<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), "
                 "predicted 14 epochs,\n scored "
                 "(accuracy_score)>", "%s" % gat)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs, y=epochs.events[:, 2])

    # Invalid predict_mode must be rejected; restore the valid one afterwards
    old_mode = gat.predict_mode
    gat.predict_mode = 'super-foo-mode'
    assert_raises(ValueError, gat.predict, epochs)
    gat.predict_mode = old_mode

    gat.score(epochs, y=epochs.events[:, 2])
    assert_true("accuracy_score" in '%s' % gat.scorer_)
    epochs2 = epochs.copy()

    # check _DecodingTime class
    assert_equal("<DecodingTime | start: -0.200 (s), stop: 0.499 (s), step: "
                 "0.050 (s), length: 0.050 (s), n_time_windows: 15>",
                 "%s" % gat.train_times_)
    assert_equal("<DecodingTime | start: -0.200 (s), stop: 0.499 (s), step: "
                 "0.050 (s), length: 0.050 (s), n_time_windows: 15 x 15>",
                 "%s" % gat.test_times_)

    # the y-check
    gat.predict_mode = 'mean-prediction'
    epochs2.events[:, 2] += 10
    gat_ = copy.deepcopy(gat)
    with use_log_level('error'):
        assert_raises(ValueError, gat_.score, epochs2)
    gat.predict_mode = 'cross-validation'

    # Test basics
    # --- number of trials
    assert_true(gat.y_train_.shape[0] ==
                gat.y_true_.shape[0] ==
                len(gat.y_pred_[0][0]) == 14)
    # --- number of folds
    assert_true(np.shape(gat.estimators_)[1] == gat.cv)
    # --- length training size
    assert_true(len(gat.train_times_['slices']) == 15 ==
                np.shape(gat.estimators_)[0])
    # --- length testing sizes
    assert_true(len(gat.test_times_['slices']) == 15 ==
                np.shape(gat.scores_)[0])
    assert_true(len(gat.test_times_['slices'][0]) == 15 ==
                np.shape(gat.scores_)[1])
    # Test score_mode
    gat.score_mode = 'foo'
    assert_raises(ValueError, gat.score, epochs)
    gat.score_mode = 'fold-wise'
    scores = gat.score(epochs)
    assert_array_equal(np.shape(scores), [15, 15, 5])
    gat.score_mode = 'mean-sample-wise'
    scores = gat.score(epochs)
    assert_array_equal(np.shape(scores), [15, 15])
    gat.score_mode = 'mean-fold-wise'
    scores = gat.score(epochs)
    assert_array_equal(np.shape(scores), [15, 15])
    gat.predict_mode = 'mean-prediction'
    with warnings.catch_warnings(record=True) as w:
        gat.score(epochs)
        assert_true(any("score_mode changed from " in str(ww.message)
                        for ww in w))

    # Test longer time window
    gat = GeneralizationAcrossTime(train_times={'length': .100})
    with warnings.catch_warnings(record=True):
        gat2 = gat.fit(epochs)
    assert_true(gat is gat2)  # return self
    assert_true(hasattr(gat2, 'cv_'))
    assert_true(gat2.cv_ != gat.cv)
    with warnings.catch_warnings(record=True):  # not vectorizing
        scores = gat.score(epochs)
    assert_true(isinstance(scores, np.ndarray))  # type check
    assert_equal(len(scores[0]), len(scores))  # shape check
    assert_equal(len(gat.test_times_['slices'][0][0]), 2)

    # Decim training steps
    gat = GeneralizationAcrossTime(train_times={'step': .100})
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    gat.score(epochs)
    assert_true(len(gat.scores_) == len(gat.estimators_) == 8)  # training time
    assert_equal(len(gat.scores_[0]), 15)  # testing time

    # Test start stop training & test cv without n_fold params
    y_4classes = np.hstack((epochs.events[:7, 2], epochs.events[7:, 2] + 1))
    train_times = dict(start=0.090, stop=0.250)
    gat = GeneralizationAcrossTime(cv=cv_lolo, train_times=train_times)
    # predict without fit
    assert_raises(RuntimeError, gat.predict, epochs)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs, y=y_4classes)
    gat.score(epochs)
    assert_equal(len(gat.scores_), 4)
    assert_equal(gat.train_times_['times'][0], epochs.times[6])
    assert_equal(gat.train_times_['times'][-1], epochs.times[9])

    # Test score without passing epochs & Test diagonal decoding
    gat = GeneralizationAcrossTime(test_times='diagonal')
    with warnings.catch_warnings(record=True):  # not vectorizing
        gat.fit(epochs)
    assert_raises(RuntimeError, gat.score)
    with warnings.catch_warnings(record=True):  # not vectorizing
        gat.predict(epochs)
    scores = gat.score()
    assert_true(scores is gat.scores_)
    assert_equal(np.shape(gat.scores_), (15, 1))
    assert_array_equal([tim for ttime in gat.test_times_['times']
                        for tim in ttime], gat.train_times_['times'])

    # Test generalization across conditions
    gat = GeneralizationAcrossTime(predict_mode='mean-prediction', cv=2)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs[0:6])
    with warnings.catch_warnings(record=True):
        # There are some empty test folds because of n_trials
        gat.predict(epochs[7:])
        gat.score(epochs[7:])

    # Test training time parameters
    gat_ = copy.deepcopy(gat)
    # --- start stop outside time range
    gat_.train_times = dict(start=-999.)
    with use_log_level('error'):
        assert_raises(ValueError, gat_.fit, epochs)
    gat_.train_times = dict(start=999.)
    assert_raises(ValueError, gat_.fit, epochs)
    # --- impossible slices
    gat_.train_times = dict(step=.000001)
    assert_raises(ValueError, gat_.fit, epochs)
    gat_.train_times = dict(length=.000001)
    assert_raises(ValueError, gat_.fit, epochs)
    gat_.train_times = dict(length=999.)
    assert_raises(ValueError, gat_.fit, epochs)

    # Test testing time parameters
    # --- outside time range
    gat.test_times = dict(start=-999.)
    with warnings.catch_warnings(record=True):  # no epochs in fold
        assert_raises(ValueError, gat.predict, epochs)
    gat.test_times = dict(start=999.)
    with warnings.catch_warnings(record=True):  # no test epochs
        assert_raises(ValueError, gat.predict, epochs)
    # --- impossible slices
    gat.test_times = dict(step=.000001)
    with warnings.catch_warnings(record=True):  # no test epochs
        assert_raises(ValueError, gat.predict, epochs)
    gat_ = copy.deepcopy(gat)
    gat_.train_times_['length'] = .000001
    gat_.test_times = dict(length=.000001)
    with warnings.catch_warnings(record=True):  # no test epochs
        assert_raises(ValueError, gat_.predict, epochs)
    # --- test time region of interest
    gat.test_times = dict(step=.150)
    with warnings.catch_warnings(record=True):  # not vectorizing
        gat.predict(epochs)
    assert_array_equal(np.shape(gat.y_pred_), (15, 5, 14, 1))
    # --- silly value
    gat.test_times = 'foo'
    with warnings.catch_warnings(record=True):  # no test epochs
        assert_raises(ValueError, gat.predict, epochs)
    assert_raises(RuntimeError, gat.score)
    # --- unmatched length between training and testing time
    gat.test_times = dict(length=.150)
    assert_raises(ValueError, gat.predict, epochs)
    # --- irregular length training and testing times
    # 2 estimators, the first one is trained on two successive time samples
    # whereas the second one is trained on a single time sample.
    train_times = dict(slices=[[0, 1], [1]])
    # The first estimator is tested once, the second estimator is tested on
    # two successive time samples.
    test_times = dict(slices=[[[0, 1]], [[0], [1]]])
    gat = GeneralizationAcrossTime(train_times=train_times,
                                   test_times=test_times)
    gat.fit(epochs)
    with warnings.catch_warnings(record=True):  # not vectorizing
        gat.score(epochs)
    assert_array_equal(np.shape(gat.y_pred_[0]), [1, len(epochs), 1])
    assert_array_equal(np.shape(gat.y_pred_[1]), [2, len(epochs), 1])
    # check cannot Automatically infer testing times for adhoc training times
    gat.test_times = None
    assert_raises(ValueError, gat.predict, epochs)

    svc = SVC(C=1, kernel='linear', probability=True)
    gat = GeneralizationAcrossTime(clf=svc, predict_mode='mean-prediction')
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)

    # sklearn needs it: c.f.
    # https://github.com/scikit-learn/scikit-learn/issues/2723
    # and http://bit.ly/1u7t8UT
    with use_log_level('error'):
        assert_raises(ValueError, gat.score, epochs2)
        # Capture the scores of *this* estimator: the range checks below
        # used to inspect the stale diagonal-decoding scores from above.
        scores = gat.score(epochs)
    assert_true(0.0 <= np.min(scores) <= 1.0)
    assert_true(0.0 <= np.max(scores) <= 1.0)

    # Test that gets error if train on one dataset, test on another, and don't
    # specify appropriate cv:
    gat = GeneralizationAcrossTime(cv=cv_shuffle)
    gat.fit(epochs)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)

    gat.predict(epochs)
    assert_raises(ValueError, gat.predict, epochs[:10])

    # Make CV with some empty train and test folds:
    # --- empty test fold(s) should warn when gat.predict()
    gat._cv_splits[0] = [gat._cv_splits[0][0], np.empty(0)]
    with warnings.catch_warnings(record=True) as w:
        gat.predict(epochs)
        assert_true(len(w) > 0)
        assert_true(any('do not have any test epochs' in str(ww.message)
                        for ww in w))
    # --- empty train fold(s) should raise when gat.fit()
    gat = GeneralizationAcrossTime(cv=[([0], [1]), ([], [0])])
    assert_raises(ValueError, gat.fit, epochs[:2])

    # Check that still works with classifier that output y_pred with
    # shape = (n_trials, 1) instead of (n_trials,)
    if check_version('sklearn', '0.17'):  # no is_regressor before v0.17
        gat = GeneralizationAcrossTime(clf=KernelRidge(), cv=2)
        # keep only the first time samples so the remaining fits stay cheap
        epochs.crop(None, epochs.times[2])
        gat.fit(epochs)
        # With regression the default cv is KFold and not StratifiedKFold
        assert_true(gat.cv_.__class__ == KFold)
        gat.score(epochs)
        # with regression the default scoring metrics is mean squared error
        assert_true(gat.scorer_.__name__ == 'mean_squared_error')

    # Test combinations of complex scenarios
    # 2 or more distinct classes
    n_classes = [2, 4]  # 4 tested
    # nicely ordered labels or not
    le = LabelEncoder()
    y = le.fit_transform(epochs.events[:, 2])
    y[len(y) // 2:] += 2
    ys = (y, y + 1000)
    # Univariate and multivariate prediction
    svc = SVC(C=1, kernel='linear', probability=True)
    reg = KernelRidge()

    def scorer_proba(y_true, y_pred):
        """Score the first column of ``y_pred`` with the ROC AUC."""
        return roc_auc_score(y_true, y_pred[:, 0])

    # We re testing 3 scenario: default, classifier + predict_proba, regressor
    scorers = [None, scorer_proba, scorer_regress]
    predict_methods = [None, 'predict_proba', None]
    clfs = [svc, svc, reg]
    # Test all combinations
    # NOTE(review): predict_method only drives the skip condition below; it
    # is not forwarded to GeneralizationAcrossTime -- confirm this is
    # intended.
    for clf, predict_method, scorer in zip(clfs, predict_methods, scorers):
        for y in ys:
            for n_class in n_classes:
                for predict_mode in ['cross-validation', 'mean-prediction']:
                    # Cannot use AUC for n_class > 2
                    if (predict_method == 'predict_proba' and n_class != 2):
                        continue

                    y_ = y % n_class

                    with warnings.catch_warnings(record=True):
                        gat = GeneralizationAcrossTime(
                            cv=2, clf=clf, scorer=scorer,
                            predict_mode=predict_mode)
                        gat.fit(epochs, y=y_)
                        gat.score(epochs, y=y_)

                    # Check that scorer is correctly defined manually and
                    # automatically.
                    scorer_name = gat.scorer_.__name__
                    if scorer is None:
                        if is_classifier(clf):
                            assert_equal(scorer_name, 'accuracy_score')
                        else:
                            assert_equal(scorer_name, 'mean_squared_error')
                    else:
                        assert_equal(scorer_name, scorer.__name__)
# decimate data
decim = 2
epochs.decimate(decim)
# keep only the -100 ms to 750 ms window
epochs.crop(-.100, .750)

######################################################################
# Decoding

# classifier: a regularized Logistic Regression, wrapped by force_predict
# (axis=1), preceded by a z-score normalization step
clf = make_pipeline(
    StandardScaler(),
    force_predict(LogisticRegression(C=1), axis=1))

# initialize the GAT object
gat = GeneralizationAcrossTime(clf=clf, scorer=scorer_auc, n_jobs=-1, cv=10)

# each contrast is a pair of single-letter condition names: the first
# letter is labelled 0, the second one 1
for contrast in ['HL', 'EU', 'PR']:
    name_0 = contrast[0]
    name_1 = contrast[1]
    epochs_ = concatenate_epochs((epochs[name_0], epochs[name_1]))
    y = np.hstack((np.zeros(len(epochs[name_0])),
                   np.ones(len(epochs[name_1]))))
    gat.fit(epochs_, y=y)

    # store the fitted object
    fname = op.join(data_path, 's%i_%s_fit.pkl' % (subject, contrast))
    with open(fname, 'wb') as f:
        pickle.dump(gat, f)
    # TODO: should save y_pred separately

    # predict + score
    scores = gat.score(epochs_, y=y)
# RUN GAT


def angle2circle(angles):
    """Map angles in degrees to radians after shifting by 7.5 and doubling."""
    return np.deg2rad(2 * (angles + 7.5))


def circle2angle(angles):
    """Map radians back to degrees; exact inverse of ``angle2circle``.

    The previous expression, ``np.rad2deg(2 * angles) / 2 - 7.5``, multiplied
    and divided by 2 (a no-op) and therefore did not undo the doubling done
    by ``angle2circle``.
    """
    return np.rad2deg(angles) / 2 - 7.5


def cos(angles):
    """Cosine of the angles (degrees) once projected on the circle."""
    return np.cos(angle2circle(angles))


def sin(angles):
    """Sine of the angles (degrees) once projected on the circle."""
    return np.sin(angle2circle(angles))


scaler = StandardScaler()
svr = SVR(C=1, kernel='linear')
clf = Pipeline([('scaler', scaler), ('svr', svr)])

# Fit one regressor-based GAT per circular component: the cosine first, then
# the sine of the trial angles.
gats = list()
for transform in [cos, sin]:
    gat = GeneralizationAcrossTime(n_jobs=1, clf=clf)
    gat.fit(epochs, y=transform(trial_angles))
    gat.score(epochs, y=transform(trial_angles))
    gats.append(gat)


# ALIGN ANGLES
def cart2pol(x, y):
    """Return the polar angle (radians) of the cartesian point (x, y)."""
    return np.arctan2(y, x)


pi = np.pi

# gats[0] was trained on the cosine, gats[1] on the sine.
gatx = gats[0]
gaty = gats[1]

# get true angle
true_x = gatx.y_train_
true_y = gaty.y_train_
# Convert the boolean selection mask into trial indices.
sel = np.where(sel)[0]

# reduce number of trials if too many
# XXX just for speed, remove
if len(sel) > 400:
    import random
    random.shuffle(sel)
    sel = sel[0:400]

y = np.array(events[cond_name].tolist())

# Apply contrast
# Pick the GAT keyword arguments matching the classifier type. Fail loudly on
# an unknown type instead of hitting a NameError or silently reusing the
# parameters left over from a previous run of this code.
if clf_type['name'] == 'SVC':
    decoding_parameters = decoding_params[0]['values']
elif clf_type['name'] == 'SVR':
    decoding_parameters = decoding_params[1]['values']
else:
    raise ValueError('Unknown classifier type: %r' % (clf_type['name'],))

gat = GeneralizationAcrossTime(**decoding_parameters)
gat.fit(epochs[sel], y=y[sel])
gat.score(epochs[sel], y=y[sel])

# Plot
fig = gat.plot_diagonal(show=False)
report.add_figs_to_section(
    fig, ('%s %s: (decoding)' % (subject, cond_name)), subject)
fig = gat.plot(show=False)
report.add_figs_to_section(
    fig, ('%s %s: GAT' % (subject, cond_name)), subject)

# Save contrast
pkl_fname = op.join(
    data_path, subject, 'mvpas',
    '{}-decod_{}_{}{}.pickle'.format(subject, cond_name, clf_type['name'],
                                     fname_appendix))
# Encode the event codes (last column of the events array) as 0..n-1 labels.
lbl = LabelEncoder()
y = lbl.fit_transform(epochs.events[:, -1])

# Parenthesized so that the line is valid under both Python 2 and Python 3.
print('get ready for decoding ;)')

# Generalization Across Time
# The classifier (`clf`) and the number of folds (`n_folds`) are supplied by
# the surrounding code; training windows are restricted by `train_times`.
train_times = {
    'start': tmin,
    'stop': tmax,
    'length': length,
    'step': step
}
gat = GeneralizationAcrossTime(predict_mode='cross-validation', n_jobs=1,
                               train_times=train_times, clf=clf, cv=n_folds)
gat.fit(epochs, y=y)
gat.score(epochs, y=y)
np.save(fname_gat, gat.scores_)

# store weights
weights = list()
for fold in range(n_folds):
    # weights explained:
    #   gat.estimators_[time_point][fold].steps[-1][-1].coef_
    # Iterate over the fitted training windows themselves rather than over
    # range(len(epochs.times)): `train_times` may yield fewer windows than
    # there are time samples, which would make the index run out of range.
    weights.append(
        np.vstack([
            fold_estimators[fold].steps[-1][-1].coef_
            for fold_estimators in gat.estimators_
        ]))
decim = 2  # decimate to make the example faster to run
epochs = mne.Epochs(raw, events, event_id, -0.050, 0.400, proj=True,
                    picks=picks, baseline=None, preload=True,
                    reject=dict(mag=5e-12), decim=decim, verbose=False)

# We will train the classifier on all left visual vs auditory trials
# and test on all right visual vs auditory trials.

# In this case, because the test data is independent from the train data,
# we test the classifier of each fold and average the respective predictions.

# Define events of interest
triggers = epochs.events[:, 2]
# NOTE(review): `viz_vs_auditory` is not used in this section; it flags the
# trials whose trigger is 1 or 2. Kept in case later code relies on it.
viz_vs_auditory = np.in1d(triggers, (1, 2)).astype(int)

gat = GeneralizationAcrossTime(predict_mode='mean-prediction', n_jobs=1)

# For our left events, which ones are visual?
# (triggers 1 and 3 are selected; trigger 3 is labelled 1, trigger 1 is 0)
viz_vs_auditory_l = (triggers[np.in1d(triggers, (1, 3))] == 3).astype(int)
# To make scikit-learn happy, we converted the bool array to integers
# in the same line. This results in an array of zeros and ones:
print("The unique classes' labels are: %s" % np.unique(viz_vs_auditory_l))

gat.fit(epochs[('AudL', 'VisL')], y=viz_vs_auditory_l)

# For our right events, which ones are visual?
# (triggers 2 and 4 are selected; trigger 4 is labelled 1, trigger 2 is 0)
viz_vs_auditory_r = (triggers[np.in1d(triggers, (2, 4))] == 4).astype(int)

gat.score(epochs[('AudR', 'VisR')], y=viz_vs_auditory_r)
gat.plot(
    title="Generalization Across Time (visual vs auditory): left to right")
# NOTE(review): shuffle=True without a random_state makes the folds (and
# therefore the scores) differ from run to run; pass a seed if
# reproducibility matters.
cv = StratifiedKFold(n_splits=10, shuffle=True)

# Create epochs to use for classification
n_trial, n_chan, n_time = X.shape
# One event per trial: column 0 is the trial index, column 1 is zero and
# column 2 is the integer class label.
events = np.vstack((np.arange(n_trial),
                    np.zeros(n_trial, int),
                    y.astype(int))).T
chan_names = ['MEG %i' % chan for chan in range(n_chan)]
chan_types = ['mag'] * n_chan
sfreq = 250
info = create_info(chan_names, sfreq, chan_types)
epochs = EpochsArray(data=X, info=info, events=events, verbose=False)
# Overwrite the time axis with the externally selected time points, then
# drop everything before -3.8 s.
epochs.times = selected_times[:n_time]
epochs.crop(-3.8, None)

# fit model and score
gat = GeneralizationAcrossTime(
    scorer="accuracy", cv=cv, predict_method="predict")
gat.fit(epochs, y=y)
gat.score(epochs, y=y)

# Save model
joblib.dump(gat, data_path + "decode_time_gen/%s_gat_tr.jl" % subject)

# make matrix plot and save it
fig = gat.plot(
    cmap="viridis",
    title="Temporal Gen (Classic vs planning) for transitivity.")
fig.savefig(data_path + "decode_time_gen/%s_gat_matrix_tr.png" % subject)

# make diagonal plot and save it
fig = gat.plot_diagonal(
    chance=0.5,
    title="Temporal Gen (Classic vs planning) for transitivity")
fig.savefig(data_path + "decode_time_gen/%s_gat_diagonal_tr.png" % subject)
def test_generalization_across_time():
    """Test time generalization decoding.

    Exercises GeneralizationAcrossTime end to end on the epochs returned by
    ``make_epochs()``: its string representation at each stage, fit /
    predict / score with optional ``y``, training and testing time
    parameters, error handling for invalid settings, and several
    classifier / scorer / label combinations.
    """
    from sklearn.svm import SVC
    from sklearn.linear_model import RANSACRegressor, LinearRegression
    from sklearn.preprocessing import LabelEncoder
    from sklearn.metrics import mean_squared_error
    from sklearn.cross_validation import LeaveOneLabelOut

    epochs = make_epochs()

    # Test default running
    gat = GeneralizationAcrossTime(picks='foo')
    assert_equal("<GAT | no fit, no prediction, no score>", "%s" % gat)
    assert_raises(ValueError, gat.fit, epochs)
    with warnings.catch_warnings(record=True):
        # check classic fit + check manual picks
        gat.picks = [0]
        gat.fit(epochs)
        # check optional y as array
        gat.picks = None
        gat.fit(epochs, y=epochs.events[:, 2])
        # check optional y as list
        gat.fit(epochs, y=epochs.events[:, 2].tolist())
    # The original call passed a third positional argument (1), which
    # assert_equal treats as the failure message, not as a value to compare.
    # Only the two lengths were ever compared, so that is what is kept.
    assert_equal(len(gat.picks_), len(gat.ch_names))
    assert_equal("<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), no "
                 "prediction, no score>", '%s' % gat)
    assert_equal(gat.ch_names, epochs.ch_names)
    gat.predict(epochs)
    assert_equal("<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), "
                 "predicted 14 epochs, no score>", "%s" % gat)
    gat.score(epochs)
    gat.score(epochs, y=epochs.events[:, 2])
    gat.score(epochs, y=epochs.events[:, 2].tolist())
    assert_equal("<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), "
                 "predicted 14 epochs,\n scored "
                 "(accuracy_score)>", "%s" % gat)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs, y=epochs.events[:, 2])

    # An unknown predict_mode must be rejected at predict time.
    old_mode = gat.predict_mode
    gat.predict_mode = 'super-foo-mode'
    assert_raises(ValueError, gat.predict, epochs)
    gat.predict_mode = old_mode

    gat.score(epochs, y=epochs.events[:, 2])
    assert_true("accuracy_score" in '%s' % gat.scorer_)
    epochs2 = epochs.copy()

    # check _DecodingTime class
    assert_equal("<DecodingTime | start: -0.200 (s), stop: 0.499 (s), step: "
                 "0.050 (s), length: 0.050 (s), n_time_windows: 15>",
                 "%s" % gat.train_times_)
    assert_equal("<DecodingTime | start: -0.200 (s), stop: 0.499 (s), step: "
                 "0.050 (s), length: 0.050 (s), n_time_windows: 15 x 15>",
                 "%s" % gat.test_times_)

    # the y-check: scoring epochs whose labels were never seen during
    # training must fail
    gat.predict_mode = 'mean-prediction'
    epochs2.events[:, 2] += 10
    gat_ = copy.deepcopy(gat)
    assert_raises(ValueError, gat_.score, epochs2)
    gat.predict_mode = 'cross-validation'

    # Test basics
    # --- number of trials
    assert_true(gat.y_train_.shape[0] ==
                gat.y_true_.shape[0] ==
                len(gat.y_pred_[0][0]) == 14)
    # --- number of folds
    assert_true(np.shape(gat.estimators_)[1] == gat.cv)
    # --- length training size
    assert_true(len(gat.train_times_['slices']) == 15 ==
                np.shape(gat.estimators_)[0])
    # --- length testing sizes
    assert_true(len(gat.test_times_['slices']) == 15 ==
                np.shape(gat.scores_)[0])
    assert_true(len(gat.test_times_['slices'][0]) == 15 ==
                np.shape(gat.scores_)[1])

    # Test longer time window
    gat = GeneralizationAcrossTime(train_times={'length': .100})
    with warnings.catch_warnings(record=True):
        gat2 = gat.fit(epochs)
    assert_true(gat is gat2)  # return self
    assert_true(hasattr(gat2, 'cv_'))
    assert_true(gat2.cv_ != gat.cv)
    scores = gat.score(epochs)
    assert_true(isinstance(scores, list))  # type check
    assert_equal(len(scores[0]), len(scores))  # shape check
    assert_equal(len(gat.test_times_['slices'][0][0]), 2)

    # Decim training steps
    gat = GeneralizationAcrossTime(train_times={'step': .100})
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    gat.score(epochs)
    assert_true(len(gat.scores_) == len(gat.estimators_) == 8)  # training time
    assert_equal(len(gat.scores_[0]), 15)  # testing time

    # Test start stop training & test cv without n_fold params
    y_4classes = np.hstack((epochs.events[:7, 2], epochs.events[7:, 2] + 1))
    gat = GeneralizationAcrossTime(cv=LeaveOneLabelOut(y_4classes),
                                   train_times={'start': 0.090, 'stop': 0.250})
    # predict without fit
    assert_raises(RuntimeError, gat.predict, epochs)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs, y=y_4classes)
    gat.score(epochs)
    assert_equal(len(gat.scores_), 4)
    assert_equal(gat.train_times_['times'][0], epochs.times[6])
    assert_equal(gat.train_times_['times'][-1], epochs.times[9])

    # Test score without passing epochs & Test diagonal decoding
    gat = GeneralizationAcrossTime(test_times='diagonal')
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    assert_raises(RuntimeError, gat.score)
    gat.predict(epochs)
    scores = gat.score()
    assert_true(scores is gat.scores_)
    assert_equal(np.shape(gat.scores_), (15, 1))
    assert_array_equal([tim for ttime in gat.test_times_['times']
                        for tim in ttime], gat.train_times_['times'])

    # Test generalization across conditions
    gat = GeneralizationAcrossTime(predict_mode='mean-prediction')
    with warnings.catch_warnings(record=True):
        gat.fit(epochs[0:6])
    gat.predict(epochs[7:])
    gat.score(epochs[7:])

    # Test training time parameters
    gat_ = copy.deepcopy(gat)
    # --- start stop outside time range
    gat_.train_times = dict(start=-999.)
    assert_raises(ValueError, gat_.fit, epochs)
    gat_.train_times = dict(start=999.)
    assert_raises(ValueError, gat_.fit, epochs)
    # --- impossible slices
    gat_.train_times = dict(step=.000001)
    assert_raises(ValueError, gat_.fit, epochs)
    gat_.train_times = dict(length=.000001)
    assert_raises(ValueError, gat_.fit, epochs)
    gat_.train_times = dict(length=999.)
    assert_raises(ValueError, gat_.fit, epochs)

    # Test testing time parameters
    # --- outside time range
    gat.test_times = dict(start=-999.)
    assert_raises(ValueError, gat.predict, epochs)
    gat.test_times = dict(start=999.)
    assert_raises(ValueError, gat.predict, epochs)
    # --- impossible slices
    gat.test_times = dict(step=.000001)
    assert_raises(ValueError, gat.predict, epochs)
    gat_ = copy.deepcopy(gat)
    gat_.train_times_['length'] = .000001
    gat_.test_times = dict(length=.000001)
    assert_raises(ValueError, gat_.predict, epochs)
    # --- test time region of interest
    gat.test_times = dict(step=.150)
    gat.predict(epochs)
    assert_array_equal(np.shape(gat.y_pred_), (15, 5, 14, 1))
    # --- silly value
    gat.test_times = 'foo'
    assert_raises(ValueError, gat.predict, epochs)
    assert_raises(RuntimeError, gat.score)
    # --- unmatched length between training and testing time
    gat.test_times = dict(length=.150)
    assert_raises(ValueError, gat.predict, epochs)

    svc = SVC(C=1, kernel='linear', probability=True)
    gat = GeneralizationAcrossTime(clf=svc, predict_mode='mean-prediction')
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)

    # sklearn needs it: c.f.
    # https://github.com/scikit-learn/scikit-learn/issues/2723
    # and http://bit.ly/1u7t8UT
    assert_raises(ValueError, gat.score, epochs2)
    # Keep the scores of *this* estimator: the range check below used to run
    # on the stale `scores` returned by the diagonal-decoding section.
    scores = gat.score(epochs)
    scores = sum(scores, [])  # flatten
    assert_true(0.0 <= np.min(scores) <= 1.0)
    assert_true(0.0 <= np.max(scores) <= 1.0)

    # Test that gets error if train on one dataset, test on another, and don't
    # specify appropriate cv:
    gat = GeneralizationAcrossTime()
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)

    gat.predict(epochs)
    assert_raises(IndexError, gat.predict, epochs[:10])

    # TODO JRK: test GAT with non-exhaustive CV (eg. train on 80%, test on 10%)

    # Check that still works with classifier that output y_pred with
    # shape = (n_trials, 1) instead of (n_trials,)
    gat = GeneralizationAcrossTime(clf=RANSACRegressor(LinearRegression()),
                                   cv=2)
    # Keep only the first time samples so the remaining checks run quickly.
    epochs.crop(None, epochs.times[2])
    gat.fit(epochs)
    gat.predict(epochs)

    # Test combinations of complex scenarios
    # 2 or more distinct classes
    n_classes = [2, 4]  # 4 tested
    # nicely ordered labels or not
    le = LabelEncoder()
    y = le.fit_transform(epochs.events[:, 2])
    y[len(y) // 2:] += 2
    ys = (y, y + 1000)
    # Univariate and multivariate prediction
    svc = SVC(C=1, kernel='linear')

    class SVC_proba(SVC):
        """SVC whose ``predict`` returns the first predict_proba column."""

        def predict(self, x):
            probas = super(SVC_proba, self).predict_proba(x)
            return probas[:, 0]

    svcp = SVC_proba(C=1, kernel='linear', probability=True)
    clfs = [svc, svcp]
    scorers = [None, mean_squared_error]
    # Test all combinations
    for clf, scorer in zip(clfs, scorers):
        for y in ys:
            for n_class in n_classes:
                y_ = y % n_class
                with warnings.catch_warnings(record=True):
                    gat = GeneralizationAcrossTime(cv=2, clf=clf,
                                                   scorer=scorer)
                    gat.fit(epochs, y=y_)
                    gat.score(epochs, y=y_)
print(__doc__)

# Preprocess data
data_path = spm_face.data_path()
# Load and filter data, set up epochs
raw_fname = data_path + '/MEG/spm/SPM_CTF_MEG_example_faces%d_3D_raw.fif'

raw = mne.io.Raw(raw_fname % 1, preload=True)  # Take first run

picks = mne.pick_types(raw.info, meg=True, exclude='bads')
raw.filter(1, 45, method='iir')

events = mne.find_events(raw, stim_channel='UPPT001')
event_id = {"faces": 1, "scrambled": 2}
tmin, tmax = -0.1, 0.5

decim = 4  # decimate to make the example faster to run
epochs = mne.Epochs(raw, events, event_id, tmin, tmax, proj=True,
                    picks=picks, baseline=None, preload=True,
                    reject=dict(mag=1.5e-12), decim=decim, verbose=False)

# Define decoder. The same epochs are used for fitting and scoring below, so
# predictions are made in 'cross-validation' mode.
gat = GeneralizationAcrossTime(predict_mode='cross-validation', n_jobs=1)

# fit and score
gat.fit(epochs)
gat.score(epochs)
gat.plot(vmin=0.1, vmax=0.9,
         title="Generalization Across Time (faces vs. scrambled)")
gat.plot_diagonal()  # plot decoding across time (correspond to GAT diagonal)
# Recode the two control conditions as classes 0 (left) and 1 (right) so
# that the class labels can be read from the events of the merged epochs.
epochs_clt_left.events[:, 2] = 0
epochs_clt_right.events[:, 2] = 1

epochs_clt_left.event_id = {"0": 0}
epochs_clt_right.event_id = {"1": 1}

# Concatenate the two conditions, then equalise the number of epochs per class
epochs_data = mne.concatenate_epochs([epochs_clt_left, epochs_clt_right])
epochs_data.equalize_event_counts(["0", "1"])

# Classifier
clf = make_pipeline(StandardScaler(), LogisticRegression(C=1))

# Setup the GAT; no y vector is built here, fit() and score() are called
# without an explicit y.
# `clf` was previously created but never handed to the GAT, which therefore
# fell back to its built-in default classifier; pass it explicitly.
# NOTE(review): with predict_mode='mean-prediction' and the same epochs used
# for fit() and score(), scoring includes the training data - confirm that
# 'cross-validation' is not what is intended.
gat = GeneralizationAcrossTime(
    clf=clf, predict_mode='mean-prediction', scorer="roc_auc", n_jobs=1)

# Fit model
print("fitting GAT")
gat.fit(epochs_data)

# Scoring
print("Scoring GAT")
gat.score(epochs_data)

# Save model
joblib.dump(
    gat, data_path + "decode_time_gen/%s_gat_allsensor-grad_ctl.jl" % subject)

# make matrix plot
fig = gat.plot(cmap="viridis", title="Temporal Gen for subject: %s" % subject)
def simulate_model(sources, mixin, background, snr=.5, n_trial=100):
    """Simulate target present/absent trials and decode them with a GAT.

    Steps:

    1. For each visibility condition in ``sources``, build ``n_trial // 2``
       target-present trials (``source + background``, coded ``y = 1``) and
       as many target-absent trials (``background`` only, coded ``y = -1``).
    2. Project the trials from source space to sensor space with ``mixin``
       and add Gaussian noise divided by ``snr``.
    3. Pool the trials of all visibility conditions, shuffle them and wrap
       them into an ``EpochsArray``.
    4. Fit target presence (``y``) across all trials and predict it.
    5. Score target presence separately for each visibility code (0 and 1),
       using all absent trials plus the trials of that visibility.
    6. Correlate the predictions with visibility over present trials.
    7. Fit and score visibility on present trials only.

    Parameters
    ----------
    sources : dict
        Maps a visibility name to a 2-D array unpacked as
        (n_sources, n_times). The key ``'high'`` is coded as visibility 1,
        any other key as 0.
    mixin : 2-D array, unpacked as (n_sources, n_chan)
        Mixing matrix projecting source space onto sensor space.
    background : array
        Activity added to every trial; it must broadcast against each source.
    snr : float
        The sensor noise (standard normal) is divided by this value.
    n_trial : int
        Number of trials per visibility condition. Half are present and half
        absent, so an odd value is rounded down to the nearest even number
        (the original code failed on a shape mismatch in that case).

    Returns
    -------
    score : array
        ``scorer_auc`` outputs for visibility codes 0 and 1, stacked.
    score_vis : array
        Squeezed ``gat.score`` output of the visibility decoding.
    corr_vis : array
        Squeezed ``scorer_spearman`` output between visibility and the
        predictions of present trials.

    Notes
    -----
    Relies on the module-level names ``sfreq``, ``times``, ``analysis``,
    ``scorer_auc`` and ``scorer_spearman``.
    """
    _, n_chan = mixin.shape
    n_half = n_trial // 2
    n_cond = 2 * n_half  # trials actually generated per visibility condition

    X, y, visibility = list(), list(), list()
    # .items() works on both Python 2 and Python 3 (iteritems is 2-only).
    for vis, source in sources.items():
        _, n_time = source.shape

        # define present and absent in source space
        present = np.stack([source + background] * n_half)
        absent = np.stack([background] * n_half)
        source = np.vstack((present, absent))
        y_ = np.hstack((np.ones(n_half), -1 * np.ones(n_half)))

        # transform in sensor space: lay the trials side by side along the
        # time axis, project them, then fold back to (trial, chan, time)
        sensor = np.dot(mixin.T, np.hstack(source))
        sensor = np.reshape(sensor, [n_chan, -1, n_time]).transpose(1, 0, 2)

        # add sensor specific noise
        sensor += np.random.randn(n_cond, n_chan, n_time) / snr

        X.append(sensor)
        y.append(y_)
        visibility.append(int(vis == 'high') * np.ones(n_cond))

    X = np.concatenate(X, axis=0)
    y = np.concatenate(y, axis=0)
    visibility = np.concatenate(visibility, axis=0)

    # shuffle trials; sized from the data so that it stays correct whatever
    # the number of visibility conditions, and valid on Python 3 where
    # range() cannot be shuffled in place
    idx = np.random.permutation(len(y))
    X, y, visibility = X[idx], y[idx], visibility[idx]

    # format to MNE epochs
    epochs = EpochsArray(X, create_info(n_chan, sfreq, 'mag'), tmin=times[0],
                         proj=False, baseline=None)

    # Temporal generalization pipeline
    gat = GeneralizationAcrossTime(clf=analysis['clf'], cv=8,
                                   scorer=scorer_auc, n_jobs=-1,
                                   score_mode='mean-sample-wise')
    gat.fit(epochs, y=y)
    y_pred = gat.predict(epochs)
    # Drop the last axis and move the third axis first
    # (presumably trials - TODO confirm against gat.predict's output layout).
    y_pred = y_pred[:, :, :, 0].transpose(2, 0, 1)

    score = list()
    for vis in range(2):
        # select all absent trials + present at a given visibility
        sel = np.unique(np.hstack((np.where(y == -1)[0],
                                   np.where(visibility == vis)[0])))
        score_ = scorer_auc(y[sel], y_pred[sel], n_jobs=-1)
        score.append(score_)

    # present trials only
    sel = np.where(y == 1)[0]

    # correlation with visibility
    corr_vis = scorer_spearman(visibility[sel], y_pred[sel], n_jobs=-1)

    # decode visibility
    gat.fit(epochs[sel], y=visibility[sel])
    score_vis = gat.score(epochs[sel], y=visibility[sel])
    return np.array(score), np.squeeze(score_vis), np.squeeze(corr_vis)
def test_generalization_across_time():
    """Test time generalization decoding.

    Builds a small two-channel Epochs object from the raw test file and
    exercises GeneralizationAcrossTime: its string representation at each
    stage, fit / predict / score with optional ``y`` and ``picks``, training
    time parameters, diagonal decoding, error handling for invalid settings,
    and several classifier / scorer / label combinations.
    """
    from sklearn.svm import SVC
    from sklearn.preprocessing import LabelEncoder
    from sklearn.metrics import mean_squared_error

    raw = io.Raw(raw_fname, preload=False)
    events = read_events(event_name)
    picks = pick_types(raw.info, meg='mag', stim=False, ecg=False,
                       eog=False, exclude='bads')
    picks = picks[0:2]
    decim = 30

    # Test on time generalization within one condition
    with warnings.catch_warnings(record=True):
        epochs = Epochs(raw, events, event_id, tmin, tmax, picks=picks,
                        baseline=(None, 0), preload=True, decim=decim)

    # Test default running
    gat = GeneralizationAcrossTime()
    assert_equal("<GAT | no fit, no prediction, no score>", "%s" % gat)
    assert_raises(ValueError, gat.fit, epochs, picks='foo')
    with warnings.catch_warnings(record=True):
        # check classic fit + check manual picks
        gat.fit(epochs, picks=[0])
        # check optional y as array
        gat.fit(epochs, y=epochs.events[:, 2])
        # check optional y as list
        gat.fit(epochs, y=epochs.events[:, 2].tolist())
    # The original call passed a third positional argument (1), which
    # assert_equal treats as the failure message, not as a value to compare.
    # Only the two lengths were ever compared, so that is what is kept.
    assert_equal(len(gat.picks_), len(gat.ch_names))
    assert_equal("<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), no "
                 "prediction, no score>", '%s' % gat)
    assert_equal(gat.ch_names, epochs.ch_names)
    gat.predict(epochs)
    assert_equal("<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), "
                 "predicted 14 epochs, no score>", "%s" % gat)
    gat.score(epochs)
    gat.score(epochs, y=epochs.events[:, 2])
    gat.score(epochs, y=epochs.events[:, 2].tolist())
    assert_equal("<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), "
                 "predicted 14 epochs,\n scored "
                 "(accuracy_score)>", "%s" % gat)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs, y=epochs.events[:, 2])

    # An unknown predict_mode must be rejected at predict time.
    old_mode = gat.predict_mode
    gat.predict_mode = 'super-foo-mode'
    assert_raises(ValueError, gat.predict, epochs)
    gat.predict_mode = old_mode

    gat.score(epochs, y=epochs.events[:, 2])
    assert_true("accuracy_score" in '%s' % gat.scorer_)
    epochs2 = epochs.copy()

    # check _DecodingTime class
    assert_equal("<DecodingTime | start: -0.200 (s), stop: 0.499 (s), step: "
                 "0.047 (s), length: 0.047 (s), n_time_windows: 15>",
                 "%s" % gat.train_times)
    assert_equal("<DecodingTime | start: -0.200 (s), stop: 0.499 (s), step: "
                 "0.047 (s), length: 0.047 (s), n_time_windows: 15 x 15>",
                 "%s" % gat.test_times_)

    # the y-check: scoring epochs whose labels were never seen during
    # training must fail
    gat.predict_mode = 'mean-prediction'
    epochs2.events[:, 2] += 10
    gat_ = copy.deepcopy(gat)
    assert_raises(ValueError, gat_.score, epochs2)
    gat.predict_mode = 'cross-validation'

    # Test basics
    # --- number of trials
    assert_true(gat.y_train_.shape[0] ==
                gat.y_true_.shape[0] ==
                len(gat.y_pred_[0][0]) == 14)
    # --- number of folds
    assert_true(np.shape(gat.estimators_)[1] == gat.cv)
    # --- length training size
    assert_true(len(gat.train_times['slices']) == 15 ==
                np.shape(gat.estimators_)[0])
    # --- length testing sizes
    assert_true(len(gat.test_times_['slices']) == 15 ==
                np.shape(gat.scores_)[0])
    assert_true(len(gat.test_times_['slices'][0]) == 15 ==
                np.shape(gat.scores_)[1])

    # Test longer time window
    gat = GeneralizationAcrossTime(train_times={'length': .100})
    with warnings.catch_warnings(record=True):
        gat2 = gat.fit(epochs)
    assert_true(gat is gat2)  # return self
    assert_true(hasattr(gat2, 'cv_'))
    assert_true(gat2.cv_ != gat.cv)
    scores = gat.score(epochs)
    assert_true(isinstance(scores, list))  # type check
    assert_equal(len(scores[0]), len(scores))  # shape check
    assert_equal(len(gat.test_times_['slices'][0][0]), 2)

    # Decim training steps
    gat = GeneralizationAcrossTime(train_times={'step': .100})
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    gat.score(epochs)
    assert_true(len(gat.scores_) == len(gat.estimators_) == 8)  # training time
    assert_equal(len(gat.scores_[0]), 15)  # testing time

    # Test start stop training
    gat = GeneralizationAcrossTime(train_times={'start': 0.090,
                                                'stop': 0.250})
    # predict without fit
    assert_raises(RuntimeError, gat.predict, epochs)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    gat.score(epochs)
    assert_equal(len(gat.scores_), 4)
    assert_equal(gat.train_times['times_'][0], epochs.times[6])
    assert_equal(gat.train_times['times_'][-1], epochs.times[9])

    # Test score without passing epochs
    gat = GeneralizationAcrossTime()
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    assert_raises(RuntimeError, gat.score)
    gat.predict(epochs, test_times='diagonal')  # Test diagonal decoding
    scores = gat.score()
    assert_true(scores is gat.scores_)
    assert_equal(np.shape(gat.scores_), (15, 1))

    # Test generalization across conditions
    gat = GeneralizationAcrossTime(predict_mode='mean-prediction')
    with warnings.catch_warnings(record=True):
        gat.fit(epochs[0:6])
    gat.predict(epochs[7:])
    assert_raises(ValueError, gat.predict, epochs, test_times='hahahaha')
    assert_raises(RuntimeError, gat.score)
    gat.score(epochs[7:])

    svc = SVC(C=1, kernel='linear', probability=True)
    gat = GeneralizationAcrossTime(clf=svc, predict_mode='mean-prediction')
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)

    # sklearn needs it: c.f.
    # https://github.com/scikit-learn/scikit-learn/issues/2723
    # and http://bit.ly/1u7t8UT
    assert_raises(ValueError, gat.score, epochs2)
    # Keep the scores of *this* estimator: the range check below used to run
    # on the stale `scores` returned by the diagonal-decoding section.
    scores = gat.score(epochs)
    scores = sum(scores, [])  # flatten
    assert_true(0.0 <= np.min(scores) <= 1.0)
    assert_true(0.0 <= np.max(scores) <= 1.0)

    # Test that gets error if train on one dataset, test on another, and don't
    # specify appropriate cv:
    gat = GeneralizationAcrossTime()
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)

    gat.predict(epochs)
    assert_raises(ValueError, gat.predict, epochs[:10])

    # Test combinations of complex scenarios
    # 2 or more distinct classes
    n_classes = [2, 4]  # 4 tested
    # nicely ordered labels or not
    le = LabelEncoder()
    y = le.fit_transform(epochs.events[:, 2])
    y[len(y) // 2:] += 2
    ys = (y, y + 1000)
    # Univariate and multivariate prediction
    svc = SVC(C=1, kernel='linear')

    class SVC_proba(SVC):
        """SVC whose ``predict`` returns the first predict_proba column."""

        def predict(self, x):
            probas = super(SVC_proba, self).predict_proba(x)
            return probas[:, 0]

    svcp = SVC_proba(C=1, kernel='linear', probability=True)
    clfs = [svc, svcp]
    scorers = [None, mean_squared_error]
    # Test all combinations
    for clf, scorer in zip(clfs, scorers):
        for y in ys:
            for n_class in n_classes:
                y_ = y % n_class
                with warnings.catch_warnings(record=True):
                    gat = GeneralizationAcrossTime(cv=2, clf=clf)
                    gat.fit(epochs, y=y_)
                    gat.score(epochs, y=y_, scorer=scorer)
# linear regression reg = linear_regression(epochs, design_matrix, reg_names) reg[c_name].beta.save(fname_reg) print 'get ready for decoding ;)' train_times = { 'start': tmin, 'stop': tmax, 'length': length, 'step': step } cv = KFold(n=len(y), n_folds=n_folds, random_state=random_state) gat = GeneralizationAcrossTime(predict_mode='cross-validation', n_jobs=-1, train_times=train_times, scorer=rank_scorer, clf=clf, cv=cv) gat.fit(epochs, y=y) gat.score(epochs, y=y) print gat.scores_.shape np.save(fname_gat, gat.scores_) # store weights weights = list() for fold in range(n_folds): # weights explained: gat.estimator_[time_point][fold].steps[-1][-1].coef_ weights.append( np.vstack([ gat.estimators_[idx][fold].steps[-1][-1].coef_ for idx in range(len(epochs.times))
# Create epochs to use for classification
n_trial, n_chan, n_time = X.shape
# One event per trial: column 0 is the trial index, column 1 is zero and
# column 2 is the integer class label.
events = np.vstack((np.arange(n_trial),
                    np.zeros(n_trial, int),
                    y.astype(int))).T
chan_names = ['MEG %i' % chan for chan in range(n_chan)]
chan_types = ['mag'] * n_chan
sfreq = 250
info = create_info(chan_names, sfreq, chan_types)
epochs = EpochsArray(data=X, info=info, events=events, verbose=False)
# Overwrite the time axis with the externally selected time points.
epochs.times = selected_times[:n_time]

# make classifier
clf = LogisticRegression(C=0.0001)

# fit model and score
gat = GeneralizationAcrossTime(
    clf=clf, scorer="roc_auc", cv=cv, predict_method="predict")
gat.fit(epochs, y=y)
gat.score(epochs, y=y)

# Save model
joblib.dump(gat, data_path + "decode_time_gen/gat_ge.jl")

# make matrix plot and save it
fig = gat.plot(
    cmap="viridis",
    title="Temporal Gen (Classic vs planning) for Global Eff.")
fig.savefig(data_path + "decode_time_gen/gat_matrix_ge.png")

# make diagonal plot and save it
fig = gat.plot_diagonal(
    chance=0.5,
    title="Temporal Gen (Classic vs planning) for Global eff.")
fig.savefig(data_path + "decode_time_gen/gat_diagonal_ge.png")
# Create epochs to use for classification
n_trial, n_chan, n_time = X.shape
# Events array, one row per trial: (trial index, 0, integer class label).
events = np.vstack((np.arange(n_trial),
                    np.zeros(n_trial, int),
                    y.astype(int))).T
chan_names = ['MEG %i' % chan for chan in range(n_chan)]
chan_types = ['mag'] * n_chan
sfreq = 250
info = create_info(chan_names, sfreq, chan_types)
epochs = EpochsArray(data=X, info=info, events=events, verbose=False)
# Replace the time axis by the first n_time externally selected time points.
epochs.times = selected_times[:n_time]

# make classifier
clf = LogisticRegression(C=0.0001)

# fit model and score
gat = GeneralizationAcrossTime(clf=clf,
                               scorer="roc_auc",
                               cv=cv,
                               predict_method="predict")
gat.fit(epochs, y=y)
gat.score(epochs, y=y)

# Save model
joblib.dump(gat, data_path + "decode_time_gen/gat_ge.jl")

# make matrix plot and save it
fig = gat.plot(cmap="viridis",
               title="Temporal Gen (Classic vs planning) for Global Eff.")
fig.savefig(data_path + "decode_time_gen/gat_matrix_ge.png")

# make diagonal plot and save it
fig = gat.plot_diagonal(
    chance=0.5,
    title="Temporal Gen (Classic vs planning) for Global eff.")
fig.savefig(data_path + "decode_time_gen/gat_diagonal_ge.png")
def test_generalization_across_time():
    """Test time generalization decoding.

    Builds a small two-channel Epochs object from the raw test file and
    exercises GeneralizationAcrossTime: its string representation at each
    stage, fit / predict / score, the ``predict_type`` and ``predict_mode``
    options, training time parameters, diagonal decoding, error handling for
    invalid settings, and several label / predict_type combinations.
    """
    from sklearn.svm import SVC

    raw = io.Raw(raw_fname, preload=False)
    events = read_events(event_name)
    picks = pick_types(raw.info, meg='mag', stim=False, ecg=False,
                       eog=False, exclude='bads')
    picks = picks[0:2]
    decim = 30

    # Test on time generalization within one condition
    with warnings.catch_warnings(record=True):
        epochs = Epochs(raw, events, event_id, tmin, tmax, picks=picks,
                        baseline=(None, 0), preload=True, decim=decim)

    # Test default running
    gat = GeneralizationAcrossTime()
    assert_equal("<GAT | no fit, no prediction, no score>", "%s" % gat)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    assert_equal(
        "<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), no "
        "prediction, no score>", '%s' % gat)
    gat.predict(epochs)
    assert_equal(
        "<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), "
        "predict_type : 'predict' on 15 epochs, no score>", "%s" % gat)
    gat.score(epochs)
    assert_equal(
        "<GAT | fitted, start : -0.200 (s), stop : 0.499 (s), "
        "predict_type : 'predict' on 15 epochs,\n scored "
        "(accuracy_score)>", "%s" % gat)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs, y=epochs.events[:, 2])

    # An unknown predict_type must be rejected at predict time.
    old_type = gat.predict_type
    gat.predict_type = 'foo'
    assert_raises(ValueError, gat.predict, epochs)
    gat.predict_type = old_type

    # An unknown predict_mode must be rejected at predict time.
    old_mode = gat.predict_mode
    gat.predict_mode = 'super-foo-mode'
    assert_raises(ValueError, gat.predict, epochs)
    gat.predict_mode = old_mode

    gat.score(epochs, y=epochs.events[:, 2])
    assert_true("accuracy_score" in '%s' % gat.scorer_)
    epochs2 = epochs.copy()

    # check _DecodingTime class
    assert_equal(
        "<DecodingTime | start: -0.200 (s), stop: 0.499 (s), step: "
        "0.047 (s), length: 0.047 (s), n_time_windows: 15>",
        "%s" % gat.train_times)
    assert_equal(
        "<DecodingTime | start: -0.200 (s), stop: 0.499 (s), step: "
        "0.047 (s), length: 0.047 (s), n_time_windows: 15 x 15>",
        "%s" % gat.test_times_)

    # the y-check: scoring epochs whose labels were never seen during
    # training must fail
    gat.predict_mode = 'mean-prediction'
    epochs2.events[:, 2] += 10
    assert_raises(ValueError, gat.score, epochs2)
    gat.predict_mode = 'cross-validation'

    # Test basics
    # --- number of trials
    assert_true(gat.y_train_.shape[0] ==
                gat.y_true_.shape[0] ==
                gat.y_pred_.shape[2] == 14)
    # --- number of folds
    assert_true(np.shape(gat.estimators_)[1] == gat.cv)
    # --- length training size
    assert_true(
        len(gat.train_times['slices']) == 15 == np.shape(gat.estimators_)[0])
    # --- length testing sizes
    assert_true(
        len(gat.test_times_['slices']) == 15 == np.shape(gat.scores_)[0])
    assert_true(
        len(gat.test_times_['slices'][0]) == 15 == np.shape(gat.scores_)[1])

    # Test longer time window
    gat = GeneralizationAcrossTime(train_times={'length': .100})
    with warnings.catch_warnings(record=True):
        gat2 = gat.fit(epochs)
    assert_true(gat is gat2)  # return self
    assert_true(hasattr(gat2, 'cv_'))
    assert_true(gat2.cv_ != gat.cv)
    scores = gat.score(epochs)
    assert_true(isinstance(scores, list))  # type check
    assert_equal(len(scores[0]), len(scores))  # shape check
    assert_equal(len(gat.test_times_['slices'][0][0]), 2)

    # Decim training steps
    gat = GeneralizationAcrossTime(train_times={'step': .100})
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    gat.score(epochs)
    assert_equal(len(gat.scores_), 8)

    # Test start stop training
    gat = GeneralizationAcrossTime(train_times={'start': 0.090,
                                                'stop': 0.250})
    # predict without fit
    assert_raises(RuntimeError, gat.predict, epochs)
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    gat.score(epochs)
    assert_equal(len(gat.scores_), 4)
    assert_equal(gat.train_times['times_'][0], epochs.times[6])
    assert_equal(gat.train_times['times_'][-1], epochs.times[9])

    # Test diagonal decoding
    gat = GeneralizationAcrossTime()
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    scores = gat.score(epochs, test_times='diagonal')
    assert_true(scores is gat.scores_)
    assert_equal(np.shape(gat.scores_), (15, 1))

    # Test generalization across conditions
    gat = GeneralizationAcrossTime(predict_mode='mean-prediction')
    with warnings.catch_warnings(record=True):
        gat.fit(epochs[0:6])
    gat.predict(epochs[7:])
    assert_raises(ValueError, gat.predict, epochs, test_times='hahahaha')
    gat.score(epochs[7:])

    svc = SVC(C=1, kernel='linear', probability=True)
    gat = GeneralizationAcrossTime(clf=svc, predict_type='predict_proba',
                                   predict_mode='mean-prediction')
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)

    # sklearn needs it: c.f.
    # https://github.com/scikit-learn/scikit-learn/issues/2723
    # and http://bit.ly/1u7t8UT
    assert_raises(ValueError, gat.score, epochs2)
    # Keep the scores of *this* estimator: the range check below used to run
    # on the stale `scores` returned by the diagonal-decoding section.
    scores = gat.score(epochs)
    scores = sum(scores, [])  # flatten
    assert_true(0.0 <= np.min(scores) <= 1.0)
    assert_true(0.0 <= np.max(scores) <= 1.0)

    # test various predict_type
    gat = GeneralizationAcrossTime(clf=svc, predict_type="predict_proba")
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    gat.predict(epochs)
    # check that 2 class probabilistic estimates are [p, 1-p]
    assert_true(gat.y_pred_.shape[3] == 2)
    gat.score(epochs)
    # check that continuous prediction leads to AUC rather than accuracy
    assert_true("roc_auc_score" in '%s' % gat.scorer_)

    gat = GeneralizationAcrossTime(predict_type="decision_function")
    # XXX Sklearn doesn't like non-binary inputs. We could binarize the data,
    # or change Sklearn default behavior
    epochs.events[:, 2][epochs.events[:, 2] == 3] = 0
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)
    gat.predict(epochs)
    # check that 2 class non-probabilistic continuous estimates are [distance]
    assert_true(gat.y_pred_.shape[3] == 1)
    gat.score(epochs)
    # check that continuous prediction leads to AUC rather than accuracy
    assert_true("roc_auc_score" in '%s' % gat.scorer_)

    # Test that gets error if train on one dataset, test on another, and don't
    # specify appropriate cv:
    gat = GeneralizationAcrossTime()
    with warnings.catch_warnings(record=True):
        gat.fit(epochs)

    gat.predict(epochs)
    assert_raises(ValueError, gat.predict, epochs[:10])

    # Test combinations of complex scenarios
    # 2 or more distinct classes
    n_classes = [2]  # 4 tested
    # nicely ordered labels or not
    # Work on a copy: slicing epochs.events returns a view, so the in-place
    # `+= 2` below would otherwise silently rewrite the epochs' event codes.
    # Every fit/score call below passes `y` explicitly.
    y = epochs.events[:, 2].copy()
    y[len(y) // 2:] += 2
    ys = (y, y + 1000)
    # Classifier and regressor
    svc = SVC(C=1, kernel='linear', probability=True)
    clfs = [svc]  # SVR tested
    # Continuous, and probabilistic estimate
    predict_types = ['predict_proba', 'decision_function']
    # Test all combinations
    for clf in clfs:
        for y in ys:
            for n_class in n_classes:
                for pt in predict_types:
                    y_ = y % n_class
                    with warnings.catch_warnings(record=True):
                        gat = GeneralizationAcrossTime(cv=2, clf=clf,
                                                       predict_type=pt)
                        gat.fit(epochs, y=y_)
                        gat.score(epochs, y=y_)