def run_gat(name, decoder="ridge", n_jobs=4):
    """
    Run Generalization Across Time (GAT) decoding for a single subject.

    A classifier is fitted at every time point of the subject's 'song' and
    'voice' epochs and scored at every time point (ROC-AUC). In addition,
    out-of-fold decision function values are collected for every epoch.

    Parameters
    ----------
    name: str
        Name (pseudonym) of individual subject.
    decoder: str
        Specify type of classifier - 'ridge' for Ridge Regression (default),
        'lin-svm' for linear SVM, 'svm' for nonlinear (RBF) SVM and
        'log_reg' for Logistic Regression. The spellings 'svm-lin' and
        'svm-nonlin' are accepted as well.
    n_jobs: int
        The number of jobs to run in parallel (default 4). It is handed to
        both the generalizing estimator and the cross-validation call.

    Returns
    -------
    scores: ndarray
        Scores returned by ``cross_val_multiscore``, averaged over the
        5 cross-validation folds.
    preds: ndarray, shape (n_epochs, n_times, n_times)
        Decision function values (prediction confidence) of each epoch,
        computed while that epoch was part of the held-out fold.

    Raises
    ------
    KeyError
        If `decoder` does not name one of the supported classifiers.
    """
    # load the 'song' and 'voice' epochs and extract data / targets once
    epochs = get_epochs(name)['song', 'voice']
    data = epochs.get_data()
    labels = epochs.events[:, -1]

    # Resolve the SVM kernel. Splitting on "-" and unpacking two values (as
    # done previously) failed for the documented names: plain 'svm' has no
    # "-" and 'lin-svm' produced the unknown key 'lin'.
    kernel = "linear"  # irrelevant unless an SVM is requested
    if "svm" in decoder:
        variant = decoder.replace("svm", "").strip("-")
        # bare 'svm' is documented as the nonlinear (RBF) SVM
        kernel = "rbf" if decoder == "svm" or "non" in variant else "linear"
        decoder = "svm"

    # pick a machine learning algorithm to use (ridge/SVM/logistic regression)
    decoder_dict = {
        "ridge": RidgeClassifier(class_weight='balanced', random_state=42,
                                 solver="sag"),
        "svm": SVC(class_weight='balanced', kernel=kernel, random_state=42),
        "log_reg": LogisticRegression(class_weight='balanced',
                                      random_state=42),
    }

    # build classifier pipeline
    clf = make_pipeline(StandardScaler(), decoder_dict[decoder])
    gen_clf = GeneralizingEstimator(clf, scoring="roc_auc", n_jobs=n_jobs)

    # compute cross validated performance scores (mean over folds)
    scores = cross_val_multiscore(gen_clf, data, labels,
                                  cv=5, n_jobs=n_jobs).mean(0)

    # calculate prediction confidence scores
    # shuffle=True is required for random_state to be valid; recent
    # scikit-learn versions raise a ValueError otherwise.
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    # size the output from the data instead of assuming 225 time points
    n_times = data.shape[2]
    preds = np.empty((len(labels), n_times, n_times))
    for train, test in cv.split(data, labels):
        gen_clf.fit(data[train], labels[train])
        # every epoch is in exactly one test fold, so preds is fully filled
        preds[test] = gen_clf.decision_function(data[test])

    # return subject scores and prediction confidence
    return scores, preds
def run_gat(subj, decoder="ridge", n_jobs=2):
    """
    Run Generalization Across Time (GAT) decoding for a single subject.

    A classifier is fitted at every time point of the subject's
    'Correct A' and 'Correct B' epochs and scored at every time point
    (ROC-AUC). Out-of-fold decision function values and, for linear
    classifiers, activation patterns of a classifier fitted on the
    vectorized epochs are returned as well.

    Parameters
    ----------
    subj: int
        Subject identifier, passed on to ``get_epochs``.
    decoder: str
        Specify type of classifier - 'ridge' for Ridge Regression (default),
        'lin-svm' for linear SVM, 'svm' for nonlinear (RBF) SVM and
        'log_reg' for Logistic Regression. The spellings 'svm-lin' and
        'svm-nonlin' are accepted as well.
    n_jobs: int
        The number of jobs to run in parallel. It is handed to both the
        generalizing estimator and the cross-validation call.

    Returns
    -------
    scores: ndarray
        Scores returned by ``cross_val_multiscore``, averaged over the
        5 cross-validation folds.
    preds: ndarray, shape (n_epochs, n_times, n_times)
        Decision function values (prediction confidence) of each epoch,
        computed while that epoch was part of the held-out fold.
    patterns: ndarray or None
        Patterns derived from the classifier weights (Haufe et al., 2014),
        passed through ``get_coef(..., inverse_transform=True)``. ``None``
        for the nonlinear (RBF) SVM, which has no ``coef_`` to derive
        patterns from.

    Raises
    ------
    KeyError
        If `decoder` does not name one of the supported classifiers.
    """
    # load cue A and cue B epochs
    epochs = get_epochs(subj)['Correct A', 'Correct B']

    # Resolve the SVM kernel. Splitting on "-" and unpacking two values (as
    # done previously) failed for the documented names: plain 'svm' has no
    # "-" and 'lin-svm' produced the unknown key 'lin'.
    kernel = "linear"  # irrelevant unless an SVM is requested
    if "svm" in decoder:
        variant = decoder.replace("svm", "").strip("-")
        # bare 'svm' is documented as the nonlinear (RBF) SVM
        kernel = "rbf" if decoder == "svm" or "non" in variant else "linear"
        decoder = "svm"

    # pick a machine learning algorithm to use (ridge/SVM/logistic regression)
    decoder_dict = {
        "ridge": RidgeClassifier(class_weight='balanced', random_state=42,
                                 solver="sag"),
        "svm": SVC(class_weight='balanced', kernel=kernel, random_state=42),
        "log_reg": LogisticRegression(class_weight='balanced',
                                      random_state=42),
    }

    # get data and targets
    data = epochs.get_data()
    labels = epochs.events[:, -1]

    # create classifier pipeline
    clf = make_pipeline(StandardScaler(), decoder_dict[decoder])
    gen_clf = GeneralizingEstimator(clf, scoring="roc_auc", n_jobs=n_jobs)

    # compute cross validated performance scores (mean over folds)
    scores = cross_val_multiscore(gen_clf, data, labels,
                                  cv=5, n_jobs=n_jobs).mean(0)

    # calculate prediction confidence scores
    cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    n_times = data.shape[2]
    preds = np.empty((len(labels), n_times, n_times))
    for train, test in cv.split(data, labels):
        gen_clf.fit(data[train], labels[train])
        # every epoch is in exactly one test fold, so preds is fully filled
        preds[test] = gen_clf.decision_function(data[test])

    # Patterns need linear weights. An RBF SVC exposes no coef_, so skip
    # the pattern step instead of failing after the costly decoding above.
    if kernel != "linear":
        return scores, preds, None

    # compute topographical patterns
    dat = Vectorizer().fit_transform(data)
    clf.fit(dat, labels)
    dat = dat - dat.mean(0, keepdims=True)

    # the classifier is always the last pipeline step, whatever its type
    estimator = clf.steps[-1][1]
    filt_ = estimator.coef_.copy()

    # Compute patterns using Haufe's trick: A = Cov_X . W . Precision_Y
    # (cf. Haufe, et al., 2014, NeuroImage,
    # doi:10.1016/j.neuroimage.2013.10.067), with Precision_Y taken as 1.
    # dat is already mean-centered, so Cov_X = dat.T . dat / (n - 1).
    # Multiplying by the weights first gives the same product as
    # np.cov(dat.T).dot(filt_.T) without materializing the
    # (n_features x n_features) covariance matrix.
    patt_ = dat.T.dot(dat.dot(filt_.T)).T / (len(dat) - 1)

    # store the patterns on the classifier so get_coef can find them
    estimator.patterns_ = patt_

    # back transform using steps in pipeline
    patterns = get_coef(clf, 'patterns_', inverse_transform=True)

    # return subject scores, prediction confidence and topographical patterns
    return scores, preds, patterns