# Imports for the pipeline components (Vectorizer is from mne.decoding;
# Xdawn, ERPCovariances, TangentSpace and MDM are from pyRiemann).
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from mne.decoding import Vectorizer
from pyriemann.spatialfilters import Xdawn
from pyriemann.estimation import ERPCovariances
from pyriemann.tangentspace import TangentSpace
from pyriemann.classification import MDM


def ml_classifier(inputs, targets, classifier=None, pipeline=None):
    """Use sklearn to fit a model given inputs and targets.

    Args:
        inputs: list containing (N trials * M channels) data segments
            of length (number of features).
        targets: list containing (N trials * M channels) of marker data (0 or 1).
        classifier: pre-trained LDA classifier; if None, train from scratch.
        pipeline: name of the pipeline to create if classifier is None.

    Returns:
        classifier: fitted classifier object.
    """
    pipeline_dict = {
        'vect_lr': make_pipeline(Vectorizer(), StandardScaler(),
                                 LogisticRegression()),
        'vect_reglda': make_pipeline(Vectorizer(),
                                     LDA(shrinkage='auto', solver='eigen')),
        'xdawn_reglda': make_pipeline(Xdawn(2, classes=[1]), Vectorizer(),
                                      LDA(shrinkage='auto', solver='eigen')),
        'erpcov_ts': make_pipeline(ERPCovariances(), TangentSpace(),
                                   LogisticRegression()),
        'erpcov_mdm': make_pipeline(ERPCovariances(), MDM()),
    }
    if not classifier and pipeline:
        classifier = pipeline_dict[pipeline.lower()]
    classifier.fit(inputs, targets)
    return classifier
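# Minimal usage sketch for ml_classifier (not from the original source): random
# arrays stand in for real epoched EEG, and the 'xdawn_reglda' key comes from
# the pipeline_dict above.
import numpy as np

inputs = np.random.randn(100, 3, 10)    # 100 trials, 3 channels, 10 samples
targets = np.array([0, 1]).repeat(50)   # binary markers, one per trial

# Train from scratch by naming a pipeline...
clf = ml_classifier(inputs, targets, pipeline='xdawn_reglda')
# ...or pass an existing classifier to refit it on new data.
clf = ml_classifier(inputs, targets, classifier=clf)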
def test_Xdawn_transform():
    """Test transform of Xdawn"""
    x = np.random.randn(100, 3, 10)
    labels = np.array([0, 1]).repeat(50)
    xd = Xdawn()
    xd.fit(x, labels)
    xd.transform(x)
def fit(self, X, y):
    """Fit.

    Estimate spatial filters and prototyped response for each class.

    Parameters
    ----------
    X : ndarray, shape (n_trials, n_channels, n_samples)
        ndarray of trials.
    y : ndarray, shape (n_trials,)
        labels corresponding to each trial.

    Returns
    -------
    self : XdawnCovariances instance
        The XdawnCovariances instance.
    """
    yb = continuous2discrete(y, self.bins)
    self.Xd_ = Xdawn(nfilter=self.nfilter,
                     classes=self.classes,
                     estimator=self.xdawn_estimator,
                     baseline_cov=self.baseline_cov)
    self.Xd_.fit(X, yb)
    self.P_ = self.Xd_.evokeds_
    return self
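# The continuous2discrete helper is not shown in this excerpt. A plausible
# sketch of what it must do (bin continuous labels into discrete classes so
# Xdawn can be fit), under an equal-width-bins assumption; the real helper
# may bin differently.
import numpy as np

def continuous2discrete(y, bins):
    """Hypothetical helper: map continuous labels to bin indices 0..bins-1."""
    edges = np.linspace(np.min(y), np.max(y), bins + 1)
    # Interior edges only, so every value of y falls into one of `bins` bins.
    return np.digitize(y, edges[1:-1])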
def test_Xdawn_baselinecov():
    """Test cov precomputation"""
    x = np.random.randn(100, 3, 10)
    labels = np.array([0, 1]).repeat(50)
    baseline_cov = np.identity(3)
    xd = Xdawn(baseline_cov=baseline_cov)
    xd.fit(x, labels)
    xd.transform(x)
def test_xdawn_baselinecov(rndstate, get_labels):
    """Test cov precomputation"""
    n_matrices, n_channels, n_times = 6, 5, 100
    n_classes, default_nfilter = 2, 4
    x = rndstate.randn(n_matrices, n_channels, n_times)
    labels = get_labels(n_matrices, n_classes)
    baseline_cov = np.identity(n_channels)
    xd = Xdawn(baseline_cov=baseline_cov)
    xd.fit(x, labels).transform(x)
    assert len(xd.filters_) == n_classes * default_nfilter
    for sfilt in xd.filters_:
        assert sfilt.shape == (n_channels,)
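# Self-contained equivalent of the test above (rndstate and get_labels are
# pytest fixtures from the surrounding module); assumes pyRiemann's Xdawn,
# whose filters_ stacks nfilter spatial filters per class, each of length
# n_channels.
import numpy as np
from pyriemann.spatialfilters import Xdawn

rng = np.random.RandomState(42)
x = rng.randn(6, 5, 100)               # 6 trials, 5 channels, 100 samples
labels = np.tile([0, 1], 3)            # 2 classes, 3 trials each

xd = Xdawn(baseline_cov=np.identity(5))
xd.fit(x, labels).transform(x)
print(np.asarray(xd.filters_).shape)   # (2 classes * 4 default filters, 5 channels)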
def P300_test(session_id):
    markers = P300_MARKERS
    epochs = get_session_erp_epochs(session_id, markers)
    conditions = OrderedDict()
    for i in range(len(markers) - 1):
        conditions[markers[i]] = [i + 1]

    clfs = OrderedDict()
    clfs['Vect + LR'] = make_pipeline(Vectorizer(), StandardScaler(),
                                      LogisticRegression())
    clfs['Vect + RegLDA'] = make_pipeline(Vectorizer(),
                                          LDA(shrinkage='auto', solver='eigen'))
    clfs['Xdawn + RegLDA'] = make_pipeline(Xdawn(2, classes=[1]), Vectorizer(),
                                           LDA(shrinkage='auto', solver='eigen'))
    clfs['ERPCov + TS'] = make_pipeline(ERPCovariances(), TangentSpace(),
                                        LogisticRegression())
    clfs['ERPCov + MDM'] = make_pipeline(ERPCovariances(), MDM())
    methods_list = ['Vect + LR', 'Vect + RegLDA', 'Xdawn + RegLDA',
                    'ERPCov + TS', 'ERPCov + MDM']

    # format data
    epochs.pick_types(eeg=True)
    X = epochs.get_data() * 1e6
    y = epochs.events[:, -1]

    # define cross validation
    cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, random_state=42)

    # run cross validation for each pipeline
    auc = []
    methods = []
    print('Computation in progress...')
    for m in clfs:
        try:
            res = cross_val_score(clfs[m], X, y == 2, scoring='roc_auc',
                                  cv=cv, n_jobs=-1)
            auc.extend(res)
            methods.extend([m] * len(res))
        except Exception:
            # skip pipelines that fail on this session's data
            pass

    results = pd.DataFrame(data=auc, columns=['AUC'])
    results['Method'] = methods

    # mean AUC per method
    auc_means = [results.loc[results['Method'] == method, 'AUC'].mean()
                 for method in methods_list]

    # count methods whose mean AUC reaches the 0.7 threshold
    counter = 0
    for mean_auc in auc_means:
        if mean_auc >= 0.7:
            counter += 1
    return counter > 0, counter
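# Self-contained sketch of the core scoring step above: the `y == 2`
# comparison binarizes event codes into target vs. non-target before
# computing ROC-AUC. Shapes and event codes here are illustrative
# assumptions, not session data.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedShuffleSplit, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from mne.decoding import Vectorizer

X = np.random.randn(80, 8, 50)          # 80 epochs, 8 channels, 50 samples
y = np.random.choice([1, 2], size=80)   # event codes; 2 = target

cv = StratifiedShuffleSplit(n_splits=20, test_size=0.25, random_state=42)
pipe = make_pipeline(Vectorizer(), StandardScaler(), LogisticRegression())
scores = cross_val_score(pipe, X, y == 2, scoring='roc_auc', cv=cv, n_jobs=-1)
print(scores.mean())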
# sklearn.cross_validation was removed; KFold now lives in model_selection.
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score
from utils import (DownSampler, EpochsVectorizer, CospBoostingClassifier,
                   epoch_data)

dataframe1 = pd.read_csv('ecog_train_with_labels.csv')

array_clfs = OrderedDict()

# ERPs models
array_clfs['XdawnCov'] = make_pipeline(XdawnCovariances(6, estimator='oas'),
                                       TangentSpace('riemann'),
                                       LogisticRegression(penalty='l2'))

array_clfs['Xdawn'] = make_pipeline(Xdawn(12, estimator='oas'),
                                    DownSampler(5),
                                    EpochsVectorizer(),
                                    LogisticRegression(penalty='l2'))

# Induced activity models
baseclf = make_pipeline(
    ElectrodeSelection(10, metric=dict(mean='logeuclid', distance='riemann')),
    TangentSpace('riemann'),
    LogisticRegression(penalty='l1'))

array_clfs['Cosp'] = make_pipeline(
    CospCovariances(fs=1000, window=32, overlap=0.95, fmax=300, fmin=1),
    CospBoostingClassifier(baseclf))

array_clfs['HankelCov'] = make_pipeline(
    DownSampler(2),
    HankelCovariances(delays=[2, 4, 8, 12, 16],
pipelines = {}

# we have to do this because the classes are called 'Target' and 'NonTarget'
# but the evaluation function uses a LabelEncoder, transforming them
# to 0 and 1
labels_dict = {"Target": 1, "NonTarget": 0}

# Riemannian geometry based classification
pipelines["RG + LDA"] = make_pipeline(
    XdawnCovariances(nfilter=5, estimator="lwf", xdawn_estimator="scm"),
    TangentSpace(),
    LDA(solver="lsqr", shrinkage="auto"),
)

pipelines["Xdw + LDA"] = make_pipeline(
    Xdawn(nfilter=2, estimator="scm"), Vectorizer(),
    LDA(solver="lsqr", shrinkage="auto")
)

##############################################################################
# Evaluation
# ----------
#
# We define the paradigm (P300) and use all three datasets available for it.
# The evaluation will return a dataframe containing AUCs for each permutation
# and dataset size.

paradigm = P300(resample=processing_sampling_rate)
dataset = BNCI2014009()
# Remove the slicing of the subject list to evaluate multiple subjects
dataset.subject_list = dataset.subject_list[1:2]
datasets = [dataset]
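# Sketch of the evaluation step this script builds toward, assuming MOABB's
# WithinSessionEvaluation (the exact evaluation call is not shown in this
# excerpt; the comment above suggests its learning-curve mode with
# permutations and dataset sizes, omitted here).
from moabb.evaluations import WithinSessionEvaluation

evaluation = WithinSessionEvaluation(paradigm=paradigm, datasets=datasets,
                                     overwrite=True)
results = evaluation.process(pipelines)
print(results.groupby("pipeline")["score"].mean())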
def test_Xdawn_fit():
    """Test Fit of Xdawn"""
    x = np.random.randn(100, 3, 10)
    labels = np.array([0, 1]).repeat(50)
    xd = Xdawn()
    xd.fit(x, labels)
def test_Xdawn_init():
    """Test init of Xdawn"""
    xd = Xdawn()
            X_train[:, channel_2d[i][0], channel_2d[i][1]] - mu[i]) / std[i]
        X_val[:, channel_2d[i][0], channel_2d[i][1]] = (
            X_val[:, channel_2d[i][0], channel_2d[i][1]] - mu[i]) / std[i]
    return X_train, X_val


results = 0

if __name__ == "__main__":
    f = []
    auc = []
    acc = []
    methods = []
    clfs = OrderedDict()
    clfs['Xdawn + RegLDA'] = make_pipeline(
        Xdawn(2), Vectorizer(), LDA(shrinkage='auto', solver='eigen'))

    for name in file:
        X1 = np.load('data/X_' + name + '.npy')
        y1 = np.load('data/Y_' + name + '.npy')
        X = X1[:, 4:9, :, 50:150]
        X = np.reshape(X, (-1, 9 * 5, 100))
        y = y1.flatten()
        zero = np.sum(X, axis=-1)[0] != 0
        X = X[:, zero, :]
        # shuffle=True is required when random_state is set
        cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=0)
        for m in clfs:
            print(name, m)
            res1 = cross_val_score(clfs[m], X, y,
processing_sampling_rate = 128
pipelines = {}

# we have to do this because the classes are called 'Target' and 'NonTarget'
# but the evaluation function uses a LabelEncoder, transforming them
# to 0 and 1
labels_dict = {"Target": 1, "NonTarget": 0}

# Riemannian geometry based classification
pipelines["RG+LDA"] = make_pipeline(
    XdawnCovariances(nfilter=5, estimator="lwf", xdawn_estimator="scm"),
    TangentSpace(),
    LDA(solver="lsqr", shrinkage="auto"),
)

pipelines["Xdw+LDA"] = make_pipeline(Xdawn(nfilter=2, estimator="scm"),
                                     Vectorizer(),
                                     LDA(solver="lsqr", shrinkage="auto"))

##############################################################################
# Evaluation
# ----------
#
# We define the paradigm (P300) and use all three datasets available for it.
# The evaluation will return a dataframe containing AUCs for each permutation
# and dataset size.

paradigm = P300(resample=processing_sampling_rate)
dataset = BNCI2014009()
# Remove the slicing of the subject list to evaluate multiple subjects
dataset.subject_list = dataset.subject_list[1:2]
    ),
    'SVM': (
        make_pipeline(Vectorizer(), SVC()),
        {'svc__C': np.exp(np.linspace(-4, 4, 9))},
    ),
    'CSP LDA': (
        make_pipeline(CSP(), LDA(shrinkage='auto', solver='eigen')),
        {'csp__n_components': (6, 9, 13),
         'csp__cov_est': ('concat', 'epoch')},
    ),
    'Xdawn LDA': (
        make_pipeline(Xdawn(2, classes=[1]), Vectorizer(),
                      LDA(shrinkage='auto', solver='eigen')),
        {},
    ),
    'ERPCov TS LR': (
        make_pipeline(ERPCovariances(estimator='oas'), TangentSpace(),
                      LogisticRegression()),
        {'erpcovariances__estimator': ('lwf', 'oas')},
    ),
    'ERPCov MDM': (
        make_pipeline(ERPCovariances(), MDM()),
        {'erpcovariances__estimator': ('lwf', 'oas')},
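# Each entry above pairs a pipeline with a parameter grid, which suggests a
# grid-search driver along these lines (a sketch: the dict name `pipelines`
# and the CV settings are assumptions, and X, y must come from the
# surrounding script).
from sklearn.model_selection import GridSearchCV, StratifiedKFold

cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
for name, (pipe, param_grid) in pipelines.items():
    search = GridSearchCV(pipe, param_grid, scoring='roc_auc', cv=cv, n_jobs=-1)
    search.fit(X, y)
    print(name, search.best_score_, search.best_params_)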