def fit(self, X, y):
        # validate
        X, y = check_X_y(X, y, allow_nd=True)
        X = check_array(X, allow_nd=True)

        # set internal vars
        self.classes_ = unique_labels(y)
        self.X_ = X
        self.y_ = y

        ##################################################
        # split X into train and test sets, so that
        # grid search can be performed on train set only
        seed = 7
        np.random.seed(seed)
        #X_TRAIN, X_TEST, y_TRAIN, y_TEST = train_test_split(X, y, test_size=0.25, random_state=seed)

        for epoch_trim in self.epoch_bounds:
            for bandpass in self.bandpass_filters:

                X_train, X_test, y_train, y_test = train_test_split(
                    X, y, test_size=0.25, random_state=seed)

                # X_train = np.copy(X_TRAIN)
                # X_test = np.copy(X_TEST)
                # y_train = np.copy(y_TRAIN)
                # y_test = np.copy(y_TEST)

                # separate out inputs that are tuples
                bandpass_start, bandpass_end = bandpass
                epoch_trim_start, epoch_trim_end = epoch_trim

                # bandpass filter coefficients
                b, a = butter(
                    5,
                    np.array([bandpass_start, bandpass_end]) /
                    (self.sfreq * 0.5), 'bandpass')

                # filter and crop TRAINING SET
                X_train = self.preprocess_X(X_train, b, a, epoch_trim_start,
                                            epoch_trim_end)
                # validate
                X_train, y_train = check_X_y(X_train, y_train, allow_nd=True)
                X_train = check_array(X_train, allow_nd=True)

                # filter and crop TEST SET
                X_test = self.preprocess_X(X_test, b, a, epoch_trim_start,
                                           epoch_trim_end)
                # validate
                X_test, y_test = check_X_y(X_test, y_test, allow_nd=True)
                X_test = check_array(X_test, allow_nd=True)

                ###########################################################################
                # self-tune CSP to find optimal number of filters to use at these settings
                #[best_num_filters, best_num_filters_score] = self.self_tune(X_train, y_train)
                best_num_filters = 5

                # as an option, we could tune optimal CSP filter num against complete train set
                #X_tune = self.preprocess_X(X, b, a, epoch_trim_start, epoch_trim_end)
                #[best_num_filters, best_num_filters_score] = self.self_tune(X_tune, y)

                # now use this insight to really fit with optimal CSP spatial filters
                """
				reg : float | str | None (default None)
			        if not None, allow regularization for covariance estimation
			        if float, shrinkage covariance is used (0 <= shrinkage <= 1).
			        if str, optimal shrinkage using Ledoit-Wolf Shrinkage ('ledoit_wolf')
			        or Oracle Approximating Shrinkage ('oas').
				"""
                transformer = CSP(n_components=best_num_filters,
                                  reg='ledoit_wolf')
                transformer.fit(X_train, y_train)

                # use these CSP spatial filters to transform train and test
                spatial_filters_train = transformer.transform(X_train)
                spatial_filters_test = transformer.transform(X_test)

                # put this back in as failsafe if NaN or inf starts cropping up
                # spatial_filters_train = np.nan_to_num(spatial_filters_train)
                # check_X_y(spatial_filters_train, y_train)
                # spatial_filters_test = np.nan_to_num(spatial_filters_test)
                # check_X_y(spatial_filters_test, y_test)

                # train LDA
                classifier = LinearDiscriminantAnalysis()
                classifier.fit(spatial_filters_train, y_train)
                score = classifier.score(spatial_filters_test, y_test)

                #print "current score",score
                print "bandpass:"******"epoch window:", epoch_trim_start, epoch_trim_end
                #print best_num_filters,"filters chosen"

                # put in ranked order Top 10 list
                idx = bisect(self.ranked_scores, score)
                self.ranked_scores.insert(idx, score)
                self.ranked_scores_opts.insert(
                    idx,
                    dict(bandpass=bandpass,
                         epoch_trim=epoch_trim,
                         filters=best_num_filters))
                self.ranked_classifiers.insert(idx, classifier)
                self.ranked_transformers.insert(idx, transformer)

                if len(self.ranked_scores) > self.num_votes:
                    self.ranked_scores.pop(0)
                if len(self.ranked_scores_opts) > self.num_votes:
                    self.ranked_scores_opts.pop(0)
                if len(self.ranked_classifiers) > self.num_votes:
                    self.ranked_classifiers.pop(0)
                if len(self.ranked_transformers) > self.num_votes:
                    self.ranked_transformers.pop(0)
                """
				Covariance computation
				"""
                # compute covariance matrices
                cov_data_train = covariances(X=X_train)
                cov_data_test = covariances(X=X_test)

                clf_mdm = MDM(metric=dict(mean='riemann', distance='riemann'))
                clf_mdm.fit(cov_data_train, y_train)
                score_mdm = clf_mdm.score(cov_data_test, y_test)
                # print "MDM prediction score:",score_mdm
                # put in ranked order Top 10 list
                idx = bisect(self.ranked_scores_mdm, score_mdm)
                self.ranked_scores_mdm.insert(idx, score_mdm)
                self.ranked_scores_opts_mdm.insert(
                    idx,
                    dict(bandpass=bandpass,
                         epoch_trim=epoch_trim,
                         filters=best_num_filters))
                self.ranked_classifiers_mdm.insert(idx, clf_mdm)

                if len(self.ranked_scores_mdm) > self.num_votes:
                    self.ranked_scores_mdm.pop(0)
                if len(self.ranked_scores_opts_mdm) > self.num_votes:
                    self.ranked_scores_opts_mdm.pop(0)
                if len(self.ranked_classifiers_mdm) > self.num_votes:
                    self.ranked_classifiers_mdm.pop(0)

                clf_ts = TSclassifier()
                clf_ts.fit(cov_data_train, y_train)
                score_ts = clf_ts.score(cov_data_test, y_test)
                # put in ranked order Top 10 list
                idx = bisect(self.ranked_scores_ts, score_ts)
                self.ranked_scores_ts.insert(idx, score_ts)
                self.ranked_scores_opts_ts.insert(
                    idx,
                    dict(bandpass=bandpass,
                         epoch_trim=epoch_trim,
                         filters=best_num_filters))
                self.ranked_classifiers_ts.insert(idx, clf_ts)

                if len(self.ranked_scores_ts) > self.num_votes:
                    self.ranked_scores_ts.pop(0)
                if len(self.ranked_scores_opts_ts) > self.num_votes:
                    self.ranked_scores_opts_ts.pop(0)
                if len(self.ranked_classifiers_ts) > self.num_votes:
                    self.ranked_classifiers_ts.pop(0)

                print "CSP+LDA score:", score, "Tangent space w/LR score:", score_ts

                print "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
                print "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^"
                print "    T O P  ", self.num_votes, "  C L A S S I F I E R S"
                print
                #j=1
                for i in xrange(len(self.ranked_scores)):
                    print i, ",", round(self.ranked_scores[i], 4), ",",
                    print self.ranked_scores_opts[i]
                print "-------------------------------------"
                for i in xrange(len(self.ranked_scores_ts)):
                    print i, ",", round(self.ranked_scores_ts[i], 4), ",",
                    print self.ranked_scores_opts_ts[i]
                print "-------------------------------------"
                for i in xrange(len(self.ranked_scores_mdm)):
                    print i, ",", round(self.ranked_scores_mdm[i], 4), ",",
                    print self.ranked_scores_opts_mdm[i]

        # finish up, set the flag to indicate "fitted" state
        self.fit_ = True

        # Return the classifier
        return self
# ----- Example #2 (scraped sample separator; original marker "示例#2", vote count 0) -----
# Cross-validated comparison of dimensionality-reduction methods on
# covariance matrices: two RDR reducers vs. two electrode subselections.
cv = KFold(n_splits=n_splits)
for idx_train, idx_test in tqdm(cv.split(covs), total=n_splits):

    # carve this fold into training and testing portions
    covs_train, labs_train = covs[idx_train], labels[idx_train]
    covs_test, labs_test = covs[idx_test], labels[idx_test]

    # reduce the dimensions with ['covpca', 'gpcaRiemann']
    for method_name in ['covpca', 'gpcaRiemann']:
        reducer = DR.RDR(n_components=pred, method=method_name)
        reducer.fit(covs_train)
        clf.fit(reducer.transform(covs_train), labs_train)
        fold_score = clf.score(reducer.transform(covs_test), labs_test)
        scores[method_name].append(fold_score)

    # reduce the dimensions with [SELg, SELb]: keep only the selected
    # channels along both covariance axes
    for method_name, channels in zip(['SELg', 'SELb'], [SELg, SELb]):
        rows_train = covs_train[:, channels, :]
        rows_test = covs_test[:, channels, :]
        clf.fit(rows_train[:, :, channels], labs_train)
        fold_score = clf.score(rows_test[:, :, channels], labs_test)
        scores[method_name].append(fold_score)

print('subject ', subject)
# report the mean score of each method across folds
for method_name in scores:
    print(method_name, np.mean(scores[method_name]))
print('')
def score_pooling_rct(settings, subject_target, ntop):
    """Score transfer learning via re-centering (RCT) pooling for one target.

    Ranks all other subjects by their stored intra-subject accuracy, keeps
    the `ntop` best as sources, re-centers each source's per-class geometric
    means around its own center, pools them with the target's re-centered
    class means, and classifies the target's held-out covariances with MDM.
    The split/score procedure is repeated `nrzt` times and averaged.

    Parameters
    ----------
    settings : dict with keys 'dataset', 'paradigm', 'session', 'storage',
        'subject_list', 'ncovs_list'.
    subject_target : the held-out target subject.
    ntop : number of best source subjects to pool.

    Returns
    -------
    score : float — mean MDM accuracy over the repetitions.
    """
    dataset = settings['dataset']
    paradigm = settings['paradigm']
    session = settings['session']
    storage = settings['storage']

    # intra-subject accuracies are used to rank candidate source subjects
    filepath = '../results/' + dataset + '/TL_intra-subject_scores.pkl'
    acc_intra_dict = joblib.load(filepath)

    source_scores = []
    source_subjects = []
    for subj in settings['subject_list']:
        if subj == subject_target:
            continue
        source_scores.append(acc_intra_dict[subj])
        source_subjects.append(subj)
    source_scores = np.array(source_scores)
    source_subjects = np.array(source_subjects)

    # keep the ntop sources with the highest intra-subject accuracy
    order = source_scores.argsort()[::-1]
    subject_sources_ntop = source_subjects[order][:ntop]

    # get the geometric means for each subject (each class and also the center)
    filename = '../results/' + dataset + '/subject_means.pkl'
    subj_means = joblib.load(filename)

    # get the data for the target subject
    target_org = GD.get_dataset(dataset, subject_target, session, storage)
    if paradigm == 'MI':
        # things here are only implemented for MI for now
        target_org['covs'] = Covariances(estimator='oas').fit_transform(
            target_org['signals'])

    def _recenter(mean_mat, center_isqrt):
        # congruence transform C^{-1/2} M C^{-1/2}
        return np.dot(center_isqrt, np.dot(mean_mat, center_isqrt))

    ncovs = settings['ncovs_list'][0]
    nrzt = 10
    total = 0.0
    for _ in range(nrzt):

        # split randomly the target dataset.
        # NOTE(review): the training half is never used below; re-centering
        # is computed from the *full* target dataset — confirm intentional.
        target_org_train, target_org_test = get_target_split_motorimagery(
            target_org, ncovs)

        # pooled, re-centered class means from the source subjects
        means_left = []
        means_right = []
        for subj_source in subject_sources_ntop:
            center_isqrt = invsqrtm(subj_means[subj_source]['center'])
            means_left.append(
                _recenter(subj_means[subj_source]['left_hand'], center_isqrt))
            means_right.append(
                _recenter(subj_means[subj_source]['right_hand'], center_isqrt))
        covs_train_source = np.concatenate(
            [np.stack(means_left), np.stack(means_right)])
        labs_train_source = np.concatenate([
            len(means_left) * ['left_hand'],
            len(means_right) * ['right_hand']
        ])

        # re-centered class means for the target subject
        covs_target_all = target_org['covs']
        labs_target_all = target_org['labels']
        target_isqrt = invsqrtm(mean_riemann(covs_target_all))
        target_mean_left = _recenter(
            mean_riemann(covs_target_all[labs_target_all == 'left_hand']),
            target_isqrt)
        target_mean_right = _recenter(
            mean_riemann(covs_target_all[labs_target_all == 'right_hand']),
            target_isqrt)
        covs_train_target = np.stack([target_mean_left, target_mean_right])
        labs_train_target = np.array(['left_hand', 'right_hand'])

        covs_train = np.concatenate([covs_train_source, covs_train_target])
        labs_train = np.concatenate([labs_train_source, labs_train_target])

        # do the classification on the target's held-out split
        clf = MDM()
        clf.fit(covs_train, labs_train)
        total = total + clf.score(target_org_test['covs'],
                                  target_org_test['labels'])

    score = total / nrzt

    return score