Пример #1
0
def main():
    """Cross-validate a stacking ensemble for every sample and timepoint.

    For each of the 21 samples and each of the 50 timepoints, a
    ``StackingClassifier`` built from the pipelines returned by
    ``get_freq_pipelines(15)`` (with a logistic-regression final estimator)
    is scored with 5-fold cross-validation. One accuracy-over-time plot is
    saved per sample, followed by a plot and a CSV of the accuracy averaged
    over all samples. Everything is written below ``save_dir``, which must
    already exist.
    """
    save_dir = "Results/ensembles/stacking/per_sample/15hz"
    n_samples = 21
    n_times = 50

    def save_accuracy_plot(accuracies, title, out_path):
        # Draw one accuracy-over-time curve, save it, then clear the figure
        # so that the next plot starts from an empty canvas.
        sns.set()
        ax = sns.lineplot(data=accuracies, dashes=False)
        ax.set(ylim=(0, 1),
               xlabel='Timepoints',
               ylabel='Accuracy',
               title=title)
        # NOTE(review): x=15 presumably marks stimulus onset and 0.125 the
        # chance level for 8 classes -- confirm.
        plt.axvline(x=15, color='b', linestyle='--')
        plt.axhline(0.125, color='k', linestyle='--')
        ax.figure.savefig(out_path, dpi=300)
        plt.clf()

    x_train_path = (
        "DataTransformed/wavelet_complex/15hz/pca_80/x_train_all_samples.pkl")
    # NOTE: unpickling can execute arbitrary code; only load files produced
    # by this project. The context manager closes the handle deterministically.
    with open(x_train_path, "rb") as handle:
        all_x_train = pickle.load(handle)
    all_freq_pipelines = get_freq_pipelines(15)

    all_results = np.zeros((n_samples, n_times))

    for sample in range(n_samples):
        print("sample {}".format(sample))

        # Labels are looked up with 1-based sample numbers.
        all_y_train = get_y_train(sample + 1)
        sample_x_train = np.array(all_x_train[sample])

        time_results = np.zeros(n_times)

        for time in range(n_times):
            # Every 50th row starting at `time`, i.e. the rows that belong
            # to this timepoint.
            intervals = np.arange(start=time,
                                  stop=all_y_train.shape[0],
                                  step=n_times)
            y_train = all_y_train[intervals]
            # Select the rows along axis 1, then move that axis to the front
            # and flatten the remaining two axes into one feature axis.
            selected = sample_x_train[:, intervals]
            x_train = selected.transpose(1, 0, 2).reshape(
                selected.shape[1], -1)

            model = StackingClassifier(estimators=all_freq_pipelines,
                                       final_estimator=LogisticRegression(),
                                       cv=5,
                                       stack_method='predict_proba')
            scores = cross_val_score(model, x_train, y_train, cv=5)
            print("Time {} accuracy: {:.2f} (+/- {:.2f})".format(
                time, scores.mean(), scores.std() * 2))
            time_results[time] = scores.mean()

        save_accuracy_plot(
            time_results,
            'Cross Val Accuracy Stacking Ensemble for Sample {}'.format(
                sample + 1),
            "{}/LOOCV_sample_{}.png".format(save_dir, sample + 1))

        all_results[sample] = time_results

    mean_results = np.mean(all_results, axis=0)
    save_accuracy_plot(
        mean_results,
        'Average Cross Val Accuracy Stacking Ensemble for All Samples',
        "{}/LOOCV_all_samples.png".format(save_dir))

    results_df = pd.DataFrame(mean_results)
    results_df.to_csv("{}/LOOCV_all_samples.csv".format(save_dir))
Пример #2
0
def vis_embeddings(dim_red_method, epochs, sample, n_comp=2):
    """Scatter-plot a low-dimensional embedding of one sample's data.

    The data from ``epochs.get_data()`` is flattened to one row per
    (epoch, timepoint), standardised, sub-sampled and reduced with the
    requested method; the embedding is then shown coloured by label.

    Args:
        dim_red_method: One of ``'pca'``, ``'ica'``, ``'se'`` (spectral
            embedding) or ``'tsne'`` (PCA to 50 components, then t-SNE).
        epochs: Object whose ``get_data()`` returns a 3-D array.
        sample: Sample number passed to ``get_y_train``.
        n_comp: Number of embedding dimensions to compute and plot, 2 or 3.

    Raises:
        ValueError: If ``dim_red_method`` or ``n_comp`` is not supported.
    """
    if n_comp not in (2, 3):
        raise ValueError(
            "n_comp must be 2 or 3 to be plotted, got {}".format(n_comp))

    x_train = epochs.get_data()
    # Swap the last two axes and flatten so that every row holds the values
    # of the original axis 1 for one (axis 0, axis 2) pair.
    x_train = x_train.transpose(0, 2, 1).reshape(-1, x_train.shape[1])
    x_train = StandardScaler().fit_transform(x_train)
    y_train = get_y_train(sample)

    # Keep every 50th row starting at row 15.
    # NOTE(review): assumes 50 timepoints per epoch and at least 8000 rows --
    # confirm against the data.
    inds = np.arange(15, 8000, 50)
    x_train = x_train[inds]
    y_train = y_train[inds]

    print('fitting {}'.format(dim_red_method))
    if dim_red_method == 'pca':
        reduced_data = PCA(n_components=n_comp).fit_transform(x_train)
    elif dim_red_method == 'ica':
        reduced_data = FastICA(n_components=n_comp).fit_transform(x_train)
    elif dim_red_method == 'se':
        reduced_data = SpectralEmbedding(
            n_components=n_comp).fit_transform(x_train)
    elif dim_red_method == 'tsne':
        # Reduce to 50 principal components first, then embed with t-SNE.
        pca_data = PCA(n_components=50).fit_transform(x_train)
        tsne = TSNE(n_components=n_comp,
                    verbose=1,
                    perplexity=10,
                    learning_rate=200)
        reduced_data = tsne.fit_transform(pca_data)
    else:
        raise ValueError("{} method not implemented".format(dim_red_method))
    print('fitting done')

    component_names = ['PC{}'.format(i + 1) for i in range(n_comp)]
    reduced_data_df = pd.DataFrame(data=reduced_data,
                                   columns=component_names)
    y_train_df = pd.DataFrame(data=y_train, columns=["labels"])
    final_df = pd.concat([reduced_data_df, y_train_df[['labels']]], axis=1)

    if n_comp == 2:
        sns.set()
        palette = sns.color_palette("bright", 8)
        ax = sns.scatterplot(x='PC1',
                             y='PC2',
                             hue='labels',
                             data=final_df,
                             palette=palette,
                             legend='full')
        ax.set(xlabel='PC1',
               ylabel='PC2',
               title='2 component {}'.format(dim_red_method))
        plt.show()
    else:
        # add_subplot(projection='3d') replaces Figure.gca(projection=...),
        # whose keyword arguments are no longer accepted by Matplotlib.
        ax = plt.figure(figsize=(16, 10)).add_subplot(projection='3d')
        # The third axis shows the third component (it used to repeat PC2).
        ax.scatter(xs=final_df["PC1"],
                   ys=final_df["PC2"],
                   zs=final_df["PC3"],
                   c=final_df["labels"],
                   cmap='tab10')
        ax.set_xlabel('PC1')
        ax.set_ylabel('PC2')
        ax.set_zlabel('PC3')
        plt.show()
Пример #3
0
def main():
    """Compute cross-validated LDA class probabilities per frequency and time.

    For each sample 1..21 the epochs are transformed with a complex Morlet
    wavelet at 15 log-spaced frequencies between 2 and 25. For every
    frequency the real and imaginary parts are concatenated along axis 1,
    reduced with ``pca(80, ...)`` and, for each of the 50 timepoints, a
    shrinkage LDA produces 5-fold cross-validated ``predict_proba`` outputs.

    The predictions are pickled per sample
    (``<save_dir>/sample_<n>/all_freq_proba_preds.pkl``) and once for all
    samples (``<save_dir>/all_proba_preds.pkl``).
    """
    # Local import: only needed to create the output directories.
    import os

    model_type = "lda"
    exp_name = "wavelet_class/lsqr/complex"

    save_dir = "Results/{}/{}".format(model_type, exp_name)

    # The wavelet parameters do not depend on the sample, so build them once.
    freqs = np.logspace(*np.log10([2, 25]), num=15)
    n_cycles = freqs / 4.

    sample_preds = []

    for sample in range(1, 22):
        print("sample {}".format(sample))

        epochs = get_epochs(sample, scale=False)

        print("applying morlet wavelet")
        # returns (n_epochs, n_channels, n_freqs, n_times)
        wavelet_output = tfr_array_morlet(epochs.get_data(),
                                          sfreq=epochs.info['sfreq'],
                                          freqs=freqs,
                                          n_cycles=n_cycles,
                                          output='complex')
        y_train = get_y_train(sample)

        freq_preds = []

        for freq in range(wavelet_output.shape[2]):
            print("frequency: {}".format(freqs[freq]))

            # Stack real and imaginary parts along axis 1, doubling its
            # length.
            complex_data = wavelet_output[:, :, freq, :]
            stacked_data = np.append(complex_data.real,
                                     complex_data.imag,
                                     axis=1)

            wavelet_info = mne.create_info(ch_names=stacked_data.shape[1],
                                           sfreq=epochs.info['sfreq'],
                                           ch_types='mag')
            wavelet_epochs = mne.EpochsArray(stacked_data,
                                             info=wavelet_info,
                                             events=epochs.events)

            reduced = pca(80, wavelet_epochs, plot=False)
            # Swap the last two axes and flatten the first two into rows.
            x_train = reduced.transpose(0, 2, 1).reshape(-1, reduced.shape[1])

            time_preds = []

            for time in range(50):
                print("time {}".format(time))
                # Every 50th row starting at `time`.
                intervals = np.arange(start=time,
                                      stop=x_train.shape[0],
                                      step=50)

                x_sample = x_train[intervals, :]
                y_sample = y_train[intervals]
                model = LinearDiscriminantAnalysis(solver='lsqr',
                                                   shrinkage='auto')
                cross_val_preds = cross_val_predict(model,
                                                    x_sample,
                                                    y_sample,
                                                    cv=5,
                                                    method="predict_proba")

                time_preds.append(cross_val_preds)

            freq_preds.append(time_preds)

        sample_preds.append(freq_preds)
        print('saving preds for sample {}'.format(sample))
        # Create the per-sample directory so the long computation above is
        # not lost to a missing folder.
        os.makedirs("{}/sample_{}".format(save_dir, sample), exist_ok=True)
        sample_path = "{}/sample_{}/all_freq_proba_preds.pkl".format(
            save_dir, sample)
        # The context manager guarantees the pickle is flushed and closed.
        with open(sample_path, "wb") as handle:
            pickle.dump(freq_preds, handle)
        print("preds saved")

    print('saving preds for all samples')
    with open("{}/all_proba_preds.pkl".format(save_dir), "wb") as handle:
        pickle.dump(sample_preds, handle)
    print("preds saved")
Пример #4
0
def main():
    """Score linear models on wavelet features per sample and frequency.

    The wavelet representation is chosen by the second-to-last component of
    ``exp_name`` (``real``, ``complex``, ``power`` or ``phase``). For each
    sample 1..21 and each of the 15 log-spaced frequencies between 2 and 15,
    the wavelet output is reduced with ``pca(80, ...)`` and scored by
    ``linear_models``. A plot is saved per frequency and a CSV with all
    frequencies per sample, below ``Results/<model_type>/<exp_name>``.

    Raises:
        ValueError: If the representation named in ``exp_name`` is not one
            of the supported kinds. Raised before any data is loaded.
    """
    model_type = "lda"
    exp_name = "wavelet_class/lsqr/complex/15hz"

    # Map the requested representation to the `output` argument of
    # tfr_array_morlet; "real" is derived from the complex output below.
    wavelet_kind = exp_name.split("/")[-2]
    tfr_outputs = {
        "real": "complex",
        "complex": "complex",
        "power": "power",
        "phase": "phase",
    }
    if wavelet_kind not in tfr_outputs:
        raise ValueError(
            "{} not an output of wavelet function".format(wavelet_kind))

    # The wavelet parameters do not depend on the sample, so build them once.
    freqs = np.logspace(*np.log10([2, 15]), num=15)
    n_cycles = freqs / 4.

    for sample in range(1, 22):
        print("sample {}".format(sample))

        sample_dir = "Results/{}/{}/sample_{}".format(model_type, exp_name,
                                                      sample)
        # makedirs also creates missing parent directories and tolerates an
        # existing directory.
        os.makedirs(sample_dir, exist_ok=True)

        epochs = get_epochs(sample, scale=False)

        print("applying morlet wavelet")

        # returns (n_epochs, n_channels, n_freqs, n_times)
        wavelet_output = tfr_array_morlet(epochs.get_data(),
                                          sfreq=epochs.info['sfreq'],
                                          freqs=freqs,
                                          n_cycles=n_cycles,
                                          output=tfr_outputs[wavelet_kind])

        y_train = get_y_train(sample)

        freq_results = np.zeros((wavelet_output.shape[2], 50))

        for freq in range(wavelet_output.shape[2]):
            print("frequency: {}".format(freqs[freq]))

            freq_data = wavelet_output[:, :, freq, :]

            if wavelet_kind == "real":
                freq_data = freq_data.real
            elif wavelet_kind == "complex":
                # Stack real and imaginary parts along axis 1.
                freq_data = np.append(freq_data.real,
                                      freq_data.imag,
                                      axis=1)

            wavelet_info = mne.create_info(ch_names=freq_data.shape[1],
                                           sfreq=epochs.info['sfreq'],
                                           ch_types='mag')
            wavelet_epochs = mne.EpochsArray(freq_data,
                                             info=wavelet_info,
                                             events=epochs.events)

            reduced = pca(80, wavelet_epochs, plot=False)
            # Swap the last two axes and flatten the first two into rows.
            x_train = reduced.transpose(0, 2, 1).reshape(-1, reduced.shape[1])

            results = linear_models(x_train, y_train, model_type=model_type)
            freq_results[freq] = results

            curr_freq = str(round(freqs[freq], 2))

            sns.set()
            ax = sns.lineplot(data=results, dashes=False)
            ax.set(ylim=(0, 1),
                   xlabel='Time',
                   ylabel='Accuracy',
                   title='Cross Val Accuracy {} for Subject {} for Freq {}'.
                   format(model_type, sample, curr_freq))
            plt.axvline(x=15, color='b', linestyle='--')
            ax.figure.savefig("{}/freq_{}.png".format(sample_dir, curr_freq),
                              dpi=300)
            plt.clf()

        all_results_df = pd.DataFrame(freq_results)
        all_results_df.to_csv("{}/all_freq_results.csv".format(sample_dir))
Пример #5
0
def main():
    """Cross-validate a voting ensemble for every sample and timepoint.

    For each of the 21 samples and each of the 50 timepoints, a
    ``VotingClassifier`` over the pipelines from ``get_freq_pipelines(15)``
    is scored with 5-fold cross-validation. When ``diff_freqs`` is set, only
    the first 11 pipelines are used for timepoints after 23. One plot is
    saved per sample, followed by a plot and a CSV of the accuracy averaged
    over all samples. Everything is written below ``save_dir``, which must
    already exist.
    """
    mode = "soft"
    pca_comp = 80
    diff_freqs = True
    n_samples = 21
    n_times = 50

    save_dir = "Results/ensembles/voting/diff_freqs/15hz/pca_{}/{}".format(
        pca_comp, mode)
    # Derive the label from `mode` so the titles stay correct if the voting
    # mode is changed ("soft" -> "Soft").
    voting_label = mode.capitalize()

    def save_accuracy_plot(accuracies, title, out_path):
        # Draw one accuracy-over-time curve, save it, then clear the figure
        # so that the next plot starts from an empty canvas.
        sns.set()
        ax = sns.lineplot(data=accuracies, dashes=False)
        ax.set(ylim=(0, 1),
               xlabel='Timepoints',
               ylabel='Accuracy',
               title=title)
        # NOTE(review): x=15 presumably marks stimulus onset and 0.125 the
        # chance level for 8 classes -- confirm.
        plt.axvline(x=15, color='b', linestyle='--')
        if diff_freqs:
            # Marks the timepoint after which fewer pipelines are used.
            plt.axvline(x=23, color='g', linestyle='--')
        plt.axhline(0.125, color='k', linestyle='--')
        ax.figure.savefig(out_path, dpi=300)
        plt.clf()

    x_train_path = (
        "DataTransformed/wavelet_complex/15hz/pca_{}/x_train_all_samples.pkl"
        .format(pca_comp))
    # NOTE: unpickling can execute arbitrary code; only load files produced
    # by this project. The context manager closes the handle deterministically.
    with open(x_train_path, "rb") as handle:
        all_x_train = pickle.load(handle)
    all_freq_pipelines = get_freq_pipelines(15)

    all_results = np.zeros((n_samples, n_times))

    for sample in range(n_samples):
        print("sample {}".format(sample))

        # Labels are looked up with 1-based sample numbers.
        all_y_train = get_y_train(sample + 1)
        sample_x_train = np.array(all_x_train[sample])

        time_results = np.zeros(n_times)

        for time in range(n_times):
            if diff_freqs and time > 23:
                freq_pipelines = all_freq_pipelines[:11]
            else:
                freq_pipelines = all_freq_pipelines

            # Every 50th row starting at `time`.
            intervals = np.arange(start=time,
                                  stop=all_y_train.shape[0],
                                  step=n_times)
            y_train = all_y_train[intervals]
            # Select the rows along axis 1, then move that axis to the front
            # and flatten the remaining two axes into one feature axis.
            selected = sample_x_train[:, intervals]
            x_train = selected.transpose(1, 0, 2).reshape(
                selected.shape[1], -1)

            model = VotingClassifier(estimators=freq_pipelines, voting=mode)
            scores = cross_val_score(model, x_train, y_train, cv=5)
            print("Time {} accuracy: {:.2f} (+/- {:.2f})".format(
                time, scores.mean(), scores.std() * 2))
            time_results[time] = scores.mean()

        save_accuracy_plot(
            time_results,
            'Cross Val Accuracy {} Voting Ensemble for Sample {}'.format(
                voting_label, sample + 1),
            "{}/LOOCV_sample_{}.png".format(save_dir, sample + 1))

        all_results[sample] = time_results

    mean_results = np.mean(all_results, axis=0)
    save_accuracy_plot(
        mean_results,
        'Average Cross Val Accuracy {} Voting Ensemble for All Samples'.format(
            voting_label),
        "{}/LOOCV_all_samples.png".format(save_dir))

    results_df = pd.DataFrame(mean_results)
    results_df.to_csv("{}/LOOCV_all_samples.csv".format(save_dir))
def main():
    """Leave-one-sample-out evaluation of a logistic-regression meta-model.

    Loads the pickled base-model class probabilities, and for each of the 50
    timepoints trains a ``LogisticRegression`` on the probabilities of all
    samples but one and scores it on the held-out sample. The predictions of
    every frequency are used as separate training rows, so the labels of a
    sample are repeated once per frequency. The accuracy averaged over the
    held-out samples is plotted and written to CSV below ``save_dir``.
    """
    base_model_type = "lda"
    base_model_dir = "wavelet_class/lsqr/complex"
    save_dir = "Results/ensembles/stacking_ensemble/"

    load_dir = "Results/{}/{}".format(base_model_type, base_model_dir)

    # (21, 15, 50, epochs, 8)
    # NOTE: unpickling can execute arbitrary code; only load files produced
    # by this project. The context manager closes the handle deterministically.
    with open(load_dir + "/all_proba_preds.pkl", "rb") as handle:
        all_sample_preds = np.array(pickle.load(handle))
    n_freqs = all_sample_preds.shape[1]

    # (21, all_time)
    all_y_train = [get_y_train(sample) for sample in range(1, 22)]
    n_samples = len(all_y_train)

    results = np.zeros(50)

    for time in range(50):
        print("time {}".format(time))

        # (21, 15, epochs, 8)
        sample_preds = all_sample_preds[:, :, time]

        # Per sample: stack the predictions of all frequencies into rows.
        # Built-in float/int replace np.float/np.int, which were removed in
        # NumPy 1.24.
        sample_predictions_proba = [
            np.vstack(preds).astype(float) for preds in sample_preds
        ]

        # Per sample: labels at this timepoint, repeated once per frequency
        # so they line up with the frequency-major rows built above.
        sample_y_train = []
        for labels in all_y_train:
            intervals = np.arange(start=time, stop=labels.shape[0], step=50)
            sample_y_train.append(
                np.tile(np.ravel(labels[intervals]), n_freqs).astype(int))

        all_val_acc = []

        for val_sample in range(n_samples):
            print("left out validation subject: {}".format(val_sample + 1))

            # Train on every sample except the held-out one.
            x_train = np.concatenate(sample_predictions_proba[:val_sample] +
                                     sample_predictions_proba[val_sample + 1:])
            y_train = np.concatenate(sample_y_train[:val_sample] +
                                     sample_y_train[val_sample + 1:])

            x_val = sample_predictions_proba[val_sample]
            y_val = sample_y_train[val_sample]

            meta_model = LogisticRegression()
            meta_model.fit(x_train, y_train)

            all_val_acc.append(meta_model.score(x_val, y_val))

        avg_val_acc = np.mean(all_val_acc)
        print("average cross val score: {}".format(avg_val_acc))
        results[time] = avg_val_acc

    sns.set()
    ax = sns.lineplot(data=results, dashes=False)
    ax.set(ylim=(0, 1),
           xlabel='Timepoints',
           ylabel='Accuracy',
           title='Average Cross Val Accuracy Stacking Ensemble {} Base Models'.
           format(base_model_type))
    plt.axvline(x=15, color='b', linestyle='--')
    ax.figure.savefig("{}/LOOCV.png".format(save_dir), dpi=300)
    plt.clf()

    results_df = pd.DataFrame(results)
    results_df.to_csv("{}/LOOCV.csv".format(save_dir))
def main():
    """Cross-validate a logistic-regression meta-model within each sample.

    Loads the pickled base-model class probabilities and, for each of the 21
    samples and 50 timepoints, scores a ``LogisticRegression`` with 5-fold
    cross-validation. The predictions of every frequency are used as
    separate rows, so the labels are repeated once per frequency. One plot
    is saved per sample, followed by a plot and a CSV of the accuracy
    averaged over all samples, below ``save_dir``.
    """
    base_model_type = "lda"
    base_model_dir = "wavelet_class/lsqr/complex"
    save_dir = "Results/ensembles/stacking_ensemble/custom/per_sample"

    load_dir = "Results/{}/{}".format(base_model_type, base_model_dir)

    # NOTE: unpickling can execute arbitrary code; only load files produced
    # by this project. The context manager closes the handle deterministically.
    with open(load_dir + "/all_proba_preds.pkl", "rb") as handle:
        all_sample_preds = np.array(pickle.load(handle))

    all_sample_results = np.zeros((21, 50))

    for sample in range(21):
        print("sample {}".format(sample))
        # Labels are looked up with 1-based sample numbers.
        sample_y_train = get_y_train(sample + 1)
        freq_preds = all_sample_preds[sample]
        # The leading axis of a sample's predictions indexes the frequency.
        n_freqs = freq_preds.shape[0]

        results = np.zeros(50)

        for time in range(50):
            # Every 50th label starting at `time`.
            intervals = np.arange(start=time,
                                  stop=sample_y_train.shape[0],
                                  step=50)
            time_preds = freq_preds[:, time]

            # Stack the per-frequency predictions into rows and repeat the
            # labels once per frequency so that both line up.
            y_train = np.tile(sample_y_train[intervals], n_freqs)
            x_train = np.vstack(list(time_preds))

            meta_model = LogisticRegression()
            scores = cross_val_score(meta_model, x_train, y_train, cv=5)

            print("Time {} accuracy: {:.2f} (+/- {:.2f})".format(
                time, scores.mean(), scores.std() * 2))

            results[time] = scores.mean()

        all_sample_results[sample] = results

        sns.set()
        ax = sns.lineplot(data=results, dashes=False)
        ax.set(
            ylim=(0, 0.7),
            xlabel='Timepoints',
            ylabel='Accuracy',
            title=
            'Cross Val Accuracy Stacking Ensemble {} Base Models for Sample {}'
            .format(base_model_type, sample + 1))
        plt.axvline(x=15, color='b', linestyle='--')
        ax.figure.savefig("{}/LOOCV_sample_{}.png".format(
            save_dir, sample + 1),
                          dpi=300)
        plt.clf()

    mean_results = np.mean(all_sample_results, axis=0)

    sns.set()
    ax = sns.lineplot(data=mean_results, dashes=False)
    ax.set(
        ylim=(0, 0.6),
        xlabel='Timepoints',
        ylabel='Accuracy',
        title=
        'Average Cross Val Accuracy Stacking Ensemble {} Base Models for All Samples'
        .format(base_model_type))
    plt.axvline(x=15, color='b', linestyle='--')
    ax.figure.savefig("{}/LOOCV_all_samples.png".format(save_dir), dpi=300)
    plt.clf()

    results_df = pd.DataFrame(mean_results)
    results_df.to_csv("{}/LOOCV_all_samples.csv".format(save_dir))
Пример #8
0
def main():
    """Cross-validate a bagging ensemble for every sample and timepoint.

    For each of the 21 samples and each of the 50 timepoints, a
    ``BaggingClassifier`` of 15 shrinkage-LDA estimators (80% of the rows and
    50% of the features each) is scored with 5-fold cross-validation. One
    plot is saved per sample, followed by a plot and a CSV of the accuracy
    averaged over all samples. Everything is written below ``save_dir``,
    which must already exist.
    """
    # NOTE(review): the directory is named "decision_tree" although the base
    # estimator below is an LDA -- confirm the intended output location.
    save_dir = "Results/ensembles/bagging/decision_tree"
    n_samples = 21
    n_times = 50

    def save_accuracy_plot(accuracies, title, out_path):
        # Draw one accuracy-over-time curve, save it, then clear the figure
        # so that the next plot starts from an empty canvas.
        sns.set()
        ax = sns.lineplot(data=accuracies, dashes=False)
        ax.set(ylim=(0, 1),
               xlabel='Timepoints',
               ylabel='Accuracy',
               title=title)
        # NOTE(review): x=15 presumably marks stimulus onset and 0.125 the
        # chance level for 8 classes -- confirm.
        plt.axvline(x=15, color='b', linestyle='--')
        plt.axhline(0.125, color='k', linestyle='--')
        ax.figure.savefig(out_path, dpi=300)
        plt.clf()

    x_train_path = (
        "DataTransformed/wavelet_complex/25hz/pca_80/x_train_all_samples.pkl")
    # NOTE: unpickling can execute arbitrary code; only load files produced
    # by this project. The context manager closes the handle deterministically.
    with open(x_train_path, "rb") as handle:
        all_x_train = pickle.load(handle)

    all_results = np.zeros((n_samples, n_times))

    for sample in range(n_samples):
        print("sample {}".format(sample))

        # Labels are looked up with 1-based sample numbers.
        all_y_train = get_y_train(sample + 1)
        sample_x_train = np.array(all_x_train[sample])

        time_results = np.zeros(n_times)

        for time in range(n_times):
            # Every 50th row starting at `time`.
            intervals = np.arange(start=time,
                                  stop=all_y_train.shape[0],
                                  step=n_times)
            y_train = all_y_train[intervals]
            # Select the rows along axis 1, then move that axis to the front
            # and flatten the remaining two axes into one feature axis.
            selected = sample_x_train[:, intervals]
            x_train = selected.transpose(1, 0, 2).reshape(
                selected.shape[1], -1)

            # The base estimator is passed positionally: its keyword was
            # renamed from `base_estimator` to `estimator` in scikit-learn,
            # and the positional form works with both spellings.
            model = BaggingClassifier(
                LinearDiscriminantAnalysis(solver='lsqr', shrinkage='auto'),
                n_estimators=15,
                max_samples=0.8,
                max_features=0.5,
                random_state=0)
            scores = cross_val_score(model, x_train, y_train, cv=5)
            print("Time {} accuracy: {:.2f} (+/- {:.2f})".format(
                time, scores.mean(), scores.std() * 2))
            time_results[time] = scores.mean()

        save_accuracy_plot(
            time_results,
            'Cross Val Accuracy Bagging Ensemble for Sample {}'.format(
                sample + 1),
            "{}/LOOCV_sample_{}.png".format(save_dir, sample + 1))

        all_results[sample] = time_results

    mean_results = np.mean(all_results, axis=0)
    save_accuracy_plot(
        mean_results,
        'Average Cross Val Accuracy Bagging Ensemble for All Samples',
        "{}/LOOCV_all_samples.png".format(save_dir))

    results_df = pd.DataFrame(mean_results)
    results_df.to_csv("{}/LOOCV_all_samples.csv".format(save_dir))