import numpy as np
import pandas as pd
import xgboost as xgb
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.decomposition import KernelPCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import (GridSearchCV, StratifiedShuffleSplit,
                                     train_test_split)

import ca_data_utils


def load_auto_data():
    """Grid-search an XGBoost classifier on precomputed autocorrelation features."""
    lags = 10
    length = 2 * lags  # window length baked into the precomputed feature file below
    fn = '../data/autocorr_len20_lag10.npy'
    X = np.load(fn)
    labels = ca_data_utils.load_labels()[9:39992:2]
    labels = labels[length - 1:]  # drop the first (length - 1) labels so they line up with the windowed features
    print(X.shape)
    print(labels.shape)
    max_depths = np.linspace(3, 8, 6).astype(int)
    learning_rates = np.logspace(-2, 2, 5)
    boosters = ['gbtree', 'gblinear', 'dart']
    gammas = np.linspace(0.0, 0.4, 5)
    reg_alphas = np.logspace(-1, 2, 4)
    param_dict = dict(max_depth=max_depths,
                      learning_rate=learning_rates,
                      booster=boosters,
                      gamma=gammas,
                      reg_alpha=reg_alphas)
    grid = GridSearchCV(xgb.XGBClassifier(),
                        param_grid=param_dict,
                        n_jobs=20,
                        verbose=2)

    print('start to train...')
    grid.fit(X, labels)
    print('finished')
    df = pd.DataFrame.from_dict(grid.cv_results_)
    filename = '../data/clf_results/autocorr_xgbooster'
    df.to_csv(filename)
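

# A follow-up sketch (not part of the original script): read back the grid-search table
# written by load_auto_data() and rank parameter settings by mean cross-validated score.
# The default path simply mirrors the one used above.
def summarize_auto_search(path='../data/clf_results/autocorr_xgbooster'):
    results = pd.read_csv(path)
    # GridSearchCV's cv_results_ always contains 'params' and 'mean_test_score'
    top = results.sort_values('mean_test_score', ascending=False)
    print(top[['params', 'mean_test_score', 'std_test_score']].head(10))
    return top

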
def select_models():
    """Compare a few baseline classifiers on the raw V-matrix frames."""
    vid = ca_data_utils.load_v_matrix().T[8:39992]
    labels = ca_data_utils.load_labels()[8:39992]
    X_train, X_test, y_train, y_test = train_test_split(
        vid, labels, test_size=0.25, random_state=42)  # fixed seed so the split is reproducible
    classifiers = [
        SVC(kernel="linear", C=0.025),
        SVC(gamma=2, C=1),
        RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1),
        AdaBoostClassifier()
    ]
    names = ['linear SVM', 'RBF SVM', 'Random Forest', 'AdaBoost']
    train = []
    test = []
    for name, clf in zip(names, classifiers):
        print('starting to train with ', name)
        clf.fit(X_train, y_train)
        print('---> calculating training set accuracy:')
        train_accu = clf.score(X_train, y_train)
        train.append(train_accu)
        print('---> training set accuracy: ', train_accu)
        print('---> calculating testing set accuracy:')
        test_accu = clf.score(X_test, y_test)
        test.append(test_accu)
        print('---> testing set accuracy: ', test_accu)
    np.save('../data/clf_results/clf_names', names)
    np.save('../data/clf_results/train_accuracy', train)
    np.save('../data/clf_results/test_accuracy', test)
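

# A small companion sketch (assumption: run after select_models()) that reloads the saved
# accuracy arrays and prints them side by side; np.save appends the '.npy' suffix.
def report_model_selection():
    names = np.load('../data/clf_results/clf_names.npy')
    train = np.load('../data/clf_results/train_accuracy.npy')
    test = np.load('../data/clf_results/test_accuracy.npy')
    for name, tr, te in zip(names, train, test):
        print('{:>15s}  train={:.3f}  test={:.3f}'.format(name, tr, te))

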
def xgboost(gamma):
    """Fit XGBoost on a 1-in-5 temporal subsample and report hold-out accuracy for one gamma."""
    X = ca_data_utils.load_v_matrix().T[8:39992:5]
    labels = ca_data_utils.load_labels()[8:39992:5]
    clf = xgb.XGBClassifier(gamma=gamma, learning_rate=0.1)
    total_len = len(labels)
    cut = int(3 * total_len / 4)
    clf.fit(X[:cut], labels[:cut])
    score = clf.score(X[cut:], labels[cut:])
    print(score)
    return score
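

# A hypothetical driver for the helper above (not in the original script): sweep a few
# gamma values and collect the hold-out score reported by xgboost() for each. The grid
# of gammas here is purely illustrative.
def sweep_xgboost_gamma(gammas=(0.0, 0.1, 0.2, 0.4)):
    scores = {}
    for g in gammas:
        print('gamma =', g)
        scores[g] = xgboost(g)
    return scores

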
def load_data():
    """Load the precomputed autocorrelation features and the matching labels."""
    # X = ca_data_utils.load_v_matrix().T[9:39992]
    X = np.load('../data/autocorr.npy')
    labels = ca_data_utils.load_labels()[9:39992:2]
    # labels[labels==3] = 2
    # labels = [labels[k+1] - labels[k] for k in range(len(labels)-1)]
    # labels = np.array(labels)
    # X = [X[k+1] - X[k] for k in range(len(X) - 1)]
    # labels[labels!=0] = 1
    return X, labels


def kernel_pca():
    """Project frames with RBF kernel PCA and plot the first two components per state."""
    X = ca_data_utils.load_v_matrix().T[8:39992]
    labels = ca_data_utils.load_labels()[8:39992]
    kpca = KernelPCA(n_components=2,  # only the first two components are plotted below
                     kernel="rbf",
                     fit_inverse_transform=True,
                     gamma=10,
                     n_jobs=20)
    print('starting kpca')
    X_kpca = kpca.fit_transform(X)
    print('finished')
    # pca = PCA()
    # X_pca = pca.fit_transform(X)

    sleep = labels == 1
    wake1 = labels == 2
    wake2 = labels == 3

    plt.figure()
    plt.subplot(1, 2, 1, aspect='equal')
    plt.title("Original space")
    plt.scatter(X[sleep, 0], X[sleep, 1], c="red", s=20, edgecolor='k')
    plt.scatter(X[wake1, 0], X[wake1, 1], c="blue", s=20, edgecolor='k')
    plt.scatter(X[wake2, 0], X[wake2, 1], c='green', s=20, edgecolor='k')
    plt.xlabel("$x_1$")
    plt.ylabel("$x_2$")

    plt.subplot(1, 2, 2, aspect='equal')
    plt.scatter(X_kpca[sleep, 0],
                X_kpca[sleep, 1],
                c="red",
                s=20,
                edgecolor='k')
    plt.scatter(X_kpca[wake1, 0],
                X_kpca[wake1, 1],
                c="blue",
                s=20,
                edgecolor='k')
    plt.scatter(X_kpca[wake2, 0],
                X_kpca[wake2, 1],
                c='green',
                s=20,
                edgecolor='k')
    plt.title("Projection by KPCA")
    plt.xlabel(r"1st principal component in space induced by $\phi$")
    plt.ylabel("2nd component")
    plt.tight_layout()
    plt.savefig('../data/clf_results/kpca')
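

# Note (not in the original code): KernelPCA builds the full n_samples x n_samples kernel
# matrix, which is very costly on tens of thousands of frames. A hedged variant that
# subsamples frames before fitting, purely as an illustration:
def kernel_pca_subsampled(step=10):
    X = ca_data_utils.load_v_matrix().T[8:39992:step]
    kpca = KernelPCA(n_components=2, kernel="rbf", gamma=10, n_jobs=20)
    return kpca.fit_transform(X)

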
def tune_rbf_svm():
    """Grid-search C and gamma for an RBF SVM on standardized V-matrix frames."""
    vid = ca_data_utils.load_v_matrix()[:, 9:39992]
    labels = ca_data_utils.load_labels()[9:39992]

    scaler = StandardScaler()
    X = scaler.fit_transform(vid.T)

    C_range = np.logspace(-1, 2, 4)
    print('C range: ', C_range)
    gamma_range = np.logspace(-3, 3, 7) / X.shape[1]  # log-spaced sweep scaled by 1 / n_features
    print('gamma range: ', gamma_range)
    param_grid = dict(gamma=gamma_range, C=C_range)

    cv = StratifiedShuffleSplit(test_size=0.25, random_state=42)
    grid = GridSearchCV(SVC(), param_grid=param_grid, cv=cv, n_jobs=20)
    print('start to train...')
    grid.fit(X, labels)

    print("The best parameters are %s with a score of %0.2f" %
          (grid.best_params_, grid.best_score_))

    print('The results are:')
    print(grid.cv_results_)
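
    # Optional follow-up (not part of the original run): persist the full CV table the
    # same way load_auto_data() does, and keep the refitted best model around. The output
    # path below is an assumption.
    df = pd.DataFrame.from_dict(grid.cv_results_)
    df.to_csv('../data/clf_results/rbf_svm_grid.csv')
    return grid.best_estimator_  # GridSearchCV refits the best parameters on all of X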


# --- Example #7 ---
import numpy as np
import matplotlib.pyplot as plt
import skimage.io
from matplotlib.patches import Ellipse

import ca_data_utils


def make_video():
    """Render one annotated frame per time point: the raw image plus colored markers
    comparing the true state with the classifier's prediction."""
    vid = skimage.io.imread('../data/vid.tif')[9:39992]
    labels = ca_data_utils.load_labels()[9:39992]
    preds = np.load('../data/clf_results/y_pred.npy')
    i = 0
    for frame, label, pred in zip(vid, labels, preds):
        print('starting ', i)
        print(frame.shape)
        f, (ax1, ax2) = plt.subplots(1,
                                     2,
                                     gridspec_kw={'width_ratios': [5, 1]})
        ax1.imshow(frame)
        # match indicator: green when the prediction agrees with the label, red otherwise
        color = 'g'
        if label != pred:
            color = 'r'
        circle = Ellipse((0.5, 0.1), 0.5, 0.1, color=color)
        color = 'r'  # sleep
        if label == 2:  # wake1
            color = 'g'
        elif label == 3:  # wake2
            color = 'b'
        actual = Ellipse((0.5, 0.9), 0.5, 0.1, color=color)
        color = 'r'  # sleep
        if pred == 2:  # wake1
            color = 'g'
        elif pred == 3:  # wake2
            color = 'b'
        got = Ellipse((0.5, 0.7), 0.5, 0.1, color=color)
        ax2.add_artist(circle)
        ax2.add_artist(actual)
        ax2.add_artist(got)
        fname = '../data/clf_results/video/image_{0:05d}'.format(i)
        i += 1
        print(fname)
        ax2.get_xaxis().set_visible(False)
        ax2.get_yaxis().set_visible(False)
        f.savefig(fname)
        plt.close(f)  # free the figure; rendering tens of thousands of frames would exhaust memory otherwise
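

# Optional post-processing (an assumption, not in the original script): stitch the saved
# frames into a movie with ffmpeg via subprocess. Frame rate and output name are guesses.
def stitch_frames_to_video(fps=30):
    import subprocess
    subprocess.run([
        'ffmpeg', '-framerate', str(fps),
        '-i', '../data/clf_results/video/image_%05d.png',
        '-c:v', 'libx264', '-pix_fmt', 'yuv420p',
        '../data/clf_results/video/predictions.mp4'
    ], check=True)

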
def load_data(step):
    """Load every `step`-th frame of the V matrix together with its label."""
    v = ca_data_utils.load_v_matrix().T[9:39992:step]
    labels = ca_data_utils.load_labels()[9:39992:step]
    return v, labels


import scipy.io
import numpy as np
import skimage.io

import ca_data_utils


def trunk_data_by_states(vid, labels):
    """Split the video columns by behavioural state (1=sleep, 2=wake_1, 3=wake_2) and save each block."""
    state_1 = np.where(labels == 1)
    state_2 = np.where(labels == 2)
    state_3 = np.where(labels == 3)
    print(state_1[0].shape)
    print(state_2[0].shape)
    print(state_3[0].shape)
    sleep = vid[:, state_1[0]]
    wake_1 = vid[:, state_2[0]]
    wake_2 = vid[:, state_3[0]]
    print(sleep.shape)
    np.save('../data/byState/sleep', sleep)
    np.save('../data/byState/wake_1', wake_1)
    np.save('../data/byState/wake_2', wake_2)
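

# A hedged example (not in the original file) of consuming the per-state dumps written
# above: reload them and compare how many frames and how much mean activity each state has.
def compare_state_means():
    sleep = np.load('../data/byState/sleep.npy')
    wake_1 = np.load('../data/byState/wake_1.npy')
    wake_2 = np.load('../data/byState/wake_2.npy')
    for name, arr in [('sleep', sleep), ('wake_1', wake_1), ('wake_2', wake_2)]:
        print('{:>7s}: {} frames, mean activity {:.4f}'.format(name, arr.shape[1], arr.mean()))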


if __name__ == '__main__':
    labels = ca_data_utils.load_labels()
    vid = ca_data_utils.load_vid()
    print(vid.shape)
    trunk_data_by_states(vid, labels)


# --- Example #10 ---
    cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=42)
    grid = GridSearchCV(SVC(), param_grid=param_grid, cv=cv)
    print('start to train...')
    grid.fit(X, labels)

    print("The best parameters are %s with a score of %0.2f" %
          (grid.best_params_, grid.best_score_))

    print('The results are:')
    print(grid.cv_results_)


if __name__ == '__main__':
    vid = ca_data_utils.load_v_matrix().T[9:39992]
    labels = ca_data_utils.load_labels()[9:39992]

    for k in range(5, 11):
        select_models(vid, labels, k)
        # select_k(vid, labels, k)

    # clf = SVC(gamma=0.001, C=10)

    # scaler = StandardScaler()
    # X = scaler.fit_transform(vid)
    # X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.25)
    # print('start to train')
    # clf.fit(X_train, y_train)
    # print('finally finished tada~~')
    # y_pred = clf.predict(X)
    # print('saving y_pred for whole video, the accuracy for test data is ', clf.score(X_test, y_test))