示例#1
0
def main():
    """Evaluate PCA-reduced VLAD features with linear and RBF SVMs.

    For every codebook size the features returned by ``VLAD(k)`` are
    z-score standardised, wrapped in a DataFrame, split 60/40 into
    train/test, and projected to 50 components by an ``IncrementalPCA``
    fitted on the training part. Each pair of C values is then scored with
    ``SVMmodel.runSVM`` (first value for the linear kernel, second for the
    RBF kernel) and one line per kernel is appended to
    ``res_VLAD_PCA.txt``.
    """
    codebook_sizes = (8, 16)
    # Each pair is (C for the linear kernel, C for the rbf kernel).
    c_pairs = ((0.001, 5), (0.005, 10))
    line_fmt = "VLAD with k=%d, Z-score, SVM with %s kernel, C=%f, score=%f\n"
    reducer = IncrementalPCA(n_components=50, batch_size=1000)

    for k in codebook_sizes:
        print("VLAD, k:%d" % k)
        features = VLAD(k)
        print(features.shape)

        scaled = StandardScaler().fit_transform(features)
        frame = pd.DataFrame(
            data=scaled,
            columns=['feature' + str(i) for i in range(scaled.shape[1])])
        labels = pd.read_csv(y_file_name, names=['label'])
        X_train, X_test, y_train, y_test = train_test_split(
            frame, labels, test_size=0.4)

        print("PCA")
        # The projection is learnt from the training split only.
        reducer.fit(X_train)
        train_reduced = reducer.transform(X_train)
        test_reduced = reducer.transform(X_test)

        for linear_c, rbf_c in c_pairs:
            linear_score = SVMmodel.runSVM(train_reduced, test_reduced,
                                           y_train, y_test, linear_c,
                                           'linear')
            rbf_score = SVMmodel.runSVM(train_reduced, test_reduced, y_train,
                                        y_test, rbf_c, 'rbf')
            with open('res_VLAD_PCA.txt', "a") as out:
                out.writelines([
                    line_fmt % (k, 'linear', linear_c, linear_score),
                    line_fmt % (k, 'rbf', rbf_c, rbf_score),
                ])
def main():
    """Sweep BOW codebook sizes and score linear/RBF SVMs on z-scored features.

    For every ``k`` the features returned by ``BOW(k)`` are z-score
    standardised, split 60/40 into train/test, and scored with
    ``SVMmodel.runSVM`` for each pair of C values (first value for the
    linear kernel, second for the RBF kernel). One line per kernel is
    appended to ``res_BOW.txt``.
    """
    k_range = [8, 16, 32, 64, 128, 256, 512, 1024]
    # Each entry is [C for the linear kernel, C for the rbf kernel].
    C_range = [[0.001, 5], [0.005, 10]]

    # The label file does not depend on k, so it is read once instead of on
    # every iteration.
    y = pd.read_csv(y_file_name, names=['label'])

    for k in k_range:
        print("BOW, k:%d" % (k))
        X = BOW(k)
        print(X.shape)

        # Name the columns after the actual feature width rather than
        # assuming it equals k, so a mismatch cannot break the DataFrame
        # construction below.
        col_name = ['feature' + str(i) for i in range(X.shape[1])]

        X_scaled = StandardScaler().fit_transform(X)
        X_scaled = pd.DataFrame(data=X_scaled, columns=col_name)
        X_train, X_test, y_train, y_test = train_test_split(X_scaled,
                                                            y,
                                                            test_size=0.4)
        for C in C_range:
            linear_score = SVMmodel.runSVM(X_train, X_test, y_train, y_test,
                                           C[0], 'linear')
            rbf_score = SVMmodel.runSVM(X_train, X_test, y_train, y_test, C[1],
                                        'rbf')
            with open('res_BOW.txt', "a") as f:
                f.write(
                    "BOW with k=%d, scale=%s, SVM with %s kernel, C=%f, score=%f\n"
                    % (k, 'Z-score', 'linear', C[0], linear_score))
                f.write(
                    "BOW with k=%d, scale=%s, SVM with %s kernel, C=%f, score=%f\n"
                    % (k, 'Z-score', 'rbf', C[1], rbf_score))
示例#3
0
def runLDA(X_train, X_test, y_train, y_test, comp_range):
    """Score RBF and linear SVMs on LDA projections of varying size.

    For each ``n_comp`` in ``comp_range`` a ``LinearDiscriminantAnalysis``
    (``solver='svd'``) is fitted on the training data and labels, both
    splits are projected, and ``SVMmodel.runSVM`` is run once per kernel
    with the parameter returned by ``SVMmodel.getBestParam``. When
    ``n_comp`` is 2 the projections are also saved with ``np.save``.

    Returns:
        ``(rbf_scores, linear_scores)``: two lists holding, per ``n_comp``,
        the ``.mean()`` of the value returned by ``SVMmodel.runSVM``.
    """
    kernels = ('rbf', 'linear')
    results = {name: [] for name in kernels}

    for n_comp in comp_range:
        print("\nn_comp=%d\n" % n_comp)
        lda = LinearDiscriminantAnalysis(solver='svd', n_components=n_comp)
        lda.fit(X_train, y_train)
        train_proj = lda.transform(X_train)
        test_proj = lda.transform(X_test)

        if n_comp == 2:
            np.save('X_train_proj_2d_LDA', train_proj)
            np.save('X_test_proj_2d_LDA', test_proj)

        # rbf is evaluated before linear, matching the order of `kernels`.
        for name in kernels:
            outcome = SVMmodel.runSVM(train_proj, test_proj, y_train, y_test,
                                      SVMmodel.getBestParam(name), name)
            results[name].append(outcome.mean())

    return results['rbf'], results['linear']
def runAE(comp_range):
    """Score RBF and linear SVMs on auto-encoder codes of varying size.

    For each ``n_comp`` in ``comp_range`` the pair returned by
    ``AutoEncoder(n_comp)`` is used as the train/test projections and
    ``SVMmodel.runSVM`` is run once per kernel with the parameter returned
    by ``SVMmodel.getBestParam``. When ``n_comp`` is 2 the projections are
    also saved with ``np.save``.

    ``y_train`` and ``y_test`` are not parameters; they are resolved from
    the enclosing (module) scope at call time.

    Returns:
        ``(rbf_scores, linear_scores)``: two lists holding, per ``n_comp``,
        the ``.mean()`` of the value returned by ``SVMmodel.runSVM``.
    """
    kernels = ('rbf', 'linear')
    results = {name: [] for name in kernels}

    for n_comp in comp_range:
        print("\nn_comp=%d\n" % n_comp)
        # NOTE(review): this binds a local that is never read here. If it
        # was meant to reset a module-level batch counter it would need a
        # `global` declaration -- confirm against AutoEncoder.
        data_pointer = 0
        train_code, test_code = AutoEncoder(n_comp)

        if n_comp == 2:
            np.save('X_train_proj_2d_AE', train_code)
            np.save('X_test_proj_2d_AE', test_code)

        # rbf is evaluated before linear, matching the order of `kernels`.
        for name in kernels:
            outcome = SVMmodel.runSVM(train_code, test_code, y_train, y_test,
                                      SVMmodel.getBestParam(name), name)
            results[name].append(outcome.mean())

    return results['rbf'], results['linear']
def runTreeBasedSelection(X_train, X_test, y_train, y_test, comp_range):
    """Score SVMs on features kept by an extra-trees based selector.

    Each value in ``comp_range`` is passed as ``n_estimators`` to an
    ``ExtraTreesClassifier`` wrapped in ``SelectFromModel``. The selector is
    fitted on the training data and labels, both splits are reduced with
    it, and ``SVMmodel.runSVM`` is run once per kernel with the parameter
    returned by ``SVMmodel.getBestParam``.

    Returns:
        ``(rbf_scores, linear_scores, dimension)``: the first two lists hold
        the ``.mean()`` of each ``runSVM`` result; ``dimension`` holds the
        number of columns kept by the selector for each value.
    """
    kernels = ('rbf', 'linear')
    results = {name: [] for name in kernels}
    kept_widths = []

    for n_comp in comp_range:
        print("\nn_comp=%f\n" % n_comp)

        picker = SelectFromModel(
            estimator=ExtraTreesClassifier(n_estimators=n_comp))
        picker.fit(X_train, y_train)
        train_kept = picker.transform(X_train)
        test_kept = picker.transform(X_test)

        kept_widths.append(train_kept.shape[1])

        # rbf is evaluated before linear, matching the order of `kernels`.
        for name in kernels:
            outcome = SVMmodel.runSVM(train_kept, test_kept, y_train, y_test,
                                      SVMmodel.getBestParam(name), name)
            results[name].append(outcome.mean())

    return results['rbf'], results['linear'], kept_widths
示例#6
0
def runIsomap(X_train, X_test, y_train, y_test, comp_range, n_neigh):
    """Score RBF and linear SVMs on Isomap embeddings and write a report.

    For each ``n_comp`` in ``comp_range`` an ``Isomap`` with ``n_neigh``
    neighbours is fitted on the training data, both splits are projected,
    and ``SVMmodel.runSVM`` is run once per kernel with the parameter
    returned by ``SVMmodel.getBestParam``. When ``n_comp`` is 2 the
    projections are also saved with ``np.save``.

    After the sweep, one report per kernel is written to
    ``res_Isomap_<kernel>_<n_neigh>.txt`` listing every score plus the best
    ``n_comp`` and its accuracy.

    Args:
        comp_range: indexable sequence of component counts to try.
        n_neigh: neighbour count passed to ``Isomap``; also used in the
            names of the saved arrays and report files.

    Returns:
        ``(rbf_scores, linear_scores)``: two lists holding, per ``n_comp``,
        the ``.mean()`` of the value returned by ``SVMmodel.runSVM``.
    """
    rbf_scores = []
    linear_scores = []
    for n_comp in comp_range:
        print("\nn_comp=%d\n" % (n_comp))
        transformer = Isomap(n_neighbors=n_neigh,
                             n_components=n_comp,
                             n_jobs=8)
        transformer.fit(X_train)
        X_train_proj = transformer.transform(X_train)
        X_test_proj = transformer.transform(X_test)
        if n_comp == 2:
            np.save('X_train_proj_2d_Isomap_' + str(n_neigh), X_train_proj)
            np.save('X_test_proj_2d_Isomap_' + str(n_neigh), X_test_proj)
        score_rbf = SVMmodel.runSVM(X_train_proj, X_test_proj, y_train, y_test,
                                    SVMmodel.getBestParam('rbf'), 'rbf')
        rbf_scores.append(score_rbf.mean())
        score_linear = SVMmodel.runSVM(X_train_proj, X_test_proj, y_train,
                                       y_test, SVMmodel.getBestParam('linear'),
                                       'linear')
        linear_scores.append(score_linear.mean())

    # np.argmax raises on an empty sequence; with nothing evaluated there is
    # nothing to report, so return the (empty) score lists directly.
    if not rbf_scores:
        return rbf_scores, linear_scores

    # Pair each kernel name with its scores directly instead of deriving the
    # name from a loop index (which needed an unreachable fallback branch).
    for kernel, scores in (('rbf', rbf_scores), ('linear', linear_scores)):
        bestIdx = np.argmax(scores)
        bestNComp = comp_range[bestIdx]
        bestAcc = scores[bestIdx]
        with open('res_Isomap_' + kernel + '_' + str(n_neigh) + '.txt',
                  'w') as f:
            for n_comp, acc in zip(comp_range, scores):
                f.write(kernel + ": n_comp = %f, acc = %f\n" % (n_comp, acc))
            f.write(kernel + ": Best n_comp = %f\n" % (bestNComp))
            f.write(kernel + ": acc = %f\n" % (bestAcc))
    return rbf_scores, linear_scores
def runSelectKBest(X_train, X_test, y_train, y_test, comp_range):
    """Score RBF and linear SVMs on the top-k features chosen by SelectKBest.

    For each ``n_comp`` in ``comp_range`` a ``SelectKBest`` using
    ``f_classif`` with ``k=n_comp`` is fitted on the training data and
    labels, both splits are reduced with it, and ``SVMmodel.runSVM`` is run
    once per kernel with the parameter returned by
    ``SVMmodel.getBestParam``. When ``n_comp`` is 2 the reduced arrays are
    also saved with ``np.save``.

    Returns:
        ``(rbf_scores, linear_scores)``: two lists holding, per ``n_comp``,
        the ``.mean()`` of the value returned by ``SVMmodel.runSVM``.
    """
    rbf_scores, linear_scores = [], []

    def evaluate(kernel, train_part, test_part):
        # Look up the kernel's parameter and return the mean SVM score.
        outcome = SVMmodel.runSVM(train_part, test_part, y_train, y_test,
                                  SVMmodel.getBestParam(kernel), kernel)
        return outcome.mean()

    for n_comp in comp_range:
        print("\nn_comp=%d\n" % n_comp)

        chooser = SelectKBest(score_func=f_classif, k=n_comp)
        chooser.fit(X_train, y_train)
        train_kept = chooser.transform(X_train)
        test_kept = chooser.transform(X_test)

        if n_comp == 2:
            np.save('X_train_proj_2d_SelectKBest', train_kept)
            np.save('X_test_proj_2d_SelectKBest', test_kept)

        rbf_scores.append(evaluate('rbf', train_kept, test_kept))
        linear_scores.append(evaluate('linear', train_kept, test_kept))

    return rbf_scores, linear_scores
示例#8
0
def runTSNE(X_train, X_test, y_train, y_test, comp_range, ppl, m):
    """Score RBF and linear SVMs on t-SNE embeddings of varying size.

    For each ``n_comp`` in ``comp_range`` a ``TSNE`` with perplexity ``ppl``
    and gradient method ``m`` embeds the training and test data, and
    ``SVMmodel.runSVM`` is run once per kernel with the parameter returned
    by ``SVMmodel.getBestParam``. When ``n_comp`` is 2 and ``m`` is
    ``'barnes_hut'`` the embeddings are also saved with ``np.save`` under a
    name that includes ``ppl``.

    Args:
        comp_range: iterable of embedding dimensions to try.
        ppl: perplexity passed to ``TSNE`` (previously ignored in favour of
            a hard-coded 50.0 even though it appeared in the file names).
        m: value for ``TSNE``'s ``method`` argument.

    Returns:
        ``(rbf_scores, linear_scores)``: two lists holding, per ``n_comp``,
        the ``.mean()`` of the value returned by ``SVMmodel.runSVM``.
    """
    rbf_scores = []
    linear_scores = []
    for n_comp in comp_range:
        print("\nn_comp=%d\n" % (n_comp))
        transformer = TSNE(n_components=n_comp,
                           perplexity=ppl,
                           method=m,
                           n_jobs=8)
        # fit_transform refits from scratch, so a separate fit() beforehand
        # would only repeat the whole optimisation.
        X_train_proj = transformer.fit_transform(X_train)
        # NOTE(review): the test split is embedded by an independent fit
        # (scikit-learn's TSNE offers no transform() for unseen data), so the
        # train and test embeddings do not share a coordinate system.
        X_test_proj = transformer.fit_transform(X_test)
        if n_comp == 2 and m == 'barnes_hut':
            np.save('X_train_proj_2d_TSNE_' + str(ppl), X_train_proj)
            np.save('X_test_proj_2d_TSNE_' + str(ppl), X_test_proj)
        score_rbf = SVMmodel.runSVM(X_train_proj, X_test_proj, y_train, y_test,
                                    SVMmodel.getBestParam('rbf'), 'rbf')
        rbf_scores.append(score_rbf.mean())
        score_linear = SVMmodel.runSVM(X_train_proj, X_test_proj, y_train,
                                       y_test, SVMmodel.getBestParam('linear'),
                                       'linear')
        linear_scores.append(score_linear.mean())
    return rbf_scores, linear_scores
def runVarianceThreshold(X_train, X_test, y_train, y_test, comp_range):
    """Score SVMs on features surviving a variance threshold.

    Each value in ``comp_range`` is passed as ``threshold`` to a
    ``VarianceThreshold`` fitted on the training data. Both splits are
    reduced with it, and ``SVMmodel.runSVM`` is run once per kernel with
    the parameter returned by ``SVMmodel.getBestParam``.

    Returns:
        ``(rbf_scores, linear_scores, dimension)``: the first two lists hold
        the ``.mean()`` of each ``runSVM`` result; ``dimension`` holds the
        number of columns kept by the selector for each threshold.
    """
    rbf_scores, linear_scores, kept_widths = [], [], []

    def evaluate(kernel, train_part, test_part):
        # Look up the kernel's parameter and return the mean SVM score.
        outcome = SVMmodel.runSVM(train_part, test_part, y_train, y_test,
                                  SVMmodel.getBestParam(kernel), kernel)
        return outcome.mean()

    for n_comp in comp_range:
        print("\nn_comp=%f\n" % n_comp)

        variance_filter = VarianceThreshold(threshold=n_comp)
        variance_filter.fit(X_train)
        train_kept = variance_filter.transform(X_train)
        test_kept = variance_filter.transform(X_test)

        kept_widths.append(train_kept.shape[1])

        rbf_scores.append(evaluate('rbf', train_kept, test_kept))
        linear_scores.append(evaluate('linear', train_kept, test_kept))

    return rbf_scores, linear_scores, kept_widths
示例#10
0
def runPCA(X_train, X_test, y_train, y_test, comp_range, Kernel):
    """Score an SVM on KernelPCA projections of varying size.

    The same ``Kernel`` name is used for both the ``KernelPCA`` projection
    and the SVM. The SVM parameter is looked up once via
    ``SVMmodel.getBestParam(Kernel)``. For each ``n_comp`` in
    ``comp_range`` the projection is fitted on the training data, both
    splits are projected, and ``SVMmodel.runSVM`` is evaluated. When
    ``n_comp`` is 2 the projections are also saved with ``np.save``. The
    running score list is printed after every evaluation.

    Returns:
        A list holding, per ``n_comp``, the ``.mean()`` of the value
        returned by ``SVMmodel.runSVM``.
    """
    best_param = SVMmodel.getBestParam(Kernel)
    history = []

    for n_comp in comp_range:
        print("\nn_comp=%d\n" % n_comp)
        kpca = KernelPCA(n_components=n_comp,
                         kernel=Kernel,
                         copy_X=True,
                         n_jobs=8)
        kpca.fit(X_train)
        train_proj = kpca.transform(X_train)
        test_proj = kpca.transform(X_test)

        if n_comp == 2:
            saved = (('X_train_proj_2d_', train_proj),
                     ('X_test_proj_2d_', test_proj))
            for prefix, projection in saved:
                np.save(prefix + Kernel, projection)

        outcome = SVMmodel.runSVM(train_proj, test_proj, y_train, y_test,
                                  best_param, Kernel)
        history.append(outcome.mean())
        print(history)

    return history