def main():
    """Evaluate PCA-reduced VLAD features with linear and RBF SVMs.

    For every codebook size ``k`` the descriptors returned by ``VLAD(k)``
    are z-score scaled, split 60/40 into train/test, projected onto 50
    components by an ``IncrementalPCA`` fitted on the training split, and
    scored with ``SVMmodel.runSVM`` for each ``[linear_C, rbf_C]`` pair.
    One line per score is appended to ``res_VLAD_PCA.txt``.
    """
    k_range = [8, 16]
    # Each entry is [C used with the linear kernel, C used with the rbf kernel].
    C_range = [[0.001, 5], [0.005, 10]]
    pca = IncrementalPCA(n_components=50, batch_size=1000)

    # The label file does not depend on k, so it is read once up front
    # instead of once per codebook size.
    y = pd.read_csv(y_file_name, names=['label'])

    for k in k_range:
        print("VLAD, k:%d" % (k))
        X = VLAD(k)
        print(X.shape)

        # NOTE(review): the scaler is fitted on all rows before the split,
        # so held-out rows contribute to the scaling statistics. Fit it on
        # X_train only if a leakage-free estimate is required.
        X = StandardScaler().fit_transform(X)
        col_name = ['feature' + str(i) for i in range(X.shape[1])]
        X = pd.DataFrame(data=X, columns=col_name)

        # The split is unseeded, so scores differ from run to run.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

        print("PCA")
        pca.fit(X_train)
        X_train_pca = pca.transform(X_train)
        X_test_pca = pca.transform(X_test)

        for C in C_range:
            linear_score = SVMmodel.runSVM(X_train_pca, X_test_pca, y_train, y_test, C[0], 'linear')
            rbf_score = SVMmodel.runSVM(X_train_pca, X_test_pca, y_train, y_test, C[1], 'rbf')
            # Append mode keeps results from earlier iterations and runs.
            with open('res_VLAD_PCA.txt', "a") as f:
                f.write(
                    "VLAD with k=%d, Z-score, SVM with %s kernel, C=%f, score=%f\n"
                    % (k, 'linear', C[0], linear_score))
                f.write(
                    "VLAD with k=%d, Z-score, SVM with %s kernel, C=%f, score=%f\n"
                    % (k, 'rbf', C[1], rbf_score))
def main():
    """Evaluate bag-of-words features with linear and RBF SVMs.

    For every vocabulary size ``k`` the matrix returned by ``BOW(k)`` is
    z-score scaled, split 60/40 into train/test, and scored with
    ``SVMmodel.runSVM`` for each ``[linear_C, rbf_C]`` pair. One line per
    score is appended to ``res_BOW.txt``.
    """
    k_range = [8, 16, 32, 64, 128, 256, 512, 1024]
    # Each entry is [C used with the linear kernel, C used with the rbf kernel].
    C_range = [[0.001, 5], [0.005, 10]]

    # The label file does not depend on k, so it is read once up front
    # instead of once per vocabulary size.
    y = pd.read_csv(y_file_name, names=['label'])

    for k in k_range:
        print("BOW, k:%d" % (k))
        X = BOW(k)
        print(X.shape)

        # NOTE(review): the scaler is fitted on all rows before the split,
        # so held-out rows contribute to the scaling statistics. Fit it on
        # X_train only if a leakage-free estimate is required.
        X_scaled = StandardScaler().fit_transform(X)
        # Name the columns from the actual width of the matrix rather than
        # assuming BOW(k) always yields exactly k features.
        col_name = ['feature' + str(i) for i in range(X_scaled.shape[1])]
        X_scaled = pd.DataFrame(data=X_scaled, columns=col_name)

        # The split is unseeded, so scores differ from run to run.
        X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.4)

        for C in C_range:
            linear_score = SVMmodel.runSVM(X_train, X_test, y_train, y_test, C[0], 'linear')
            rbf_score = SVMmodel.runSVM(X_train, X_test, y_train, y_test, C[1], 'rbf')
            # Append mode keeps results from earlier iterations and runs.
            with open('res_BOW.txt', "a") as f:
                f.write(
                    "BOW with k=%d, scale=%s, SVM with %s kernel, C=%f, score=%f\n"
                    % (k, 'Z-score', 'linear', C[0], linear_score))
                f.write(
                    "BOW with k=%d, scale=%s, SVM with %s kernel, C=%f, score=%f\n"
                    % (k, 'Z-score', 'rbf', C[1], rbf_score))
def runLDA(X_train, X_test, y_train, y_test, comp_range):
    """Score SVMs on LDA projections for each requested component count.

    For every entry of ``comp_range`` an SVD-solver
    ``LinearDiscriminantAnalysis`` is fitted on the training split and both
    splits are projected. ``SVMmodel.runSVM`` is then run with the ``'rbf'``
    kernel and with the ``'linear'`` kernel, each time with the value
    returned by ``SVMmodel.getBestParam`` for that kernel. The projections
    obtained for 2 components are also written with ``np.save``.

    Returns:
        ``(rbf_scores, linear_scores)``: two lists holding the ``.mean()``
        of each SVM result, in ``comp_range`` order.
    """
    collected = {'rbf': [], 'linear': []}

    for dim in comp_range:
        print("\nn_comp=%d\n"%(dim))

        lda = LinearDiscriminantAnalysis(solver='svd', n_components=dim)
        lda.fit(X_train, y_train)
        train_proj = lda.transform(X_train)
        test_proj = lda.transform(X_test)

        if dim == 2:
            # Persist the 2-component projection of both splits.
            np.save('X_train_proj_2d_LDA', train_proj)
            np.save('X_test_proj_2d_LDA', test_proj)

        # rbf first, then linear (dict insertion order).
        for kernel, bucket in collected.items():
            best_param = SVMmodel.getBestParam(kernel)
            result = SVMmodel.runSVM(train_proj, test_proj, y_train, y_test,
                                     best_param, kernel)
            bucket.append(result.mean())

    return collected['rbf'], collected['linear']
def runAE(comp_range):
    """Score SVMs on auto-encoder embeddings of several sizes.

    For every entry of ``comp_range`` ``AutoEncoder(n_comp)`` is called to
    obtain the projected train and test sets. ``SVMmodel.runSVM`` is then
    run with the ``'rbf'`` and the ``'linear'`` kernel, each time with the
    value returned by ``SVMmodel.getBestParam`` for that kernel. The
    projections obtained for 2 components are also written with ``np.save``.

    ``y_train`` and ``y_test`` are not parameters; they are read from the
    enclosing scope.

    Returns:
        ``(rbf_scores, linear_scores)``: two lists holding the ``.mean()``
        of each SVM result, in ``comp_range`` order.
    """
    rbf_scores, linear_scores = [], []
    per_kernel = (('rbf', rbf_scores), ('linear', linear_scores))

    for code_size in comp_range:
        print("\nn_comp=%d\n" % (code_size))

        # NOTE(review): this binds a function-local name that nothing below
        # reads. If it is meant to reset a module-level cursor it would need
        # a ``global`` declaration -- confirm the intent.
        data_pointer = 0

        train_codes, test_codes = AutoEncoder(code_size)

        if code_size == 2:
            # Persist the 2-component projection of both splits.
            np.save('X_train_proj_2d_AE', train_codes)
            np.save('X_test_proj_2d_AE', test_codes)

        for kernel, bucket in per_kernel:
            best_param = SVMmodel.getBestParam(kernel)
            result = SVMmodel.runSVM(train_codes, test_codes, y_train, y_test,
                                     best_param, kernel)
            bucket.append(result.mean())

    return rbf_scores, linear_scores
def runTreeBasedSelection(X_train, X_test, y_train, y_test, comp_range):
    """Score SVMs on features selected by extra-trees models of varying size.

    Each entry of ``comp_range`` is used as ``n_estimators`` of an
    ``ExtraTreesClassifier`` wrapped in ``SelectFromModel``. The selector is
    fitted on the training split and applied to both splits.
    ``SVMmodel.runSVM`` is then run with the ``'rbf'`` and the ``'linear'``
    kernel, each time with the value returned by ``SVMmodel.getBestParam``
    for that kernel.

    Returns:
        ``(rbf_scores, linear_scores, dimension)``: the ``.mean()`` of each
        SVM result per kernel, and the number of columns kept by the
        selector, all in ``comp_range`` order.
    """
    rbf_scores, linear_scores, dimension = [], [], []

    for n_trees in comp_range:
        print("\nn_comp=%f\n"%(n_trees))

        selector = SelectFromModel(estimator=ExtraTreesClassifier(n_estimators=n_trees))
        selector.fit(X_train, y_train)
        train_kept = selector.transform(X_train)
        test_kept = selector.transform(X_test)

        # Record how many features survived the selection.
        dimension.append(train_kept.shape[1])

        rbf_result = SVMmodel.runSVM(
            train_kept, test_kept, y_train, y_test,
            SVMmodel.getBestParam('rbf'), 'rbf',
        )
        rbf_scores.append(rbf_result.mean())

        linear_result = SVMmodel.runSVM(
            train_kept, test_kept, y_train, y_test,
            SVMmodel.getBestParam('linear'), 'linear',
        )
        linear_scores.append(linear_result.mean())

    return rbf_scores, linear_scores, dimension
def runIsomap(X_train, X_test, y_train, y_test, comp_range, n_neigh):
    """Score SVMs on Isomap embeddings and write a per-kernel report.

    For every entry of ``comp_range`` an ``Isomap`` with ``n_neigh``
    neighbours is fitted on the training split and both splits are
    projected. ``SVMmodel.runSVM`` is then run with the ``'rbf'`` and the
    ``'linear'`` kernel, each time with the value returned by
    ``SVMmodel.getBestParam`` for that kernel. The projections obtained for
    2 components are also written with ``np.save``.

    After the sweep, one report per kernel is written to
    ``res_Isomap_<kernel>_<n_neigh>.txt`` (overwriting any existing file).
    It lists every ``(n_comp, acc)`` pair followed by the best pair.

    Args:
        comp_range: sequence of component counts to try. When it is empty
            nothing is evaluated and no report file is written.
        n_neigh: ``n_neighbors`` passed to ``Isomap``; also part of the
            output file names.

    Returns:
        ``(rbf_scores, linear_scores)``: two lists holding the ``.mean()``
        of each SVM result, in ``comp_range`` order.
    """
    rbf_scores = []
    linear_scores = []

    for n_comp in comp_range:
        print("\nn_comp=%d\n" % (n_comp))

        transformer = Isomap(n_neighbors=n_neigh, n_components=n_comp, n_jobs=8)
        transformer.fit(X_train)
        X_train_proj = transformer.transform(X_train)
        X_test_proj = transformer.transform(X_test)

        if n_comp == 2:
            # Persist the 2-component projection of both splits.
            np.save('X_train_proj_2d_Isomap_' + str(n_neigh), X_train_proj)
            np.save('X_test_proj_2d_Isomap_' + str(n_neigh), X_test_proj)

        score_rbf = SVMmodel.runSVM(X_train_proj, X_test_proj, y_train, y_test,
                                    SVMmodel.getBestParam('rbf'), 'rbf')
        rbf_scores.append(score_rbf.mean())

        score_linear = SVMmodel.runSVM(X_train_proj, X_test_proj, y_train, y_test,
                                       SVMmodel.getBestParam('linear'), 'linear')
        linear_scores.append(score_linear.mean())

    # np.argmax raises ValueError on an empty sequence, so with nothing
    # evaluated there is no "best" entry to report.
    if not rbf_scores:
        return rbf_scores, linear_scores

    for kernel, scores in (('rbf', rbf_scores), ('linear', linear_scores)):
        bestIdx = np.argmax(scores)
        bestNComp = comp_range[bestIdx]
        bestAcc = scores[bestIdx]
        with open('res_Isomap_' + kernel + '_' + str(n_neigh) + '.txt', 'w') as f:
            for n_comp, acc in zip(comp_range, scores):
                f.write(kernel + ": n_comp = %f, acc = %f\n" % (n_comp, acc))
            f.write(kernel + ": Best n_comp = %f\n" % (bestNComp))
            f.write(kernel + ": acc = %f\n" % (bestAcc))

    return rbf_scores, linear_scores
def runSelectKBest(X_train, X_test, y_train, y_test, comp_range):
    """Score SVMs on the top-k features ranked by ``f_classif``.

    For every entry of ``comp_range`` a ``SelectKBest`` selector with that
    ``k`` is fitted on the training split and applied to both splits.
    ``SVMmodel.runSVM`` is then run with the ``'rbf'`` and the ``'linear'``
    kernel, each time with the value returned by ``SVMmodel.getBestParam``
    for that kernel. The selections obtained for ``k == 2`` are also
    written with ``np.save``.

    Returns:
        ``(rbf_scores, linear_scores)``: two lists holding the ``.mean()``
        of each SVM result, in ``comp_range`` order.
    """
    rbf_scores, linear_scores = [], []

    def _evaluate(kernel, train_part, test_part):
        # Run one SVM with the stored best parameter for ``kernel``.
        outcome = SVMmodel.runSVM(train_part, test_part, y_train, y_test,
                                  SVMmodel.getBestParam(kernel), kernel)
        return outcome.mean()

    for top_k in comp_range:
        print("\nn_comp=%d\n" % (top_k))

        picker = SelectKBest(score_func=f_classif, k=top_k)
        picker.fit(X_train, y_train)
        train_kept = picker.transform(X_train)
        test_kept = picker.transform(X_test)

        if top_k == 2:
            # Persist the 2-feature selection of both splits.
            np.save('X_train_proj_2d_SelectKBest', train_kept)
            np.save('X_test_proj_2d_SelectKBest', test_kept)

        rbf_scores.append(_evaluate('rbf', train_kept, test_kept))
        linear_scores.append(_evaluate('linear', train_kept, test_kept))

    return rbf_scores, linear_scores
def runTSNE(X_train, X_test, y_train, y_test, comp_range, ppl, m):
    """Score SVMs on t-SNE embeddings for each requested dimensionality.

    For every entry of ``comp_range`` a ``TSNE`` model is built and used to
    embed the training and the test split. ``SVMmodel.runSVM`` is then run
    with the ``'rbf'`` and the ``'linear'`` kernel, each time with the value
    returned by ``SVMmodel.getBestParam`` for that kernel. When 2 components
    are requested with the ``'barnes_hut'`` method, both embeddings are
    also written with ``np.save`` under a name that includes ``ppl``.

    Args:
        comp_range: iterable of embedding dimensionalities to try.
        ppl: perplexity passed to ``TSNE``; also part of the saved file
            names.
        m: value passed as ``TSNE(method=...)``.

    Returns:
        ``(rbf_scores, linear_scores)``: two lists holding the ``.mean()``
        of each SVM result, in ``comp_range`` order.
    """
    rbf_scores = []
    linear_scores = []

    for n_comp in comp_range:
        print("\nn_comp=%d\n" % (n_comp))

        # Use the caller's perplexity. It was previously hard-coded to 50.0
        # even though the saved file names are labelled with ``ppl``.
        transformer = TSNE(n_components=n_comp, perplexity=ppl, method=m, n_jobs=8)

        # fit_transform fits the model itself, so no separate fit() call is
        # needed beforehand.
        X_train_proj = transformer.fit_transform(X_train)
        # NOTE(review): the test split is embedded by a second, independent
        # fit, so its coordinates are not in the same space as the training
        # embedding. Confirm this is acceptable for the downstream SVM.
        X_test_proj = transformer.fit_transform(X_test)

        if n_comp == 2 and m == 'barnes_hut':
            np.save('X_train_proj_2d_TSNE_' + str(ppl), X_train_proj)
            np.save('X_test_proj_2d_TSNE_' + str(ppl), X_test_proj)

        score_rbf = SVMmodel.runSVM(X_train_proj, X_test_proj, y_train, y_test,
                                    SVMmodel.getBestParam('rbf'), 'rbf')
        rbf_scores.append(score_rbf.mean())

        score_linear = SVMmodel.runSVM(X_train_proj, X_test_proj, y_train, y_test,
                                       SVMmodel.getBestParam('linear'), 'linear')
        linear_scores.append(score_linear.mean())

    return rbf_scores, linear_scores
def runVarianceThreshold(X_train, X_test, y_train, y_test, comp_range):
    """Score SVMs on features kept by several variance thresholds.

    Each entry of ``comp_range`` is used as the ``threshold`` of a
    ``VarianceThreshold`` selector, which is fitted on the training split
    and applied to both splits. ``SVMmodel.runSVM`` is then run with the
    ``'rbf'`` and the ``'linear'`` kernel, each time with the value returned
    by ``SVMmodel.getBestParam`` for that kernel.

    Returns:
        ``(rbf_scores, linear_scores, dimension)``: the ``.mean()`` of each
        SVM result per kernel, and the number of columns kept by the
        selector, all in ``comp_range`` order.
    """
    dimension = []
    by_kernel = {'rbf': [], 'linear': []}

    for cutoff in comp_range:
        print("\nn_comp=%f\n" % (cutoff))

        var_filter = VarianceThreshold(threshold=cutoff)
        var_filter.fit(X_train)
        train_kept = var_filter.transform(X_train)
        test_kept = var_filter.transform(X_test)

        # Record how many features survived this threshold.
        dimension.append(train_kept.shape[1])

        # rbf first, then linear (dict insertion order).
        for kernel in by_kernel:
            outcome = SVMmodel.runSVM(train_kept, test_kept, y_train, y_test,
                                      SVMmodel.getBestParam(kernel), kernel)
            by_kernel[kernel].append(outcome.mean())

    return by_kernel['rbf'], by_kernel['linear'], dimension
def runPCA(X_train, X_test, y_train, y_test, comp_range, Kernel):
    """Score an SVM on kernel-PCA projections of several sizes.

    The same ``Kernel`` name is used for the ``KernelPCA`` projection, for
    the SVM, and to look up the SVM parameter via
    ``SVMmodel.getBestParam``. For every entry of ``comp_range`` the
    projection is fitted on the training split, both splits are projected,
    and ``SVMmodel.runSVM`` is run once. The projections obtained for 2
    components are also written with ``np.save`` under a name that ends
    with ``Kernel``.

    Returns:
        A list holding the ``.mean()`` of each SVM result, in
        ``comp_range`` order. The list is also printed before returning.
    """
    best_param = SVMmodel.getBestParam(Kernel)
    scores = []

    for dim in comp_range:
        print("\nn_comp=%d\n" % (dim))

        kpca = KernelPCA(n_components=dim, kernel=Kernel, copy_X=True, n_jobs=8)
        kpca.fit(X_train)
        train_proj = kpca.transform(X_train)
        test_proj = kpca.transform(X_test)

        if dim == 2:
            # Persist the 2-component projection of both splits.
            np.save('X_train_proj_2d_' + Kernel, train_proj)
            np.save('X_test_proj_2d_' + Kernel, test_proj)

        outcome = SVMmodel.runSVM(train_proj, test_proj, y_train, y_test,
                                  best_param, Kernel)
        scores.append(outcome.mean())

    print(scores)
    return scores