def run_gamma(x, y): perc = 0.6 n = x.shape[0] gamma_list = (np.power(2.0, range(-4, 12))/(n*perc)).tolist() n_iter = 2 train_err_libsvm = np.zeros((len(gamma_list), n_iter)) test_err_libsvm = np.zeros((len(gamma_list), n_iter)) train_err_dsvm = np.zeros((len(gamma_list), n_iter)) test_err_dsvm = np.zeros((len(gamma_list), n_iter)) train_err_pegasos = np.zeros((len(gamma_list), n_iter)) test_err_pegasos = np.zeros((len(gamma_list), n_iter)) ss = cv.StratifiedShuffleSplit(y, n_iter=n_iter, test_size=1-perc, train_size=None, random_state=0) for k, (train, test) in enumerate(ss): ntr = len(train) lmda = 1.0 / ntr print "#iter: %d" % k x_train, x_test, y_train, y_test = x[train], x[test], y[train], y[test] mM_scale = preprocessing.MinMaxScaler(feature_range=(-1, 1)) x_train = mM_scale.fit_transform(x_train) x_test = mM_scale.transform(x_test) for j, gm in enumerate(gamma_list): print "check lamda %f, gamma %f" % (lmda, gm) clf = svm.SVC(C=lmda * ntr, kernel='rbf', gamma=gm, cache_size=600) clf.fit(x_train, y_train) pred = clf.predict(x_train) train_err_libsvm[j, k] = zero_one_loss(y_train, pred) pred = clf.predict(x_test) test_err_libsvm[j, k] = zero_one_loss(y_test, pred) dsvm = DualKSVM(lmda=lmda, gm=gm, kernelstr='rbf', nsweep=ntr/2, b=5, c=1) dsvm.fit(x_train, y_train, x_test, y_test, ) train_err_dsvm[j, k] = dsvm.err_tr[-1] test_err_dsvm[j, k] = dsvm.err_te[-1] kpega = Pegasos(ntr, lmda, gm, nsweep=2, batchsize=2) kpega.train_test(x_train, y_train, x_test, y_test) train_err_pegasos[j, k] = kpega.err_tr[-1] test_err_pegasos[j, k] = kpega.err_te[-1] avg_train_err_libsvm = np.mean(train_err_libsvm, axis=1) avg_test_err_libsvm = np.mean(test_err_libsvm, axis=1) avg_train_err_dsvm = np.mean(train_err_dsvm, axis=1) avg_test_err_dsvm = np.mean(test_err_dsvm, axis=1) avg_train_err_pegasos = np.mean(train_err_pegasos, axis=1) avg_test_err_pegasos = np.mean(test_err_pegasos, axis=1) plt.figure() # color_list = ['b', 'r', 'g', 'c', ] # marker_list = ['o', 'x', '>', 's'] 
plt.loglog(gamma_list, avg_train_err_libsvm, 'bo-', label='libsvm train') plt.loglog(gamma_list, avg_test_err_libsvm, 'ro-', label='libsvm test') plt.loglog(gamma_list, avg_train_err_dsvm, 'gx-', label='dsvm train') plt.loglog(gamma_list, avg_test_err_dsvm, 'cx-', label='dsvm test') plt.loglog(gamma_list, avg_train_err_pegasos, 'mD-', label='pegasos train') plt.loglog(gamma_list, avg_test_err_pegasos, 'kD-', label='pegasos test') plt.legend(bbox_to_anchor=(0, 1.17, 1, .1), loc=2, ncol=2, mode="expand", borderaxespad=0) plt.savefig('../output/usps_diff_gamma.pdf')
clf_md = {} for i, rho in enumerate(rholist): clf_md[i] = DualKSVM(lmda=C / n, kernelstr='rbf', gm=gamma, nsweep=n, b=b, c=c, verbose=True, rho=rho, algo_type='sbmd') clf_md[i].fit(x[train_index, :], y[train_index], x[test_index, :], y[test_index]) trainerr_md[i].append(clf_md[i].err_tr) testerr_md[i].append(clf_md[i].err_te) obj_md[i].append(clf_md[i].obj) # trainerr_md2[i].append(clf_md[i].err_tr2) # testerr_md2[i].append(clf_md[i].err_te2) obj_md2[i].append(clf_md[i].obj2) print "dualsvm time %f " % (time.time() - start_t) start_t = time.time() clf_cd = DualKSVM(lmda=C / n, kernelstr='rbf', gm=gamma, nsweep=int(4 * n), algo_type='cd') clf_cd.fit(x[train_index, :], y[train_index], x[test_index, :], y[test_index]) trainerr_cd.append(clf_cd.err_tr) testerr_cd.append(clf_cd.err_te) obj_cd.append(clf_cd.obj) print "cd svm time %f" % (time.time() - start_t) start_t = time.time() clf_pega = Pegasos(lmda=C / n, gm=gamma, kernelstr='rbf', nsweep=5, batchsize=1) clf_pega.fit(x[train_index, :], y[train_index], x[test_index, :], y[test_index]) trainerr_pega.append(clf_pega.err_tr) testerr_pega.append(clf_pega.err_te) obj_pega.append(clf_pega.obj) trainerr_pega.append(clf_pega.err_tr) testerr_pega.append(clf_pega.err_te) obj_pega.append(clf_pega.obj) print "pegasos time %f" % (time.time() - start_t)
# NOTE(review): this chunk is truncated at the end of the visible region (the
# `if C in trainerr_cd:` branch is cut off mid-way) and relies on names defined
# elsewhere in the file: `x`, `n_iter`, `C`, `gamma`, `rho1`, and the
# `trainerr_dasvm` / `testerr_dasvm` / `obj_dasvm` dicts — confirm upstream.

# Result containers keyed by C; each value accumulates one row per CV split.
trainerr_cd = {}
testerr_cd = {}
obj_cd = {}
trainerr_pega = {}
testerr_pega = {}
obj_pega = {}
b = 4  # DualKSVM block-size parameter (meaning defined by DualKSVM)
c = 1  # DualKSVM step parameter (meaning defined by DualKSVM)
# 30%/50% random splits, repeated n_iter times; unseeded, so runs differ.
rs = cv.ShuffleSplit(x.shape[0], n_iter=n_iter, train_size=0.3, test_size=0.5, random_state=None)
for k, (train_index, test_index) in enumerate(rs):
    print "iter # %d" % k
    n = train_index.size
    start_t = time.time()
    # SCG-DA variant of the dual SVM; lmda scales as C / n_train.
    clf = DualKSVM(lmda=C/n, kernelstr='rbf', gm=gamma, nsweep=n, b=b, c=c, verbose=True, rho=rho1, algo_type='scg_da')
    clf.fit(x[train_index, :], y[train_index], x[test_index, :], y[test_index])
    # Stack this split's curves under key C (first split just stores them).
    if C in trainerr_dasvm:
        trainerr_dasvm[C] = np.vstack((trainerr_dasvm[C], clf.err_tr))
        testerr_dasvm[C] = np.vstack((testerr_dasvm[C], clf.err_te))
        obj_dasvm[C] = np.vstack((obj_dasvm[C], clf.obj))
    else:
        trainerr_dasvm[C] = clf.err_tr
        testerr_dasvm[C] = clf.err_te
        obj_dasvm[C] = clf.obj
    print "dualsvm time %f " % (time.time()-start_t)
    start_t = time.time()
    # Coordinate-descent baseline with 4n sweeps.
    clf2 = DualKSVM(lmda=C/n, kernelstr='rbf', gm=gamma, nsweep=int(4*n), algo_type='cd')
    clf2.fit(x[train_index, :], y[train_index], x[test_index, :], y[test_index])
    if C in trainerr_cd:
        # (chunk ends here in the visible source — remaining branch not shown)
        trainerr_cd[C] = np.vstack((trainerr_cd[C], clf2.err_tr))
random_state = np.random.random_integers(low=0, high=1000) x_train, x_test, y_train, y_test = cv.train_test_split(x, y, test_size=perc, random_state=random_state) # scalar = preprocessing.StandardScaler().fit(x_train) # x_test = scalar.transform(x_test) # x_train = scalar.transform(x_train) mm_scale = preprocessing.MinMaxScaler(feature_range=(-1, 1)) x_train = mm_scale.fit_transform(x_train) x_test = mm_scale.transform(x_test) ntr = x_train.shape[0] # gm = 1.0/1 gm = 1.0/ntr # lmda = 1/float(ntr) lmda = 1000/float(ntr) print('train dual svm') dsvm = DualKSVM(lmda=lmda, gm=gm, kernelstr='rbf', nsweep=0.8 * ntr, b=5, c=1) dsvm.fit(x_train, y_train, x_test, y_test, ) print ('train Pegasos') kpega = Pegasos(lmda=lmda, gm=gm, kernelstr='rbf', nsweep=3) kpega.fit(x_train, y_train, x_test, y_test) clf = svm.SVC(C=lmda*ntr, kernel='rbf', gamma=gm, verbose=True,) # clf_da = svm.SVC(C=lmda*ntr, kernel='rbf', gamma=gm, fit_intercept=False) clf.fit(x_train, y_train) pred = clf.predict(x_test) err_libsvm = zero_one_loss(pred, y_test) print "sklearn err %f" % err_libsvm plt.figure() plt.plot(dsvm.nker_opers, dsvm.err_tr, 'rx-', label='dc train error') plt.plot(kpega.nker_opers, kpega.err_tr, 'b.-', label='pegasos train error')