def main(args):
    """Run ``agd_rho`` once on a data set and print timing, loss and accuracy.

    Attributes read from *args*: ``dname`` (data set file stem), ``eps``,
    ``delta``, ``obj_clip``, ``grad_clip`` and ``exp_dec``.

    The loss and accuracy are evaluated on the same ``X, y`` that were handed
    to ``agd_rho``; the loss is divided by the number of rows of ``X`` and the
    accuracy is multiplied by 100 before printing.
    """
    # load the dataset
    data_path = "../../../Experiment/Dataset/dat/{0}.dat".format(args.dname)
    X, y = load_dat(data_path, minmax=(0, 1), normalize=False, bias_term=True)
    n_samples = X.shape[0]

    eps_total, delta = args.eps, args.delta
    obj_clip, grad_clip = args.obj_clip, args.grad_clip

    # for svm only: every label below 1 is rewritten to -1
    y[y < 1] = -1

    # presumably the zCDP parameter matching (eps_total, delta) -- confirm
    # against dp_to_zcdp
    rho = dp_to_zcdp(eps_total, delta)
    print("rho = {:.5f}".format(rho))

    started = time.clock()
    w = agd_rho(
        X, y, rho, eps_total, delta,
        svm_grad, svm_loss, svm_test,
        obj_clip, grad_clip,
        reg_coeff=0.01, exp_dec=args.exp_dec, verbose=True,
    )
    elapsed = time.clock() - started
    # Same text as the two-item print statement: the items are separated by
    # a single space.
    print(" ".join(("time = ", str(elapsed))))

    mean_loss = svm_loss(w, X, y) / n_samples
    accuracy = svm_test(w, X, y)
    print("loss: {:.5f}\t acc: {:5.2f}".format(mean_loss, accuracy * 100))
def main(args):
    """Cross-validate ``dpsgd_ma`` (SVM gradient) over a list of iteration counts.

    Attributes read from *args*: ``dname``, ``rep``, ``batch_size``,
    ``reg_coeff`` and ``delta``.

    For every fold of a repeated 5-fold split, every iteration count and
    every repetition, the solver is fitted on the training part; the training
    loss per sample and the test accuracy (times 100) are recorded.  The
    epsilon values returned by the solver are collected once, during the
    first repetition of the first fold.

    Two files are written, ``sgdma_svm_<dname>_acc.out`` and
    ``sgdma_svm_<dname>_obj.out``.  Each holds three rows: the collected
    epsilons, then the mean and the standard deviation over repetitions and
    folds, with one column per iteration count.
    """
    data_path = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(data_path, minmax=(0, 1), bias_term=True)
    n_samples, n_features = X.shape

    # every label below 1 is rewritten to -1
    y[y < 1] = -1

    n_repeats = args.rep
    iteration_counts = [1, 100, 1000, 10000, 20000]
    learning_rate = 0.05
    sigma = 4  # fixed sigma as in MA paper

    n_splits = 5  # 5-folds cross-validation
    cv_repeats = 2
    result_shape = (len(iteration_counts), n_repeats, n_splits * cv_repeats)
    acc = np.zeros(result_shape)
    obj = np.zeros(result_shape)
    epsilon = []

    folds = RepeatedKFold(n_splits=n_splits, n_repeats=cv_repeats)

    for fold, (train_idx, test_idx) in enumerate(folds.split(X)):
        train_X, train_y = X[train_idx, :], y[train_idx]
        test_X, test_y = X[test_idx, :], y[test_idx]
        n_train = train_X.shape[0]

        # A positive command-line batch size wins over the sqrt-based default.
        batch_size = (args.batch_size if args.batch_size > 0
                      else int(np.sqrt(n_train) + 10))

        for i, n_iter in enumerate(iteration_counts):
            for rep in range(n_repeats):
                sol, eps = dpsgd_ma(
                    train_X, train_y, svm_grad, sigma, n_iter,
                    learning_rate, batch_size,
                    reg_coeff=args.reg_coeff, delta=args.delta,
                )
                obj[i, rep, fold] = svm_loss(sol, train_X, train_y) / n_train
                acc[i, rep, fold] = svm_test(sol, test_X, test_y) * 100.0

                # Record one epsilon per iteration count, from the very
                # first run only.
                if fold == 0 and rep == 0:
                    epsilon.append(eps)

    def summarize(values):
        # rows: epsilon, mean, std (statistics taken over repetitions and folds)
        return np.vstack([
            np.array(epsilon),
            np.mean(values, axis=(1, 2)),
            np.std(values, axis=(1, 2)),
        ])

    avg_acc = summarize(acc)
    avg_obj = summarize(obj)

    prefix = "sgdma_svm_{0}".format(args.dname)
    np.savetxt("{0}_acc.out".format(prefix), avg_acc, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(prefix), avg_obj, fmt='%.5f')
def main(args):
    """Cross-validate ``dpsgd_adv`` (SVM gradient) over a grid of epsilons.

    Attributes read from *args*: ``dname``, ``rep``, ``batch_size``, ``T``
    and ``reg_coeff``.

    For every fold of a repeated 5-fold split (3 repeats), every epsilon and
    every repetition, the solver is fitted on the training part; the training
    loss per sample and the test accuracy (times 100) are recorded.

    The number of iterations defaults to ``round(n_train * eps / 500)``.  A
    positive ``args.T`` replaces it with ``int(args.T * eps)``.  Both values
    are floored at 1, so a small ``args.T * eps`` can no longer request a run
    with zero iterations.

    Two files are written, ``sgdadv_svm_<dname>_acc.out`` and
    ``sgdadv_svm_<dname>_obj.out``.  Each holds two rows: the mean and the
    standard deviation over repetitions and folds, one column per epsilon.
    """
    fpath = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(fpath, minmax=(0, 1), bias_term=True)

    # every label below 1 is rewritten to -1
    y[y < 1] = -1

    nrep = args.rep
    epsilon = [0.05, 0.1, 0.2, 0.4, 0.8, 1.6]
    neps = len(epsilon)
    learning_rate = 0.1

    K = 5  # 5-folds cross-validation
    cv_rep = 3
    acc = np.zeros((neps, nrep, K * cv_rep))
    obj = np.zeros((neps, nrep, K * cv_rep))

    rkf = RepeatedKFold(n_splits=K, n_repeats=cv_rep)

    for k, (train, test) in enumerate(rkf.split(X)):
        train_X, train_y = X[train, :], y[train]
        test_X, test_y = X[test, :], y[test]
        n_train = train_X.shape[0]

        # A positive command-line batch size wins over the sqrt-based default.
        batch_size = int(np.sqrt(n_train) + 10)
        if args.batch_size > 0:
            batch_size = args.batch_size

        for i, eps in enumerate(epsilon):
            # number of iterations
            if args.T > 0:
                # int() truncates, so e.g. T=10 with eps=0.05 would give 0;
                # apply the same floor of 1 that the heuristic below uses.
                T = max(int(args.T * eps), 1)
            else:
                T = max(int(round((n_train * eps) / 500.0)), 1)

            for j in range(nrep):
                sol = dpsgd_adv(train_X, train_y, svm_grad, eps, T,
                                learning_rate, batch_size,
                                reg_coeff=args.reg_coeff)
                obj[i, j, k] = svm_loss(sol, train_X, train_y) / n_train
                acc[i, j, k] = svm_test(sol, test_X, test_y) * 100.0

    # rows: mean, std (statistics taken over repetitions and folds)
    avg_acc = np.vstack([np.mean(acc, axis=(1, 2)),
                         np.std(acc, axis=(1, 2))])
    avg_obj = np.vstack([np.mean(obj, axis=(1, 2)),
                         np.std(obj, axis=(1, 2))])

    filename = "sgdadv_svm_{0}".format(args.dname)
    np.savetxt("{0}_acc.out".format(filename), avg_acc, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(filename), avg_obj, fmt='%.5f')
def main(args):
    """Cross-validate ``privgene`` (SVM score) over a grid of epsilons.

    Attributes read from *args*: ``dname``, ``rep``, ``delta``, ``T`` and
    ``batch_size``.

    For every fold of a repeated 5-fold split (2 repeats), every epsilon and
    every repetition, the solver is fitted on the training part; the training
    loss per sample and the test accuracy (times 100) are recorded.

    The number of iterations ``r`` defaults to ``round(n_train * eps / 800)``.
    A positive ``args.T`` replaces it with ``int(args.T * eps)``.  Both values
    are floored at 1 because ``r`` is used as a divisor when the
    per-iteration budget is computed.

    Two files are written, ``pgene_svm_<dname>_acc.out`` and
    ``pgene_svm_<dname>_obj.out``.  Each holds two rows: the mean and the
    standard deviation over repetitions and folds, one column per epsilon.
    """
    fpath = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(fpath, minmax=(0, 1), bias_term=True)

    # every label below 1 is rewritten to -1
    y[y < 1] = -1

    nrep = args.rep
    delta = args.delta
    epsilon = [0.05, 0.1, 0.2, 0.4, 0.8, 1.6]
    neps = len(epsilon)

    K = 5  # 5-folds cross-validation
    cv_rep = 2
    acc = np.zeros((neps, nrep, K * cv_rep))
    obj = np.zeros((neps, nrep, K * cv_rep))

    rkf = RepeatedKFold(n_splits=K, n_repeats=cv_rep)

    for k, (train, test) in enumerate(rkf.split(X)):
        train_X, train_y = X[train, :], y[train]
        test_X, test_y = X[test, :], y[test]
        n_train = train_X.shape[0]

        for i, eps in enumerate(epsilon):
            # number of iterations
            if args.T > 0:
                # int() truncates, so e.g. T=10 with eps=0.05 would give 0
                # and the division below would fail; floor at 1 like the
                # heuristic does.
                r = max(int(args.T * eps), 1)
            else:
                r = max(int(round((n_train * eps) / 800.0)), 1)

            # Per-iteration budget derived from the overall rho; the
            # alternative the original author left commented out was
            # eps_iter = eps / r.
            rho = dp_to_zcdp(eps, delta)
            eps_iter = np.sqrt((2. * rho) / r)

            for j in range(nrep):
                sol = privgene(train_X, train_y, eps_iter, r, svm_score,
                               C=10, batch_size=args.batch_size)
                obj[i, j, k] = svm_loss(sol, train_X, train_y) / n_train
                acc[i, j, k] = svm_test(sol, test_X, test_y) * 100.0

    # rows: mean, std (statistics taken over repetitions and folds)
    avg_acc = np.vstack([np.mean(acc, axis=(1, 2)),
                         np.std(acc, axis=(1, 2))])
    avg_obj = np.vstack([np.mean(obj, axis=(1, 2)),
                         np.std(obj, axis=(1, 2))])

    filename = "pgene_svm_{0}".format(args.dname)
    np.savetxt("{0}_acc.out".format(filename), avg_acc, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(filename), avg_obj, fmt='%.5f')
def main(args):
    """Cross-validate ``outpert_gd`` (SVM gradient) over a grid of epsilons.

    Attributes read from *args*: ``dname``, ``rep``, ``delta``, ``L``,
    ``step_size``, ``T`` and ``reg_coeff``.

    For every fold of a repeated 5-fold split (2 repeats), every epsilon and
    every repetition, the solver is fitted on the fold's training part only;
    the training loss per sample and the accuracy on the held-out part
    (times 100) are recorded.

    Two files are written, ``outpert_svm_<dname>_acc.out`` and
    ``outpert_svm_<dname>_obj.out``.  Each holds two rows: the mean and the
    standard deviation over repetitions and folds, one column per epsilon.
    """
    fpath = "./dataset/{0}.dat".format(args.dname)
    X, y = load_dat(fpath, minmax=(0, 1), normalize=False, bias_term=True)

    # every label below 0.5 is rewritten to -1
    y[y < 0.5] = -1

    nrep = args.rep
    delta = args.delta
    L = args.L
    step_size = args.step_size
    T = args.T
    epsilon = [0.05, 0.1, 0.2, 0.4, 0.8, 1.6]
    neps = len(epsilon)

    K = 5  # 5-folds cross-validation
    cv_rep = 2
    acc = np.zeros((neps, nrep, K * cv_rep))
    obj = np.zeros((neps, nrep, K * cv_rep))

    rkf = RepeatedKFold(n_splits=K, n_repeats=cv_rep)

    for k, (train, test) in enumerate(rkf.split(X)):
        train_X, train_y = X[train, :], y[train]
        test_X, test_y = X[test, :], y[test]
        n_train = train_X.shape[0]

        for i, eps in enumerate(epsilon):
            for j in range(nrep):
                # Fit on the training split only.  Passing the full X, y
                # here would let the held-out fold leak into training and
                # make the test accuracy below meaningless.
                sol = outpert_gd(train_X, train_y, svm_grad, eps, T, L,
                                 step_size, delta=delta,
                                 reg_coeff=args.reg_coeff)
                obj[i, j, k] = svm_loss(sol, train_X, train_y) / n_train
                acc[i, j, k] = svm_test(sol, test_X, test_y) * 100.0

    # rows: mean, std (statistics taken over repetitions and folds)
    avg_acc = np.vstack([np.mean(acc, axis=(1, 2)),
                         np.std(acc, axis=(1, 2))])
    avg_obj = np.vstack([np.mean(obj, axis=(1, 2)),
                         np.std(obj, axis=(1, 2))])

    filename = "outpert_svm_{0}".format(args.dname)
    np.savetxt("{0}_acc.out".format(filename), avg_acc, fmt='%.5f')
    np.savetxt("{0}_obj.out".format(filename), avg_obj, fmt='%.5f')
# Runs dpsgd_ma and dpsgd_adv on the same data and prints one result line per
# setting.  X, y and N are not defined in this fragment and must already be in
# scope; loss is divided by N and accuracy is multiplied by 100 for display.
sigma = 4
batch_size = 1000
learning_rate = 0.05
reg_coeff = 0.001

print("SGD with moments accountant")
for T in (1, 100, 1000, 10000, 20000):
    w, eps = dpsgd_ma(
        X, y, svm_grad, sigma, T, learning_rate, batch_size,
        reg_coeff=reg_coeff,
    )
    loss = svm_loss(w, X, y) / N
    acc = svm_test(w, X, y)
    print("[T={:5d}] eps: {:.5f}\tloss: {:.5f}\tacc: {:5.2f}".format(
        T, eps, loss, acc * 100))

print("\nSGD with advanced composition")
for eps in (0.05, 0.1, 0.2, 0.4, 0.8, 1.6):
    # used the same heuristic as in PrivGene; never fewer than one iteration
    T = max(int(round((N * eps) / 500.0)), 1)
    # the learning rate is 0.1 here, not the learning_rate used above
    w = dpsgd_adv(X, y, svm_grad, eps, T, 0.1, batch_size)
    loss = svm_loss(w, X, y) / N
    acc = svm_test(w, X, y)
    print("eps: {:4.2f}\tloss: {:.5f}\tacc: {:5.2f}".format(
        eps, loss, acc * 100))