def compute_privacy_cost_sgld(dp_sgld_params, Ts):
    """Return the (epsilon, delta) privacy cost of DP-SGLD after each horizon in Ts.

    Args:
        dp_sgld_params: dict with 'tau' (batch size) and 'noise_sigma'
            (Gaussian noise multiplier).
        Ts: iterable of iteration counts to evaluate.

    Returns:
        (epsilons, delta): numpy array of epsilon values, one per entry of Ts,
        and the fixed delta used for the conversion.
    """
    from privacy.analysis.compute_dp_sgd_privacy import compute_rdp, get_privacy_spent
    # NOTE(review): dataset size is hard-coded to 1e6 here — confirm against the data.
    data_size = 1e6
    sampling_ratio = dp_sgld_params['tau'] / data_size
    delta = 2 / data_size
    orders = range(2, 500)
    # RDP of a single subsampled-Gaussian step; RDP composes linearly, so the
    # cost after T steps is simply T times the per-step cost.
    per_step_rdp = compute_rdp(sampling_ratio, dp_sgld_params['noise_sigma'], 1, orders)
    epsilons = np.array([
        get_privacy_spent(orders, per_step_rdp * T, target_delta=delta)[0]
        for T in Ts
    ])
    return epsilons, delta
def find_sigma(target_eps, U, L, q, T, target_delta, ant_T=1):
    """Bisection search for a noise multiplier sigma matching a target epsilon.

    Searches sigma in [L, U] until the composed privacy cost of releasing
    2 * ant_T models (hence the factor below) lands just above target_eps,
    within a 0.01 tolerance.

    Args:
        target_eps: desired epsilon.
        U, L: upper and lower bounds for sigma.
        q: subsampling ratio.
        T: number of iterations.
        target_delta: delta for the RDP -> (eps, delta) conversion.
        ant_T: number of release pairs composed (default 1).

    Returns:
        (sigma, eps): the found noise multiplier and its epsilon. If the search
        does not converge within 100 bisection steps, the last midpoint is
        returned after printing a warning.
    """
    orders = range(2, 500)  # loop-invariant: same RDP orders every iteration
    max_iter = 100
    for _ in range(max_iter):
        m = (U + L) / 2
        rdp_eps = compute_rdp(q, m, T, orders)
        # Factor 2 * ant_T: total budget of all composed releases.
        eps = get_privacy_spent(orders, rdp_eps,
                                target_delta=target_delta)[0] * 2 * ant_T
        if eps > target_eps and abs(eps - target_eps) < 0.01:
            return m, eps
        if eps > target_eps:
            L = m  # noise too small -> raise the lower bound
        else:
            U = m  # more private than needed -> lower the upper bound
    # Fixed garbled message ("max nmbr of iter exceed").
    print("Maximum number of iterations ({}) exceeded".format(max_iter))
    return m, eps
def main():
    """Train DP-VI mixture models on the alive/dead female cohorts n_runs times.

    Reads sigma, n_runs and seed from sys.argv, computes the privacy budget,
    then repeatedly fits one model per cohort, pickling results under ./res/
    and ./female_models/. Relies on module-level globals (alive_female_df,
    dead_female_df, their variable_types, use_cuda, infer).
    """
    # DPVI hyper-parameters
    T = 10000
    C = 1.0
    lr = 1e-2
    female_k = 10  # number of mixture components
    q = 0.005
    sigma = float(sys.argv[1])
    n_runs = int(sys.argv[2])
    seed = int(sys.argv[3])
    delta = 1e-6
    optimizer = torch.optim.Adam

    # Seed every RNG in play so runs are reproducible.
    npr.seed(seed)
    if use_cuda:
        torch.set_default_tensor_type('torch.cuda.DoubleTensor')
        torch.cuda.manual_seed(seed)
    else:
        torch.set_default_tensor_type('torch.DoubleTensor')
        torch.manual_seed(seed)

    # Privacy accounting: T subsampled-Gaussian steps; the factor 2 and the
    # delta/2 split account for releasing two models (alive + dead).
    from privacy.analysis.compute_dp_sgd_privacy import compute_rdp, get_privacy_spent
    rdp_orders = range(2, 500)
    rdp_eps = compute_rdp(q, sigma, T, rdp_orders)
    epsilon = 2 * get_privacy_spent(rdp_orders, rdp_eps, target_delta=delta / 2)[0]
    print("Epsilon : {}".format(epsilon))

    res_dir = './res/'
    params = {'T': T, 'C': C, 'lr': lr, 'female_k': female_k,
              'q': q, 'sigma': sigma, 'epsilon': epsilon,
              'n_runs': n_runs, 'seed': seed}

    # Pick a result filename that does not collide with an existing params file.
    date = datetime.date.today().isoformat()
    base_fname = '{}_{}'.format(date, seed)
    fname = base_fname
    fname_i = 0
    while True:
        candidate = res_dir + 'params_{}_{}.p'.format(fname, np.round(epsilon, 2))
        try:
            # Narrow exception instead of the old bare `except:`, which also
            # swallowed KeyboardInterrupt and genuine bugs.
            with open(candidate, 'r'):
                pass
        except FileNotFoundError:
            break  # name is free
        # Rebuild from the base name each time; the old `fname[:-4]` slicing
        # corrupted the name once the counter reached double digits.
        fname = '{}_({})'.format(base_fname, fname_i)
        fname_i += 1
    pickle.dump(params,
                open(res_dir + 'params_{}_{}.p'.format(fname, np.round(epsilon, 2)),
                     'wb'))

    learn_counter = count()
    alive_female_models = []
    dead_female_models = []
    out_file = open(res_dir + 'out_{}_{}.txt'.format(fname, np.round(epsilon, 2)), 'w')
    for i in range(n_runs):
        start_time = time.time()
        print(next(learn_counter))
        # Train the model for the surviving ("alive") cohort.
        alive_female_model = infer(T, C, float(sigma), int(q * len(alive_female_df)),
                                   optimizer, lr, alive_female_df,
                                   alive_female_variable_types, female_k)
        alive_female_models.append(alive_female_model)
        pickle.dump(alive_female_models,
                    open('./female_models/' + 'alive_female_models_{}_{}.p'.format(
                        fname, np.round(epsilon, 2)), 'wb'))
        # Train the model for the deceased ("dead") cohort.
        dead_female_model = infer(T, C, float(sigma), int(q * len(dead_female_df)),
                                  optimizer, lr, dead_female_df,
                                  dead_female_variable_types, female_k)
        dead_female_models.append(dead_female_model)
        pickle.dump(dead_female_models,
                    open('./female_models/' + 'dead_female_models_{}_{}.p'.format(
                        fname, np.round(epsilon, 2)), 'wb'))
        stop_time = time.time()
        time_delta = stop_time - start_time
        # write(), not writelines(): the old call iterated the string char by char.
        out_file.write("Took {} seconds to learn alive and dead\n".format(time_delta))
        print("Took {} seconds to learn alive and dead\n".format(time_delta))
    out_file.close()
def main():
    """Train DP-VI mixture models on one carat app-usage subset, 10 repeats.

    The subset dimension d comes from sys.argv[1]. Computes the privacy
    budget, redirects stdout to a log file, loads the data, fits 10 models,
    and pickles both the models and the run parameters.
    """
    k = 20  # number of mixture components
    # Training parameters
    T = 30000
    C = 1.0
    q = .001
    lr = .001
    d = int(sys.argv[1])  # data dimensionality, from argv

    # Privacy accounting (deduplicated: delta and the accountant were
    # previously imported/assigned twice).
    delta = 1e-5
    sigma = 2.0
    if sigma > 0:
        from privacy.analysis.compute_dp_sgd_privacy import compute_rdp, get_privacy_spent
        rdp_alpha = range(2, 500)
        print(sigma)
        rdp_eps = compute_rdp(q, sigma, T, rdp_alpha)
        epsilon = 2 * get_privacy_spent(rdp_alpha, rdp_eps,
                                        target_delta=delta / 2)[0]
        # Real exception instead of assert: asserts are stripped under -O.
        if not epsilon < 1.0:
            raise ValueError("epsilon {} is not < 1.0".format(epsilon))

    # Log to file
    date = datetime.date.today().isoformat()
    wall_start = time.time()
    # time.process_time() replaces time.clock(), removed in Python 3.8.
    cpu_start = time.process_time()
    out_file = open("out_file_{}_{}.txt".format(date, d), "a")
    sys.stdout = out_file  # redirect all subsequent prints into the log

    # Load carat data
    import pandas as pd
    app_data = pd.read_csv('../data/subsets/carat_apps_sub{}.dat'.format(d),
                           sep=' ', header=None).astype('float').values
    N = len(app_data)
    batch_size = int(N * q)
    X_apps = torch.tensor(app_data).view([N, 1, d])

    models = []  # container for the learned generative models
    from torch.optim import Adam  # hoisted out of the loop (loop-invariant)
    for run in range(10):
        gen_model = infer(T, C, float(sigma), batch_size, Adam, lr, X_apps, k)
        models.append(gen_model)
    wall_end = time.time()
    cpu_end = time.process_time()
    pickle.dump(models, open('models_{}_{}.p'.format(date, d), 'wb'))
    print('Wall time {}'.format(wall_end - wall_start))
    print('CPU time {}'.format(cpu_end - cpu_start))
    out_file.close()
    params = {'T': T, 'C': C, 'q': q, 'lr': lr, 'sigma': sigma,
              'epsilon': epsilon, 'd': d}
    pickle.dump(params, open('params_{}_{}.p'.format(date, d), 'wb'))
def main():
    """Train a DP-VI mixture model on either the rich or the poor stratum.

    sigma, income ("rich" selects Target==1, anything else Target==0) and the
    RNG seed come from sys.argv. Fits one DPVI model on the chosen stratum,
    computes the privacy budget, and pickles the model and run parameters
    under ./res/. Relies on module-level globals (data, variable_types,
    ReparamXpand).
    """
    # DPVI hyper-parameters
    T = 80000
    C = 2.0
    lr = .0005
    q = 0.005
    sigma = float(sys.argv[1])
    income = sys.argv[2]
    seed = int(sys.argv[3])
    torch.manual_seed(seed)
    npr.seed(seed)
    k = 10  # number of mixture components
    # (Removed the unused `batch_size = int(q * N)` local; N was never
    # defined in this function and the value was never read.)

    # Map each latent variable to the shape of its per-component parameters.
    param_dims = OrderedDict()
    for key, value in variable_types.items():
        if key == 'pi_unconstrained':
            param_dims[key] = [k - 1]
        elif value == 'Bernoulli':
            param_dims[key] = [k]
        elif value == 'Categorical':
            param_dims[key] = [k, len(np.unique(data[key]))]
        elif value == 'Beta':
            param_dims[key] = [2, k]
    input_dim = int(np.sum([np.prod(value) for value in param_dims.values()]))
    flat_param_dims = np.array(
        [np.prod(value) for value in param_dims.values()])

    rich_data = data[data['Target'] == 1]
    batch_size_rich = int(q * len(rich_data))
    poor_data = data[data['Target'] == 0]
    batch_size_poor = int(q * len(poor_data))

    # Log to file
    date = datetime.date.today().isoformat()
    wall_start = time.time()
    # time.process_time() replaces time.clock(), removed in Python 3.8.
    cpu_start = time.process_time()
    out_file = open("out_file_{}_{}_{}.txt".format(income, date, sigma), "a")
    sys.stdout = out_file  # redirect all subsequent prints into the log
    print("Sigma : {}".format(sigma))

    models = []  # container for the trained model
    from torch.optim import Adam as Optimizer
    from dpvi import DPVI

    def _build_model(batch_size):
        """Build a ReparamXpand model + optimizer with the mixture-weight
        parameters initialised to N(0, exp(-2.0)); shared by both branches."""
        model = ReparamXpand(batch_size, input_dim, param_dims, flat_param_dims)
        opt = Optimizer(model.parameters(), lr=lr)
        model.reparam.bias.data[:, -(k - 1):] = 0.0 * torch.ones_like(
            model.reparam.bias.data[:, -(k - 1):])
        model.reparam.weight.data[:, -(k - 1):] = -2.0 * torch.ones_like(
            model.reparam.weight.data[:, -(k - 1):])
        return model, opt

    if income == "rich":
        rich_model, optimizer_rich = _build_model(batch_size_rich)
        rich_model_ = DPVI(rich_model, T, rich_data, batch_size_rich,
                           optimizer_rich, C, sigma, variable_types)
        models.append(rich_model_)
    else:
        poor_model, optimizer_poor = _build_model(batch_size_poor)
        poor_model_ = DPVI(poor_model, T, poor_data, batch_size_poor,
                           optimizer_poor, C, sigma, variable_types)
        models.append(poor_model_)
    wall_end = time.time()
    cpu_end = time.process_time()
    print('Wall time {}'.format(wall_end - wall_start))
    print('CPU time {}'.format(cpu_end - cpu_start))

    # Privacy accounting: factor 2 / delta-split matches the other scripts.
    from privacy.analysis.compute_dp_sgd_privacy import compute_rdp, get_privacy_spent
    delta = 1e-5
    rdp_orders = range(2, 500)
    rdp_eps = compute_rdp(q, sigma, T, rdp_orders)
    epsilon = 2 * get_privacy_spent(rdp_orders, rdp_eps,
                                    target_delta=delta / 2)[0]

    pickle.dump(models,
                open('./res/models_{}_{}_{}_{}.p'.format(income, date, sigma, seed),
                     'wb'))
    params = {'T': T, 'C': C, 'lr': lr, 'k': k, 'q': q,
              'sigma': sigma, 'epsilon': epsilon, 'seed': seed}
    pickle.dump(params,
                open('./res/params_{}_{}_{}_{}.p'.format(income, date, sigma, seed),
                     'wb'))
    out_file.close()