def naive_eps_simple(func, prob, k, delta):
    """Epsilon of ``k`` subsampled releases under naive (linear) composition.

    The total ``delta`` is divided by ``k`` and by ``prob`` to obtain the
    delta at which the un-subsampled mechanism is evaluated. The resulting
    epsilon is then passed through ``rdp_acct.subsample_epsdelta`` and the
    amplified per-release epsilon is multiplied by ``k``.

    :param func: RDP function of the base mechanism (handed to the accountant)
    :param prob: subsampling probability
    :param k: number of releases being composed
    :param delta: overall delta budget
    :return: ``k`` times the amplified per-release epsilon
    """
    accountant = rdp_acct.anaRDPacct()
    accountant.compose_mechanism(func)

    # delta at which the base mechanism is converted to (eps, delta)-DP
    base_delta = delta / k / prob
    base_eps = accountant.get_eps(base_delta)

    # only the amplified epsilon is needed; the amplified delta is discarded
    amplified_eps, _ = rdp_acct.subsample_epsdelta(base_eps, base_delta, prob)
    return amplified_eps * k
def single_release_comp(sigma_1, sigma_2=None, delta=1e-5):
    """Print the epsilon of one (or two) composed Gaussian releases.

    Each release is added to a fresh RDP accountant through
    ``compose_subsampled_mechanism`` with ``prob=1.``.

    :param sigma_1: ``'sigma'`` parameter of the first Gaussian release
    :param sigma_2: ``'sigma'`` parameter of an optional second release;
        skipped when ``None``
    :param delta: delta at which epsilon is reported
    :return: None; the result is printed
    """
    def gaussian_rdp(noise):
        # build the RDP curve for one fixed noise level
        return lambda x: rdp_bank.RDP_gaussian({'sigma': noise}, x)

    accountant = rdp_acct.anaRDPacct()
    noise_levels = [sigma_1] if sigma_2 is None else [sigma_1, sigma_2]
    for noise in noise_levels:
        accountant.compose_subsampled_mechanism(gaussian_rdp(noise), prob=1.)

    print("Privacy loss is", accountant.get_eps(delta))
def get_eps_rdp(func, delta):
    """Solve for epsilon at a given delta from an RDP function.

    A dedicated accountant (constructed with ``m=10, m_max=10``) receives the
    single mechanism described by ``func`` and is then queried for epsilon.

    :param func: RDP function of the mechanism
    :param delta: target delta; must be non-negative
    :return: The corresponding epsilon
    """
    assert delta >= 0

    accountant = rdp_acct.anaRDPacct(m=10, m_max=10)
    accountant.compose_mechanism(func)
    epsilon = accountant.get_eps(delta)
    return epsilon
def conservative_analysis():
    """Print the cumulative privacy loss of training on ten data subsets.

    All settings are hard-coded below. For every subset size the function
    composes one subsampled Gaussian mechanism per training step into a single
    shared accountant: regular steps use ``batch_size / n_data`` as sampling
    rate, and the step that closes each epoch uses the rate of the final
    (possibly smaller) batch. After the last step of each subset the running
    epsilon at ``delta`` is printed, together with timing information.

    :return: None; results are printed
    """
    # (1) noise parameter of the Gaussian mechanism
    sigma = 10.

    # (2) desired delta level
    delta = 1e-5

    n_epochs = 10  # 5 for DP-MERF and 17 for DP-MERF+AE
    batch_size = 64  # the same across experiments

    acct = rdp_acct.anaRDPacct()
    n_data_by_class = [
        5923, 6742, 5958, 6131, 5842, 5421, 5918, 6265, 5851, 5949,
    ]

    start_time = time.time()
    subset_count = 0
    for n_data in n_data_by_class:
        steps_per_epoch = int(np.ceil(n_data / batch_size))
        n_steps = steps_per_epoch * n_epochs
        sampling_rate = batch_size / n_data

        # The batch closing an epoch holds the leftover samples. When n_data is
        # an exact multiple of batch_size there is no leftover: steps_per_epoch
        # is n_data / batch_size and the closing batch is a full one, so a zero
        # remainder must not be turned into a sampling rate of 0.
        epoch_last_batch_size = n_data % batch_size or batch_size
        epoch_last_sampling_rate = epoch_last_batch_size / n_data

        old_time = time.time()
        for i in range(1, n_steps + 1):
            closes_epoch = i % steps_per_epoch == 0
            sampling_rate_i = epoch_last_sampling_rate if closes_epoch else sampling_rate
            acct.compose_subsampled_mechanism(
                lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x),
                sampling_rate_i)

            if closes_epoch:
                new_time = time.time()
                epochs_done = i // steps_per_epoch
                t_used = new_time - old_time
                t_total = new_time - start_time
                t_total_min = t_total / 60
                print(
                    f'Epoch {epochs_done} done - Time used: {t_used:.2f}, Total: {t_total:.2f} ({t_total_min:.2f} minutes)'
                )
                old_time = new_time

            if i == n_steps:
                # end of this subset: report the epsilon accumulated so far
                pre_eps_time = time.time()
                subset_count += 1
                print("[", i, "]Privacy loss is", (acct.get_eps(delta)))
                post_eps_time = time.time()
                print('time to get_eps: ', post_eps_time - pre_eps_time)
                old_time = post_eps_time
                print(f'data subset {subset_count} done')
def main(config):
    """Print the privacy cost implied by a training configuration.

    Reads ``'batchsize'``, ``'num_discriminators'`` and ``'iterations'`` from
    ``config``, composes one subsampled Gaussian mechanism with coefficient
    ``iterations * batchsize`` and prints epsilon at ``delta = 1e-5``.

    :param config: mapping holding the three keys listed above
    :return: None; the result is printed
    """
    delta = 1e-5
    batch_size = config['batchsize']
    # subsampling rate: one discriminator out of all of them
    prob = 1. / config['num_discriminators']
    # number of training iterations
    n_steps = config['iterations']
    # noise scale; fixed here rather than read from config['noise_multiplier']
    sigma = 0.4859

    def func(x):
        return rdp_bank.RDP_gaussian({'sigma': sigma}, x)

    accountant = rdp_acct.anaRDPacct()
    accountant.compose_subsampled_mechanism(func, prob, coeff=n_steps * batch_size)
    epsilon = accountant.get_eps(delta)
    print("Privacy cost is: epsilon={}, delta={}".format(epsilon, delta))
def conservative_analysis_syn2d(sigma, delta, n_epochs, batch_size, n_data_per_class, n_classes,
                                print_intermediate_results):
    """Print the cumulative privacy loss of training on equally sized classes.

    One subsampled Gaussian mechanism is composed per training step into a
    single shared accountant. Regular steps use ``batch_size / n_data`` as
    sampling rate; the step that closes each epoch uses the rate of the final
    (possibly smaller) batch.

    :param sigma: ``'sigma'`` parameter of the Gaussian mechanism
    :param delta: delta at which epsilon is reported
    :param n_epochs: epochs of training per class
    :param batch_size: number of samples per regular batch
    :param n_data_per_class: number of samples in every class
    :param n_classes: number of classes (one pass of training steps each)
    :param print_intermediate_results: when true, epsilon is printed after
        every class; otherwise only after the last one
    :return: None; results are printed
    """
    acct = rdp_acct.anaRDPacct()
    n_data_by_class = [n_data_per_class] * n_classes

    start_time = time.time()
    subset_count = 0
    for model_idx, n_data in enumerate(n_data_by_class):
        steps_per_epoch = int(np.ceil(n_data / batch_size))
        n_steps = steps_per_epoch * n_epochs
        sampling_rate = batch_size / n_data

        # The batch closing an epoch holds the leftover samples. When n_data is
        # an exact multiple of batch_size there is no leftover: steps_per_epoch
        # is n_data / batch_size and the closing batch is a full one. Without
        # this guard that step was composed with sampling rate 0, which
        # under-counts the privacy loss.
        epoch_last_batch_size = n_data % batch_size or batch_size
        epoch_last_sampling_rate = epoch_last_batch_size / n_data

        is_last_model = model_idx + 1 == len(n_data_by_class)

        old_time = time.time()
        for i in range(1, n_steps + 1):
            closes_epoch = i % steps_per_epoch == 0
            sampling_rate_i = epoch_last_sampling_rate if closes_epoch else sampling_rate
            acct.compose_subsampled_mechanism(
                lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x),
                sampling_rate_i)

            if closes_epoch:
                new_time = time.time()
                epochs_done = i // steps_per_epoch
                t_used = new_time - old_time
                t_total = new_time - start_time
                t_total_min = t_total / 60
                print(
                    f'Epoch {epochs_done} done - Time used: {t_used:.2f}, Total: {t_total:.2f} ({t_total_min:.2f} minutes)'
                )
                old_time = new_time

            if i == n_steps and (print_intermediate_results or is_last_model):
                pre_eps_time = time.time()
                subset_count += 1
                print("[", i, "]Privacy loss is", (acct.get_eps(delta)))
                post_eps_time = time.time()
                print(f'time to get_eps: {post_eps_time - pre_eps_time:.2f}')
                old_time = post_eps_time
                # NOTE(review): placed inside this branch because the counter is
                # only advanced here; the flattened source does not show the
                # original nesting - confirm.
                print(f'data subset {subset_count} done')
def main():
    """Print the privacy loss after every epoch for one tabular dataset.

    All inputs are fixed inside the function: the noise level, delta, number
    of epochs, batch size and the dataset name (which selects the number of
    samples). One subsampled Gaussian mechanism is composed per training step,
    and epsilon is recorded and printed at the end of each epoch.

    :return: None; results are printed
    """
    # ---- input arguments -------------------------------------------------
    sigma = 1.2       # noise parameter of the Gaussian mechanism
    delta = 1e-5      # desired delta level
    n_epochs = 10     # 5 for DP-MERF and 17 for DP-MERF+AE
    batch_size = 64   # the same across experiments

    dataset = "intrusion"
    samples_per_dataset = {
        "epileptic": 8049,
        "isolet": 4366,
        "adult": 11077,
        "census": 199523,
        "cervical": 753,
        "credit": 2668,
        "intrusion": 394021,
        "covtype": 9217,
    }
    n_data = samples_per_dataset[dataset]

    steps_per_epoch = n_data // batch_size
    n_steps = steps_per_epoch * n_epochs

    # sampling rate of a single batch
    prob = batch_size / n_data
    # ---- end of input arguments -------------------------------------------

    # use autodp to calculate the cumulative privacy loss
    accountant = rdp_acct.anaRDPacct()

    eps_seq = []
    for step in range(1, n_steps + 1):
        accountant.compose_subsampled_mechanism(
            lambda x: rdp_bank.RDP_gaussian({'sigma': sigma}, x), prob)

        at_epoch_end = step % steps_per_epoch == 0
        if at_epoch_end or step == n_steps:
            current_eps = accountant.get_eps(delta)
            eps_seq.append(current_eps)
            print("[", step, "]Privacy loss is", (current_eps))
def direct_readout(ar):
    """Print the privacy cost implied by the attributes of ``ar``.

    Reads ``batchsize``, ``num_discriminators``, ``iterations`` and
    ``noise_multiplier`` from ``ar``, composes one subsampled Gaussian
    mechanism with coefficient ``iterations * batchsize`` and prints epsilon
    at ``delta = 1e-5``.

    :param ar: object exposing the four attributes listed above
    :return: None; results are printed
    """
    delta = 1e-5
    batch_size = ar.batchsize
    # subsampling rate: one discriminator out of all of them
    prob = 1. / ar.num_discriminators
    # number of training iterations
    n_steps = ar.iterations
    print(n_steps, batch_size, prob)
    # noise scale
    sigma = ar.noise_multiplier

    def func(x):
        return rdp_bank.RDP_gaussian({'sigma': sigma}, x)

    accountant = rdp_acct.anaRDPacct()
    accountant.compose_subsampled_mechanism(func, prob, coeff=n_steps * batch_size)
    epsilon = accountant.get_eps(delta)
    print("Privacy cost is: epsilon={}, delta={}".format(epsilon, delta))
def func(x):
    """Return the gap between the epsilon obtained with parameter ``x`` and ``eps``.

    ``x`` is written into ``params[param_name]``; the mechanism is then
    composed ``k`` times (subsampled when ``0 < prob < 1``) and its epsilon at
    ``delta`` is compared with the target ``eps``. ``params``, ``param_name``,
    ``rdp_func``, ``prob``, ``k``, ``delta`` and ``eps`` come from the
    enclosing scope.

    It is assumed that ``rdp_func`` and ``param_name`` are chosen such that
    this function is either monotonically increasing or decreasing.
    """
    params[param_name] = x

    def rdp(alpha):
        return rdp_func(params, alpha)

    accountant = rdp_acct.anaRDPacct()
    if 0 < prob < 1.0:
        accountant.compose_subsampled_mechanism(rdp, prob, coeff=k)
    else:
        # prob outside (0, 1): compose without subsampling
        accountant.compose_mechanism(rdp, coeff=k)

    return accountant.get_eps(delta) - eps
def naive_eps(x, func, prob, k, delta):
    """Epsilon of ``k`` subsampled releases under naive strong composition.

    The base mechanism is converted to (eps, delta)-DP at ``exp(x)``, passed
    through ``rdp_acct.subsample_epsdelta`` and then composed ``k`` times with
    three alternative bounds; the smallest one is returned.

    :param x: log of the per-release delta (it needs to be negative)
    :param func: RDP function of the base mechanism
    :param prob: subsampling probability
    :param k: number of releases being composed
    :param delta: overall delta budget
    :return: the smallest of the three bounds, or ``np.inf`` when that
        minimum is negative
    """
    accountant = rdp_acct.anaRDPacct()
    accountant.compose_mechanism(func)

    step_delta = np.exp(x)
    eps = accountant.get_eps(step_delta)
    eps1, delta1 = rdp_acct.subsample_epsdelta(eps, step_delta, prob)

    # slack left for the composition bounds once k releases have each spent delta1
    deltatilde = 1 - np.exp(np.log(1-delta) - k*np.log(1-delta1))

    k_eps_sq = k*eps1**2
    candidates = [
        # linear composition
        k*eps1,
        # two square-root style bounds
        k_eps_sq + eps1 * (2*k*np.log(np.exp(1) + k_eps_sq**0.5/deltatilde))**0.5,
        k_eps_sq + eps1 * (2*k*np.log(1 / deltatilde))**0.5,
    ]
    eps_all = np.min(candidates)

    # a negative value is treated as "no valid bound" (the original note says it will be -1)
    return np.inf if eps_all < 0 else eps_all
def get_eps_rdp_subsampled(func, delta, prob):
    """Solve for epsilon at a given delta for a subsampled mechanism.

    :param func: RDP function of the base mechanism
    :param delta: target delta; must be non-negative
    :param prob: subsampling probability; must be non-negative
    :return: 0 when ``prob`` is 0, the un-subsampled epsilon from
        ``get_eps_rdp`` when ``prob`` is 1, otherwise the epsilon of the
        subsampled mechanism
    """
    assert delta >= 0
    assert prob >= 0

    # nothing is ever sampled
    if prob == 0:
        return 0

    # everything is sampled: no amplification to account for
    if prob == 1:
        return get_eps_rdp(func, delta)

    accountant = rdp_acct.anaRDPacct()
    accountant.compose_subsampled_mechanism(func, prob)
    return accountant.get_eps(delta)
def main():
    """Print the cumulative privacy loss of ``k`` subsampled training steps.

    All inputs are fixed inside the function. The RDP upper bound of one step
    is obtained from ``CGF_func`` and composed ``k`` times with sampling rate
    ``prob``; epsilon at ``delta`` is recorded after every step and printed
    every ``print_every_n`` steps, followed by a final summary line.

    :return: None; results are printed
    """
    # ---- input arguments -------------------------------------------------
    # (1) privacy parameters for four types of Gaussian mechanisms
    sigma1 = 2.
    sigma2 = 200.0
    sigma3 = 200.0
    sigma4 = 200.0

    # (2) number of clusters in MoG
    num_Clust = 1

    # (3) number of iterations in EM updates
    num_iter_EM = 1

    # (4) desired delta level
    delta = 1e-5

    # (5) number of training steps
    k = 4000

    # (6) sampling rate
    prob = 512./60000.
    # ---- end of input arguments -------------------------------------------

    # now use autodp to calculate the cumulative privacy loss
    acct = rdp_acct.anaRDPacct()

    # functional form of the upper bound of RDP for one step
    func = CGF_func(sigma1, sigma2, sigma3, sigma4, num_Clust, num_iter_EM)

    eps_seq = []
    print_every_n = 100
    for i in range(1, k+1):
        acct.compose_subsampled_mechanism(func, prob)
        eps_seq.append(acct.get_eps(delta))
        if i % print_every_n == 0 or i == k:
            print("[", i, "]Privacy loss is", (eps_seq[-1]))

    # Report the number of steps actually composed; the message previously
    # claimed 1000 regardless of k.
    print("Composition of {} subsampled Gaussian mechanisms gives ".format(k),
          (acct.get_eps(delta), delta))
from torch.optim import lr_scheduler import network #from utils import Hamming_Score as hamming_accuracy import os from dataset_loader import ImageDataset import aggregation import autodp from autodp import rdp_bank, dp_acct, rdp_acct, privacy_calibrator #from utils import Hamming_Score as hamming_accuracy from utils import hamming_precision as hamming_accuracy from knn_attribute import tau_limit import sys sys.path.append('../dataset/duke') from datafolder.folder import Test_Dataset nb_teachers = config.nb_teachers acct = rdp_acct.anaRDPacct() gaussian = lambda x: rdp_bank.RDP_gaussian( {'sigma': int(config.gau_scale / config.tau)}, x) #acct.compose_mechanism(gaussian,coeff=config.tau*config.stdnt_share) #print('privacy loss', acct.get_eps(config.delta)) dataset_dict = { 'market': 'Market-1501', 'duke': 'DukeMTMC-reID', } def ensemble_preds(nb_teachers, stdnt_data): """ Given a dataset, a number of teachers, and some input data, this helper function queries each teacher for predictions on the data and returns all predictions in a single array. (That can then be aggregated into
data_iterator.reset() for i, batch in enumerate(data_iterator): data = batch.data[0].as_in_context(ctx).reshape((-1, 784)) label = batch.label[0].as_in_context(ctx) output = net(data) predictions = nd.argmax(output, axis=1) acc.update(preds=predictions, labels=label) loss = softmax_cross_entropy(output, label) loss_fun = loss_fun * i / (i + 1) + nd.mean(loss).asscalar() / (i + 1) return acc.get()[1], loss_fun # ## Now let's try attaching a privacy accountant to this data set # declare a moment accountant from pydiffpriv DPobject = rdp_acct.anaRDPacct() # Specify privacy specific inputs thresh = 4.0 # limit the norm of individual gradient sigma = thresh delta = 1e-5 func = lambda x: rdp_bank.RDP_gaussian({'sigma': sigma / thresh}, x) # ## We now specify the parameters needed for learning # epochs = 10 learning_rate = .1
part_sum = 4 * alpha * (alpha - 2) * gamma**3 / (3 * sigma**3) return bound dense = 1.07 alpha_list = [ int(dense**i + 1) for i in range(int(math.floor(math.log(M, dense))) + 1) ] alpha_list = np.unique(alpha_list) for name, func in funcs.items(): figure_2 = [] cgf_poisson = [] # Declare the analytical CGF accountant acgfacct = rdp_acct.anaRDPacct(m=m, m_max=1000, m_lin_max=M) # Declare another analytical CGF accountant for calculating the lower bound acgfacct3 = rdp_acct.anaRDPacct(m=m, m_max=1000, m_lin_max=M) # general_acct tracks the general upperbound, we set approx=True for approximate methods general_acct = rdp_acct.anaRDPacct(m=m, m_max=1000, m_lin_max=M, approx=True) def cgf(x): return (x - 1) * func(x) moment = [] # only for gaussian if name == 'gaussian':
def amplify(self, mechanism, prob, improved_bound_flag=False):
    """Return a new mechanism describing ``mechanism`` run on a random subsample.

    The returned mechanism carries an amplified approximate-DP function, an
    amplified RDP function obtained from an RDP accountant, a composed name
    and a copy of the input mechanism's parameters extended with the sampling
    probability.

    If you know that your mechanism
      - (for PoissonSampling) satisfies the conditions in Theorem 8 of
        http://proceedings.mlr.press/v97/zhu19c/zhu19c.pdf
      - or (for subsampling) satisfies the conditions of Theorem 27 of
        https://arxiv.org/pdf/1808.00087.pdf
    then you may set ``improved_bound_flag`` to True to get a tighter bound.
    Else, for all mechanisms with RDP bounds, the general upper bounds are
    used by default.

    :param mechanism: the mechanism to amplify; must use the add/remove
        notion of DP when ``self.PoissonSampling`` is set and the replace-one
        notion otherwise
    :param prob: sampling probability
    :param improved_bound_flag: select the tighter accountant bound
    :return: the amplified mechanism
    :raises AssertionError: when ``mechanism.replace_one`` does not match the
        sampling scheme
    """
    newmech = Mechanism()

    # privacy amplification via approx-dp
    # Amplification of RDP
    # propagate to approxDP as well.

    if self.PoissonSampling:
        assert not mechanism.replace_one, "mechanism's replace_one notion of DP is " \
                                          "incompatible with Privacy Amplification " \
                                          "by Poisson sampling"
        # check that the input mechanism uses the standard add-or-remove notion of DP.
        # If not, there actually isn't a way to convert it from replace-one notation,
        # unless a "dummy" user exists in the space.
        newmech.replace_one = False
    else:  # if we want subsampled DP
        assert mechanism.replace_one, "mechanism's add-remove notion of DP is " \
                                      "incompatible with Privacy Amplification " \
                                      "by subsampling without replacements"
        # TODO: implement a transformer that convert add/remove to replace_one notion of DP.
        newmech.replace_one = True

    if prob == 0:
        # nothing is ever sampled, so nothing is revealed
        new_approxDP = lambda delta: 0
    else:
        new_approxDP = lambda delta: np.log(1 + prob*(np.exp(mechanism.approxDP(delta/prob))-1))
    newmech.approxDP = new_approxDP

    acct = rdp_acct.anaRDPacct()
    if self.PoissonSampling:
        if improved_bound_flag:
            acct.compose_poisson_subsampled_mechanisms(mechanism.RenyiDP, prob)
        else:
            acct.compose_poisson_subsampled_mechanisms1(mechanism.RenyiDP, prob)
    else:  # subsampling
        if improved_bound_flag:
            acct.compose_subsampled_mechanism(mechanism.RenyiDP, prob, improved_bound_flag=True)
        else:
            acct.compose_subsampled_mechanism(mechanism.RenyiDP, prob)

    acct.build_zeroth_oracle()
    new_rdp = acct.evalRDP
    newmech.propagate_updates(new_rdp, 'RDP')

    # TODO: Implement the amplification of f-DP
    # propagate to approxDP, or simply get the f-DP from approximate-DP.

    # book keeping
    key = self.name + '_' + str(prob)
    num = 0
    newname = self.name

    # the following handles the case when key is already in the params
    # NOTE(review): the key tested here ("<name>_<prob>") differs from the key
    # stored below ("<name>"), so this loop may never see a collision - confirm.
    while key in mechanism.params:
        num = num + 1
        newname = self.name + str(num)
        key = newname + '_' + str(prob)

    newmech.name = newname + ':' + mechanism.name

    # Copy the parameters: assigning mechanism.params directly would alias the
    # dict, and adding the sampling entry would then silently modify the input
    # mechanism as well.
    newmech.params = dict(mechanism.params)
    newmech.params[newname] = prob

    return newmech

# TODO: implement other transformers:
# - amplification by shuffling
# - parallel composition
# - group composition
# - private selection of private candidates
# - amplification by overwhelmingly large-probability event.
def func(x):
    """Return the gap between the epsilon at sampling rate ``x`` and ``eps``.

    The mechanism described by ``rdp_func(params, .)`` is composed ``k`` times
    with subsampling probability ``x``; its epsilon at ``delta`` is compared
    with the target ``eps``. ``rdp_func``, ``params``, ``k``, ``delta`` and
    ``eps`` come from the enclosing scope.
    """
    def rdp(alpha):
        return rdp_func(params, alpha)

    accountant = rdp_acct.anaRDPacct()
    accountant.compose_subsampled_mechanism(rdp, x, coeff=k)
    return accountant.get_eps(delta) - eps
def calibrate_epsilon(params, delta):  # lemma_8
    """Compute the overall epsilon of one release configuration at ``delta``.

    We use approximate-CDP for the composition, and then calculate the epsilon
    parameter as a function of delta.

    Fields read from ``params``:
      - ``params['config']``: which configuration is analysed; the values
        ``'config4'``, ``'config3'`` and ``'config2'`` add pure-DP releases,
        any other truthy value adds none, a falsy value returns 0.
      - ``params['eps_dist']``: mapping from which ``'e9'`` / ``'e7'`` /
        ``'e6'`` is read for config4 / config3 / config2 respectively.
      - ``params['gaussian']``: iterable of ``(sensitivity, variance)`` pairs,
        one per Gaussian release (each config often releases more than one
        quantity).

    :param params: dictionary with the fields above
    :param delta: total delta budget
    :return: 0 for a falsy config, ``np.inf`` when the failure probability
        reserved by the pure-DP releases already reaches ``delta``, otherwise
        the epsilon reported by the accountant
    """
    config = params['config']
    eps_edge_dist = params['eps_dist']
    acct = rdp_acct.anaRDPacct()

    if not config:
        return 0

    # config -> (key in eps_dist, number of equal budget shares,
    #            number of pure-DP releases that each consume one share)
    pure_dp_plan = {
        'config4': ('e9', 4, 2),
        'config3': ('e7', 3, 2),
        'config2': ('e6', 2, 1),
    }

    delta0 = 0
    plan = pure_dp_plan.get(config)
    if plan is not None:
        eps_key, n_shares, n_releases = plan
        eps_share = eps_edge_dist[eps_key] / n_shares
        # every pure-DP release also reserves one share of delta
        delta0 = n_releases * (delta / n_shares)
        for _ in range(n_releases):
            acct.compose_mechanism(
                lambda x, eps_share=eps_share: rdp_bank.RDP_pureDP({'eps': eps_share}, x))

    print('delta0:', delta0)
    if delta0 >= delta:
        return np.inf

    for sensitivity, variance in params['gaussian']:
        # often we pre-emptively calculate sensitivities,
        # so they might not be zero in places where we aren't adding noise.
        # variance provides a better check for this.
        if sensitivity == 0 or variance == 0:
            continue
        std = np.sqrt(variance)
        noise_to_sensitivity = std / max(sensitivity, np.finfo(np.float32).eps)
        # CDP of gaussian mechanism conditioning on the event is the same as its RDP.
        # The ratio is bound as a default argument: a plain closure would look
        # up `std` and `sensitivity` when it is called, i.e. it would see the
        # values of the last loop iteration if the accountant evaluates it later.
        acct.compose_mechanism(
            lambda x, sigma=noise_to_sensitivity: rdp_bank.RDP_gaussian({'sigma': sigma}, x))

    # This privacy calculation follows from Lemma 8.8 of Bun et al. (2016)
    # https://arxiv.org/pdf/1605.02065.pdf
    return acct.get_eps((delta - delta0) / (1 - delta0))
def run(self):
    """Load the data, run the training rounds and report final privacy and accuracy.

    The method loads data through ``self._load_data()``, builds the network,
    loss and optimizer, then performs ``round(self._epochs * rounds_per_epoch)``
    calls to ``trainer.step``. Per-round losses (and, when privacy accounting
    and debugging are both enabled, per-round epsilons) are collected, and
    per-epoch statistics are printed when ``self._verbose`` is set.

    NOTE(review): the nesting of the statements below was reconstructed from a
    flattened listing of this method - confirm against the original file.

    :return: ``(final_eps, test_accuracy)``; ``final_eps`` is -1 when
        ``self._accumulate_privacy`` is false
    """
    # Helper methods
    def get_random_lot(data_loader):
        # a fresh iterator is created on every call and only its first item is used
        return next(iter(data_loader))

    # Data importing, pre-processing, and loading
    num_training_examples, num_testing_examples, train_data_lot_iterator, train_data_eval_iterator, test_data = self._load_data(
    )

    # parameters calculated from loaded data
    self._num_training_examples = num_training_examples
    self._num_testing_examples = num_testing_examples
    self._hyperparams[
        'sample_fraction'] = self._lot_size / num_training_examples
    rounds_per_epoch = round(num_training_examples / self._lot_size)

    # Set up privacy accountant
    accountant = rdp_acct.anaRDPacct()  # dpacct.anaCGFAcct()
    eps_sequence = []

    # Network structure creation
    self._create_network_params()

    # Loss function
    loss_func = self._get_loss_func()

    # Optimization procedure (the accountant is handed to the optimizer)
    trainer = self._optimizer(self._hyperparams, self._net, self._params,
                              loss_func, self._model_ctx, accountant)

    # begin profiling if enabled
    if self._enable_mxnet_profiling:
        from mxnet import profiler
        profiler.set_config(profile_all=True,
                            aggregate_stats=True,
                            filename='profile_output.json')
        profiler.set_state('run')

    # Training sequence
    rounds = round(self._epochs * rounds_per_epoch)
    loss_sequence = []
    current_epoch_loss = mx.nd.zeros(1, ctx=self._model_ctx)
    for t in range(1, rounds + 1):
        if self._verbose and self._print_epoch_status:
            # show current epoch progress
            epoch_number = 1 + (t - 1) // rounds_per_epoch
            epoch_progress = 1 + (t - 1) % rounds_per_epoch
            printProgressBar(
                epoch_progress,
                rounds_per_epoch,
                prefix='Epoch {} progress:'.format(epoch_number),
                length=50)

        if self._run_training:
            # prepare random lot of data for DPSGD step
            data, labels = get_random_lot(train_data_lot_iterator)
            data = data.as_in_context(self._model_ctx).reshape(
                (-1, 1, self._input_layer))
            labels = labels.as_in_context(self._model_ctx)
        else:
            # training was switched off: the step is still called, with empty inputs
            data, labels = [], []

        # perform DPSGD step
        lot_mean_loss = trainer.step(
            data,
            labels,
            accumulate_privacy=self._accumulate_privacy,
            run_training=self._run_training)

        loss_sequence.append(lot_mean_loss)
        current_epoch_loss += lot_mean_loss

        # no need to continue running training if NaNs are present
        if not np.isfinite(lot_mean_loss):
            self._run_training = False
            if self._verbose:
                print("NaN loss on round {}.".format(t))
        if self._params_not_finite():
            self._run_training = False
            if self._verbose:
                print("Non-finite parameters on round {}.".format(t))

        # track epsilon after every round only in debugging mode
        if self._accumulate_privacy and self._debugging:
            eps_sequence.append(accountant.get_eps(self._fixed_delta))

        # print some stats after an "epoch"
        if t % rounds_per_epoch == 0:
            if self._verbose:
                print("Epoch {} (round {}) complete.".format(
                    t / rounds_per_epoch, t))
                if self._run_training:
                    print("mean epoch loss: {}".format(
                        current_epoch_loss.asscalar() * self._lot_size /
                        self._num_training_examples))
                    if self._compute_epoch_accuracy:
                        print("training accuracy: {}".format(
                            self._evaluate_accuracy(
                                train_data_eval_iterator)))
                        print("testing accuracy: {}".format(
                            self._evaluate_accuracy(test_data)))
                if self._accumulate_privacy and self._debugging:
                    print("eps used: {}\n".format(eps_sequence[-1]))
                print()
            # start accumulating the loss of the next epoch from zero
            current_epoch_loss = mx.nd.zeros(1, ctx=self._model_ctx)

    # end profiling if enabled
    if self._enable_mxnet_profiling:
        mx.nd.waitall()
        profiler.set_state('stop')
        print(profiler.dumps())

    # Make sure we don't report a bogus number
    if self._accumulate_privacy:
        final_eps = accountant.get_eps(self._fixed_delta)
    else:
        final_eps = -1

    test_accuracy = self._evaluate_accuracy(test_data)

    # NOTE(review): `t` is only bound if the training loop ran at least once - confirm rounds >= 1
    if self._save_plots or self._debugging:
        self._create_and_save_plots(t, eps_sequence, loss_sequence,
                                    final_eps, test_accuracy)

    return final_eps, test_accuracy
prob is the sample ratio alpha_limit computes the maximum available alpha for moment method in Abadi et. al.2016 We declare 6 moment account here acgfacct: Sample w/o Replacement [WBK’18] acgfacct3:via tight upper/lower bound (Theorem 2/3) acgfacct5: general upperbound eps_seq_simple: naive composition eps_seq_naive: strong composition [Kairouz et al.KOV15] moment_cache: moment method in Abadi et. al.2016 We compare their privacy loss after k's iteration """ m=100 # for small \eps, m needs to be large for big \eps, m needs to be delta = 1e-8 cgfacct = rdp_acct.anaRDPacct(m) k= 60000 sigma = 5 b = 2 p = 0.6 prob=0.001 # sampling probability alpha_limit = int(sigma ** 2 * np.log(1 / (prob * sigma))) # maximum alpha for moment method def naive_eps_simple(func, prob, k, delta): # naive simple composition # input x is log delta, it needs to be negative tmp_acct = rdp_acct.anaRDPacct() tmp_acct.compose_mechanism(func) eps = tmp_acct.get_eps(delta/k/prob) eps1, delta1 = rdp_acct.subsample_epsdelta(eps, delta/k/prob, prob)