def test_check_composition(self):
    """Check that composing two subsampled-Gaussian RDP curves stays within a known epsilon."""
    orders = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12.,
              14., 16., 20., 24., 28., 32., 64., 256.)

    # First mechanism on its own; the conversion result is not asserted on.
    rdp = rdp_accountant.compute_rdp(
        q=1e-4, noise_multiplier=.4, steps=40000, orders=orders)
    _first_eps, _, _ = rdp_accountant.get_privacy_spent(
        orders, rdp, target_delta=1e-6)

    # Compose with a second mechanism by adding its RDP curve.
    rdp += rdp_accountant.compute_rdp(
        q=0.1, noise_multiplier=2, steps=100, orders=orders)
    eps, _, _ = rdp_accountant.get_privacy_spent(
        orders, rdp, target_delta=1e-5)

    # The exact expectations (eps == 8.509656 to 5 places, optimal order 2.5)
    # were tied to the old RDP -> approx DP conversion. The old epsilon still
    # provides an upper bound, so only that is asserted.
    self.assertLessEqual(eps, 8.509656)
def search_optimal_noise_multiplier(self, target_epsilon):
    """
    Binary-search a noise multiplier (sigma) whose epsilon is at most
    ``target_epsilon`` under RDP or GDP accounting.

    Functionality adapted from Opacus (https://github.com/pytorch/opacus).

    The upper bracket is doubled until its epsilon drops to the target, then
    the bracket is bisected until the upper end's epsilon is within
    ``EPS_TOLERANCE * target_epsilon`` of the target.

    :param target_epsilon: epsilon the returned sigma must not exceed
    :return: the upper end of the final sigma bracket
    :raises ValueError: if the bracket exceeds ``MAX_SIGMA`` or, for GDP,
        the reported delta exceeds ``self.target_delta`` while bracketing
    """
    orders = [1 + x / 100.0 for x in range(1, 1000)] + list(range(12, 1200))

    def _epsilon_for(sigma, enforce_delta):
        # Evaluate epsilon at `sigma` with the configured accountant.
        if self.dp_type == 'rdp':
            rdp = compute_rdp(self.sampling_rate, sigma, self.steps, orders)
            rdp_eps, _, _ = get_privacy_spent(
                orders, rdp, target_delta=self.target_delta)
            return rdp_eps
        # Any other dp_type is handled as 'gdp'.
        mu = compute_gdp_mu(self.sampling_rate, sigma, self.steps)
        gdp_eps, delta = get_gdp_privacy_spent(
            mu, target_delta=self.target_delta)
        if enforce_delta and delta > self.target_delta:
            raise ValueError("Could not find suitable privacy parameters.")
        return gdp_eps

    sigma_low, sigma_high = 0, 10
    eps_high = float("inf")

    # Phase 1: grow the upper bracket until it satisfies the target.
    while eps_high > target_epsilon:
        sigma_high = 2 * sigma_high
        eps_high = _epsilon_for(sigma_high, enforce_delta=True)
        if sigma_high > MAX_SIGMA:
            raise ValueError("The privacy budget is too low.")

    # Phase 2: bisect until the upper end is close enough to the target.
    while target_epsilon - eps_high > EPS_TOLERANCE * target_epsilon:
        midpoint = (sigma_low + sigma_high) / 2
        eps_mid = _epsilon_for(midpoint, enforce_delta=False)
        if eps_mid < target_epsilon:
            sigma_high, eps_high = midpoint, eps_mid
        else:
            sigma_low = midpoint

    return sigma_high
def test_get_privacy_spent_check_target_eps(self):
    """With epsilon fixed, the accountant should report delta ~= 1e-5 at order 20."""
    orders = range(2, 33)
    rdp = rdp_accountant.compute_rdp(0.01, 4, 10000, orders)

    result = rdp_accountant.get_privacy_spent(
        orders, rdp, target_eps=1.258575)
    _, delta, opt_order = result

    self.assertAlmostEqual(delta, 1e-5)
    self.assertEqual(opt_order, 20)
def test_get_privacy_spent_check_target_delta(self):
    """With delta fixed at 1e-5, the accountant should report eps ~= 1.258575 at order 20."""
    orders = range(2, 33)
    rdp = rdp_accountant.compute_rdp(0.01, 4, 10000, orders)

    result = rdp_accountant.get_privacy_spent(
        orders, rdp, target_delta=1e-5)
    eps, _, opt_order = result

    self.assertAlmostEqual(eps, 1.258575, places=5)
    self.assertEqual(opt_order, 20)
def _compute_privacy_budget_spent(self):
    """Return the epsilon spent so far.

    The stored RDP (``self._rdp``) is multiplied by the number of completed
    rounds and converted to epsilon at ``self._delta``.
    """
    accumulated_rdp = self._rdp * self._num_rounds_completed
    epsilon, _, _ = rdp_accountant.get_privacy_spent(
        orders=self.RDP_ORDERS,
        rdp=accumulated_rdp,
        target_delta=self._delta,
    )
    return epsilon
def analysis_privacy(lot_size, data_size, sgd_sigma, gmm_sigma, gmm_iter, gmm_n_comp, sgd_epoch, pca_eps, delta=1e-5):
    """Compose the PCA, GMM and SGD RDP curves and report the total epsilon.

    Returns:
        A pair ``(eps, shares)`` where ``shares`` lists the fraction of the
        total RDP at the optimal order contributed by PCA, GMM and SGD, in
        that order. Both results are also printed.
    """
    orders = [1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5]
    orders += list(range(5, 64))
    orders += [128, 256, 512]

    # SGD runs with sampling rate q; the GMM mechanism uses a rate of 1.
    q = lot_size / data_size
    sgd_steps = int(math.ceil(sgd_epoch * data_size / lot_size))
    gmm_steps = gmm_iter * (2 * gmm_n_comp + 1)

    pca_rdp = np.array(orders) * 2 * (pca_eps**2)
    sgd_rdp = compute_rdp(q, sgd_sigma, sgd_steps, orders)
    gmm_rdp = compute_rdp(1, gmm_sigma, gmm_steps, orders)
    rdp = pca_rdp + gmm_rdp + sgd_rdp

    eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta)

    # Break the total down at the order that produced the reported epsilon.
    index = orders.index(opt_order)
    print(
        f"ratio(pca:gmm:sgd):{pca_rdp[index]/rdp[index]}:{gmm_rdp[index]/rdp[index]}:{sgd_rdp[index]/rdp[index]}"
    )
    print(f"GMM + SGD + PCA (MA): {eps}, {delta}-DP")

    shares = [part[index] / rdp[index] for part in (pca_rdp, gmm_rdp, sgd_rdp)]
    return eps, shares
def test_get_privacy_spent_gaussian(self):
    """Sandwich the RDP-derived delta between the optimal and the standard Gaussian bounds."""
    # Compare the optimal bound for Gaussian with the one derived from RDP.
    # Also compare the RDP upper bound with the "standard" upper bound.
    orders = [0.1 * x for x in range(10, 505)]
    rdp = rdp_accountant.compute_rdp(1, 1, 1, orders)
    root_two = math.sqrt(2)

    for eps in [0.1 * x for x in range(500)]:
        _, delta, _ = rdp_accountant.get_privacy_spent(
            orders, rdp, target_eps=eps)

        # For comparison, we compute the optimal guarantee for Gaussian
        # using https://arxiv.org/abs/1805.06530 Theorem 8 (in v2).
        delta0 = math.erfc((eps - .5) / root_two) / 2
        delta0 = delta0 - math.exp(eps) * math.erfc((eps + .5) / root_two) / 2
        self.assertLessEqual(delta0, delta + 1e-300)  # need tolerance 10^-300

        # Compute the "standard" upper bound, which should be an upper bound.
        # Note, if orders is too sparse, this will NOT be an upper bound.
        delta1 = math.exp(-0.5 * (eps - 0.5)**2) if eps >= 0.5 else 1
        self.assertLessEqual(delta, delta1 + 1e-300)
def compute_epsilon(steps, target_delta=1e-5):
    """Return the epsilon spent after `steps` steps at `target_delta`.

    Batch size and noise multiplier are read from FLAGS, the dataset size
    from NUM_EXAMPLES. Warns when NUM_EXAMPLES * target_delta exceeds 1.
    """
    if NUM_EXAMPLES * target_delta > 1.:
        warnings.warn('Your delta might be too high.')

    fine_orders = list(jnp.linspace(1.1, 10.9, 99))
    coarse_orders = list(range(11, 64))
    orders = fine_orders + coarse_orders

    sampling_rate = FLAGS.batch_size / float(NUM_EXAMPLES)
    rdp_const = compute_rdp(sampling_rate, FLAGS.noise_multiplier, steps, orders)
    epsilon, _, _ = get_privacy_spent(orders, rdp_const, target_delta=target_delta)
    return epsilon
def TF_MA(q, sigmas, nc, target_delta=None, target_epsilon=None, max_order=32):
    """Account for a sequence of mechanisms with per-mechanism noise levels.

    Every entry of `sigmas` is paired with the same sampling rate `q` and the
    same step count `nc`; the resulting RDP over integer orders
    2..max_order is converted to an (eps, delta) pair.

    Returns:
        Tuple ``(eps, delta)`` from the accountant.
    """
    count = len(sigmas)
    sampling_rates = q * np.ones(count)
    steps_list = nc * np.ones(count)

    orders = range(2, max_order + 1)
    rdp = np.zeros_like(orders, dtype=float)
    rdp += compute_heterogenous_rdp(sampling_rates, sigmas, steps_list, orders)

    eps, delta, _opt_order = rdp_accountant.get_privacy_spent(
        orders, rdp, target_delta=target_delta, target_eps=target_epsilon)
    return (eps, delta)
def end(self, session):
    """Print the epsilon (at delta=1e-5) computed from the privacy ledger."""
    # Fetch the ledger contents: samples first, then queries.
    samples = session.run(self._samples)
    queries = session.run(self._queries)
    formatted_ledger = privacy_ledger.format_ledger(samples, queries)

    fractional_orders = [1 + x / 10.0 for x in range(1, 100)]
    integer_orders = list(range(12, 64))
    orders = fractional_orders + integer_orders

    rdp = compute_rdp_from_ledger(formatted_ledger, orders)
    privacy_spent = get_privacy_spent(orders, rdp, target_delta=1e-5)
    eps = privacy_spent[0]
    print('For delta=1e-5, the current epsilon is: %.2f' % eps)
def compute_epsilon(steps, num_examples=60000, target_delta=1e-5):
    """Return the epsilon spent after `steps` steps at `target_delta`.

    Batch size and noise multiplier are read from FLAGS.

    Args:
        steps: number of steps taken so far.
        num_examples: size of the training set, used for the sampling rate.
        target_delta: delta at which epsilon is reported.

    Returns:
        The epsilon reported by `get_privacy_spent`.
    """
    if num_examples * target_delta > 1.:
        warnings.warn('Your delta might be too high.')
    q = FLAGS.batch_size / float(num_examples)
    # `range` must be materialised: on Python 3, `list + range` raises
    # TypeError because a range object is not a list.
    orders = list(np.linspace(1.1, 10.9, 99)) + list(range(11, 64))
    rdp_const = compute_rdp(q, FLAGS.noise_multiplier, steps, orders)
    eps, _, _ = get_privacy_spent(orders, rdp_const, target_delta=target_delta)
    return eps
def test_get_privacy_spent_check_target_eps(self):
    """Check the delta reported for a constant RDP curve and for plain Gaussian noise."""
    # Case 1: a constant RDP curve, which corresponds to pure DP.
    orders = range(2, 33)
    rdp = [1.1] * len(orders)
    _, delta, opt_order = rdp_accountant.get_privacy_spent(
        orders, rdp, target_eps=1.32783806176)
    # Since rdp is constant, it should always pick the largest order.
    self.assertEqual(opt_order, 32)
    self.assertAlmostEqual(delta, 1e-5)

    # Case 2: Gaussian noise (with no subsampling), on a fine set of orders.
    orders = [0.001 * i for i in range(1000, 100000)]
    # Scale is chosen to obtain exactly (1,1e-6)-DP.
    rdp = rdp_accountant.compute_rdp(1, 4.530877117, 1, orders)
    _, delta, _ = rdp_accountant.get_privacy_spent(orders, rdp, target_eps=1)
    self.assertAlmostEqual(delta, 1e-6)
def compute_epsilon(epoch,noise_multi,N,batch_size,delta):
    """Return epsilon for `epoch` epochs over `N` examples at the given delta."""
    fractional_orders = [1 + x / 10. for x in range(1, 100)]
    integer_orders = list(range(12, 64))
    orders = fractional_orders + integer_orders

    sampling_probability = batch_size / N
    total_steps = epoch*N/batch_size
    rdp = compute_rdp(
        q=sampling_probability,
        noise_multiplier=noise_multi,
        steps=total_steps,
        orders=orders,
    )
    privacy_spent = get_privacy_spent(orders, rdp, target_delta=delta)
    return privacy_spent[0]
def end(self, session):
    """Write the ledger-derived epsilon (at delta=1e-5) to stdout, prefixed with a comma."""
    # Fetch the ledger contents: samples first, then queries.
    samples = session.run(self._samples)
    queries = session.run(self._queries)
    formatted_ledger = privacy_ledger.format_ledger(samples, queries)

    fractional_orders = [1 + x / 10.0 for x in range(1, 100)]
    integer_orders = list(range(12, 64))
    orders = fractional_orders + integer_orders

    rdp = compute_rdp_from_ledger(formatted_ledger, orders)
    privacy_spent = get_privacy_spent(orders, rdp, target_delta=1e-5)
    eps = privacy_spent[0]

    sys.stdout.write(',%s' % eps)
    sys.stdout.flush()
def compute_epsilon(steps):
    """Return epsilon after `steps` steps, reported at delta = 1 / (2 * N).

    The minibatch size, dataset size and noise multiplier come from the
    module-level `mb_size`, `N` and `noise_multiplier`.
    """
    orders = [1 + x / 10.0 for x in range(1, 1200)]
    sampling_rate = mb_size / N
    rdp = rdp_accountant.compute_rdp(
        q=sampling_rate,
        noise_multiplier=noise_multiplier,
        steps=steps,
        orders=orders,
    )
    target_delta = 1 / (2 * N)
    epsilon, _, _ = rdp_accountant.get_privacy_spent(
        orders=orders, rdp=rdp, target_delta=target_delta)
    return epsilon
def compute_epsilon(self, steps):
    """Return epsilon after `steps` steps at `self.delta`; infinite when no noise is added."""
    if self.noise_multiplier == 0.0:
        return float('inf')

    fractional_orders = [1 + x / 10. for x in range(1, 100)]
    integer_orders = list(range(12, 64))
    orders = fractional_orders + integer_orders

    sampling_probability = self.batch_size / self.total_data_size
    rdp = compute_rdp(
        q=sampling_probability,
        noise_multiplier=self.noise_multiplier,
        steps=steps,
        orders=orders,
    )
    privacy_spent = get_privacy_spent(orders, rdp, target_delta=self.delta)
    return privacy_spent[0]
def compute_epsilon(steps, noise_multiplier, batch_size, input_size, delta):
    """Return epsilon for the given hyperparameters; infinite when no noise is added."""
    if noise_multiplier == 0.0:
        return float('inf')

    fractional_orders = [1 + x / 10. for x in range(1, 100)]
    integer_orders = list(range(12, 64))
    orders = fractional_orders + integer_orders

    sampling_probability = batch_size / input_size
    rdp = compute_rdp(
        q=sampling_probability,
        noise_multiplier=noise_multiplier,
        steps=steps,
        orders=orders,
    )
    privacy_spent = get_privacy_spent(orders, rdp, target_delta=delta)
    return privacy_spent[0]
def test_check_composition(self):
    """Check epsilon and optimal order after composing two subsampled-Gaussian RDP curves."""
    orders = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12.,
              14., 16., 20., 24., 28., 32., 64., 256.)

    # First mechanism on its own; its result is overwritten below.
    rdp = rdp_accountant.compute_rdp(
        q=1e-4, noise_multiplier=.4, steps=40000, orders=orders)
    eps, _, opt_order = rdp_accountant.get_privacy_spent(
        orders, rdp, target_delta=1e-6)

    # Compose with a second mechanism by adding its RDP curve.
    rdp += rdp_accountant.compute_rdp(
        q=0.1, noise_multiplier=2, steps=100, orders=orders)
    eps, _, opt_order = rdp_accountant.get_privacy_spent(
        orders, rdp, target_delta=1e-5)

    self.assertAlmostEqual(eps, 8.509656, places=5)
    self.assertEqual(opt_order, 2.5)
def compute_renyi_privacy(num_examples, batch_size, steps, sigma, delta):
    """Return the privacy loss as ``SpentDP(epsilon, delta)`` using the RDP accountant."""
    orders = [1.25, 1.5, 1.75, 2., 2.25, 2.5, 3., 3.5, 4., 4.5]
    orders = orders + list(range(5, 64)) + [128, 256, 512]

    sampling_ratio = batch_size / num_examples
    rdp = compute_rdp(sampling_ratio, sigma, steps, orders)
    # The optimal order is returned by the accountant but not used here.
    epsilon, _, _optimal_order = get_privacy_spent(orders, rdp, target_delta=delta)
    return SpentDP(epsilon, delta)
def test_compute_eps_tree(self, noise_multiplier, eps):
    """Check that the tree-aggregation epsilon is strictly below the reference `eps`."""
    # This tests is based on the StackOverflow setting in "Practical and
    # Private (Deep) Learning without Sampling or Shuffling". The calculated
    # epsilon could be better as the method in this package keeps improving.
    steps_list = 1600
    target_delta = 1e-6

    fractional_orders = [1 + x / 10. for x in range(1, 100)]
    integer_orders = list(range(12, 64))
    orders = fractional_orders + integer_orders

    rdp = tree_aggregation_accountant.compute_rdp_tree_restart(
        noise_multiplier, steps_list, orders)
    privacy_spent = rdp_accountant.get_privacy_spent(
        orders, rdp, target_delta=target_delta)
    new_eps = privacy_spent[0]
    self.assertLess(new_eps, eps)
def compute_epsilon(steps):
    """Return epsilon after `steps` steps using FLAGS; infinite when no noise is added."""
    if FLAGS.noise_multiplier == 0.0:
        return float('inf')

    fractional_orders = [1 + x / 10. for x in range(1, 100)]
    integer_orders = list(range(12, 64))
    orders = fractional_orders + integer_orders

    sampling_probability = FLAGS.batch_size / 60000
    rdp = compute_rdp(
        q=sampling_probability,
        noise_multiplier=FLAGS.noise_multiplier,
        steps=steps,
        orders=orders,
    )
    # Delta is set to 1e-5 because MNIST has 60000 training points.
    privacy_spent = get_privacy_spent(orders, rdp, target_delta=1e-5)
    return privacy_spent[0]
def apply_dp_sgd_analysis(q, sigma, steps, orders, delta):
    """Run the DP-SGD analysis and return ``(eps, opt_order)`` at the given delta."""
    # compute_rdp requires that sigma be the ratio of the standard deviation of
    # the Gaussian noise to the l2-sensitivity of the function to which it is
    # added. Hence, sigma here corresponds to the `noise_multiplier` parameter
    # in the DP-SGD implementation found in privacy.optimizers.dp_optimizer
    rdp = compute_rdp(q, sigma, steps, orders)
    privacy_spent = get_privacy_spent(orders, rdp, target_delta=delta)
    eps, _, opt_order = privacy_spent
    return eps, opt_order
def compute_epsilon(steps):
    """Return epsilon after `steps` steps using FLAGS; infinite when no noise is added."""
    if FLAGS.noise_multiplier == 0.0:
        return float('inf')

    fractional_orders = [1 + x / 10. for x in range(1, 100)]
    integer_orders = list(range(12, 64))
    orders = fractional_orders + integer_orders

    sampling_probability = FLAGS.batch_size / NUM_TRAIN_EXAMPLES
    rdp = compute_rdp(
        q=sampling_probability,
        noise_multiplier=FLAGS.noise_multiplier,
        steps=steps,
        orders=orders,
    )
    # Delta is set to approximate 1 / (number of training points).
    privacy_spent = get_privacy_spent(orders, rdp, target_delta=1e-5)
    return privacy_spent[0]
def test_get_privacy_spent_consistency(self):
    """Converting delta -> eps and then eps -> delta should round-trip."""
    orders = range(2, 50)  # Large range of orders (helps test for overflows).
    sampling_rates = [0.01, 0.1, 0.8, 1.]  # Different subsampling rates.
    noise_scales = [0.1, 1., 3., 10., 100.]  # Different noise scales.
    deltas = [.9, .5, .1, .01, 1e-3, 1e-4, 1e-5, 1e-6, 1e-9, 1e-12]

    for q in sampling_rates:
        for multiplier in noise_scales:
            rdp = rdp_accountant.compute_rdp(q, multiplier, 1, orders)
            for delta in deltas:
                eps1, delta1, ord1 = rdp_accountant.get_privacy_spent(
                    orders, rdp, target_delta=delta)
                eps2, delta2, ord2 = rdp_accountant.get_privacy_spent(
                    orders, rdp, target_eps=eps1)

                self.assertEqual(delta1, delta)
                self.assertEqual(eps2, eps1)
                if eps1 == 0:
                    # This is a degenerate case; we won't have consistency.
                    self.assertLessEqual(delta2, delta)
                else:
                    self.assertEqual(ord1, ord2)
                    self.assertAlmostEqual(delta, delta2)
def compute_epsilon(epoch, num_train_eg, args):
    """Return epsilon after `epoch` epochs; infinite when `args.noise_multiplier` is zero."""
    steps = epoch * num_train_eg // args.batch_size
    if args.noise_multiplier == 0.0:
        return float('inf')

    fractional_orders = [1 + x / 10. for x in range(1, 100)]
    integer_orders = list(range(12, 64))
    orders = fractional_orders + integer_orders

    sampling_probability = args.batch_size / num_train_eg
    rdp = compute_rdp(
        q=sampling_probability,
        noise_multiplier=args.noise_multiplier,
        steps=steps,
        orders=orders,
    )
    # Delta is set to approximate 1 / (number of training points).
    privacy_spent = get_privacy_spent(orders, rdp, target_delta=1e-5)
    return privacy_spent[0]
def compute_epsilon(epochs=epochs, mb_size=mb_size, N=N, noise_multiplier=noise_multiplier):
    """Return epsilon after `epochs` epochs, reported at delta = 1 / (2 * N).

    Defaults are taken from the module-level values of the same names at
    definition time.
    """
    orders = [1 + x / 10.0 for x in range(1, 800)]
    steps = (N / mb_size) * epochs
    sampling_rate = mb_size / N
    rdp = rdp_accountant.compute_rdp(
        q=sampling_rate,
        noise_multiplier=noise_multiplier,
        steps=steps,
        orders=orders,
    )
    target_delta = 1 / (2 * N)
    epsilon, _, _ = rdp_accountant.get_privacy_spent(
        orders=orders, rdp=rdp, target_delta=target_delta)
    return epsilon
def test_compute_eps_tree_decreasing(self, steps_list):
    """Epsilon should strictly decrease as the noise multiplier grows."""
    # Test privacy epsilon decreases with noise multiplier increasing when
    # keeping other parameters the same.
    target_delta = 1e-6
    fractional_orders = [1 + x / 10. for x in range(1, 100)]
    integer_orders = list(range(12, 64))
    orders = fractional_orders + integer_orders

    # The starting reference is the value computed with a noise multiplier of 0.
    prev_eps = tree_aggregation_accountant.compute_rdp_tree_restart(
        0, steps_list, orders)

    noise_multipliers = [0.1 * x for x in range(1, 100, 5)]
    for noise_multiplier in noise_multipliers:
        rdp = tree_aggregation_accountant.compute_rdp_tree_restart(
            noise_multiplier, steps_list, orders)
        privacy_spent = rdp_accountant.get_privacy_spent(
            orders, rdp, target_delta=target_delta)
        eps = privacy_spent[0]
        self.assertLess(eps, prev_eps)
        prev_eps = eps
def print_privacy_guarantees(epochs, batch_size, samples, noise_multiplier):
    """Print position-dependent privacy guarantees, then the DP-SGD guarantee for comparison.

    Prints a notice and returns immediately when `noise_multiplier` is 0.
    """
    if noise_multiplier == 0:
        print('No differential privacy (additive noise is 0).')
        return

    print(
        'In the conditions of Theorem 34 (https://arxiv.org/abs/1808.06651) '
        'the training procedure results in the following privacy guarantees.')
    print('Out of the total of {} samples:'.format(samples))

    delta = 1e-5
    steps_per_epoch = samples // batch_size
    fine_orders = np.linspace(2, 20, num=181)
    coarse_orders = np.linspace(20, 100, num=81)
    orders = np.concatenate([fine_orders, coarse_orders])

    # Factor shared by every quantile below.
    scale = 2 * (noise_multiplier * batch_size)**-2

    for p in (.5, .9, .99):
        steps = math.ceil(steps_per_epoch * p)  # Steps in the last epoch.
        # Accounting for privacy loss from all-but-last epochs plus the
        # loss due to the last epoch.
        loss = ((epochs - 1) / steps_per_epoch +
                1 / (steps_per_epoch - steps + 1))
        coef = scale * loss

        # Using RDP accountant to compute eps. Doing computation analytically
        # is an option.
        rdp = [order * coef for order in orders]
        eps, _, _ = get_privacy_spent(orders, rdp, target_delta=delta)
        print('\t{:g}% enjoy at least ({:.2f}, {})-DP'.format(
            p * 100, eps, delta))

    # Compute privacy guarantees for the Sampled Gaussian Mechanism.
    total_steps = epochs * steps_per_epoch
    rdp_sgm = compute_rdp(batch_size / samples, noise_multiplier,
                          total_steps, orders)
    eps_sgm, _, _ = get_privacy_spent(orders, rdp_sgm, target_delta=delta)
    print('By comparison, DP-SGD analysis for training done with the same '
          'parameters and random shuffling in each epoch guarantees '
          '({:.2f}, {})-DP for all samples.'.format(eps_sgm, delta))
def get_delta_spent(self, target_epsilon):
    """
    Computes the delta spent by a DP optimizer at a fixed epsilon.

    Prints a hint when the optimal order is the largest or smallest of the
    available orders.

    :param target_epsilon: fixed epsilon of an (eps, delta)-DP guarantee
    :return: delta
    """
    rdp, orders = self._get_rdp_and_orders()
    _, delta, opt_order = get_privacy_spent(orders, rdp, target_eps=target_epsilon)
    # An optimum on the boundary of the order grid suggests the true optimum
    # may lie outside it.
    if opt_order == max(orders) or opt_order == min(orders):
        print(
            "The privacy estimate is likely to be improved by expanding "
            "the set of orders."
        )
    return delta
def TF_MA(sigma, T, target_delta=None, target_epsilon=None, max_order=32):
    """Account for T/2 Gaussian steps plus T/2 randomised-response steps.

    The RDP at each integer order 2..max_order is the sum of a Gaussian term
    (noise scale `sigma`) and a randomised-response term (using the
    module-level `p`). The total is converted to an (eps, delta) pair.

    Returns:
        Tuple ``(eps, delta)`` from the accountant.
    """
    orders = range(2, max_order + 1)
    rdp = np.zeros_like(orders, dtype=float)

    # Orders start at 2, so position k holds order k + 2.
    for k, alpha in enumerate(orders):
        # RDP for the Gaussian mechanism
        gaussian_term = (T / 2) * alpha / (2 * sigma**2)
        # RDP for the randomised response
        rr_term = (T / 2) * (1 / (alpha - 1)) * np.log(
            (p**alpha) * (1 - p)**(1 - alpha) + (1 - p)**alpha * p**(1 - alpha))
        rdp[k] += gaussian_term
        rdp[k] += rr_term

    eps, delta, _opt_order = rdp_accountant.get_privacy_spent(
        orders, rdp, target_delta=target_delta, target_eps=target_epsilon)
    return (eps, delta)