def testBaseline(self, cls, num_microbatches, expected_answer):
  with tf.GradientTape(persistent=True) as gradient_tape:
    var0 = tf.Variable([1.0, 2.0])
    data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])

    dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
    dp_sum_query = privacy_ledger.QueryWithLedger(
        dp_sum_query, 1e6, num_microbatches / 1e6)

    opt = cls(dp_sum_query, num_microbatches=num_microbatches,
              learning_rate=2.0)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    # Fetch params to validate initial values.
    self.assertAllClose([1.0, 2.0], self.evaluate(var0))

    # Expected gradient is sum of differences divided by number of
    # microbatches.
    grads_and_vars = opt.compute_gradients(
        lambda: self._loss_fn(var0, data0), [var0],
        gradient_tape=gradient_tape)
    self.assertAllCloseAccordingToType(expected_answer, grads_and_vars[0][0])
def __init__(
    self,
    l2_norm_clip,
    noise_multiplier,
    num_microbatches=None,
    ledger=None,
    unroll_microbatches=False,
    *args,  # pylint: disable=keyword-arg-before-vararg
    **kwargs):
  self._l2_norm_clip = l2_norm_clip
  self._noise_multiplier = noise_multiplier
  self._num_microbatches = num_microbatches
  # `cls` is the base optimizer class captured from the enclosing factory
  # function (e.g. make_gaussian_optimizer_class(cls)).
  self._base_optimizer_class = cls

  dp_sum_query = gaussian_query.GaussianSumQuery(
      l2_norm_clip, l2_norm_clip * noise_multiplier)

  if ledger:
    dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query,
                                                  ledger=ledger)

  super(DPGaussianOptimizerClass, self).__init__(
      dp_sum_query, num_microbatches, unroll_microbatches, *args, **kwargs)
def testUnrollMicrobatches(self, cls):
  with self.cached_session() as sess:
    var0 = tf.Variable([1.0, 2.0])
    data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0], [-1.0, 0.0]])

    num_microbatches = 4

    dp_sum_query = gaussian_query.GaussianSumQuery(1.0e9, 0.0)
    dp_sum_query = privacy_ledger.QueryWithLedger(
        dp_sum_query, 1e6, num_microbatches / 1e6)

    opt = cls(
        dp_sum_query,
        num_microbatches=num_microbatches,
        learning_rate=2.0,
        unroll_microbatches=True)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    # Fetch params to validate initial values.
    self.assertAllClose([1.0, 2.0], self.evaluate(var0))

    # Expected gradient is sum of differences divided by number of
    # microbatches.
    gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0])
    grads_and_vars = sess.run(gradient_op)
    self.assertAllCloseAccordingToType([-2.5, -2.5], grads_and_vars[0][0])
def linear_model_fn(features, labels, mode):
  preds = tf.keras.layers.Dense(
      1, activation='linear', name='dense')(features['x'])

  vector_loss = tf.math.squared_difference(labels, preds)
  scalar_loss = tf.reduce_mean(input_tensor=vector_loss)

  dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
  dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)
  optimizer = dp_optimizer.DPGradientDescentOptimizer(
      dp_sum_query, num_microbatches=1, learning_rate=1.0)

  global_step = tf.compat.v1.train.get_global_step()
  train_op = optimizer.minimize(loss=vector_loss, global_step=global_step)
  return tf.estimator.EstimatorSpec(
      mode=mode, loss=scalar_loss, train_op=train_op)
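# Usage sketch (an illustration, not part of the original code): driving
# `linear_model_fn` through the tf.estimator API. The synthetic data, the
# feature key 'x', and the batch settings below are assumptions.
import numpy as np
import tensorflow as tf

estimator = tf.estimator.Estimator(model_fn=linear_model_fn)
train_input_fn = tf.compat.v1.estimator.inputs.numpy_input_fn(
    x={'x': np.random.normal(size=(100, 1)).astype(np.float32)},
    y=np.random.normal(size=(100, 1)).astype(np.float32),
    batch_size=20,
    num_epochs=1,
    shuffle=True)
estimator.train(input_fn=train_input_fn)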
def test_ledger(self):
  record1 = tf.constant([8.5])
  record2 = tf.constant([-7.25])

  population_size = tf.Variable(0)
  selection_probability = tf.Variable(1.0)

  query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
      initial_l2_norm_clip=10.0,
      noise_multiplier=1.0,
      target_unclipped_quantile=0.0,
      learning_rate=1.0,
      clipped_count_stddev=0.0,
      expected_num_records=2.0,
      geometric_update=False)

  query = privacy_ledger.QueryWithLedger(query, population_size,
                                         selection_probability)

  # First sample.
  tf.compat.v1.assign(population_size, 10)
  tf.compat.v1.assign(selection_probability, 0.1)
  _, global_state = test_utils.run_query(query, [record1, record2])

  expected_queries = [[10.0, 10.0], [0.5, 0.0]]
  formatted = query.ledger.get_formatted_ledger_eager()
  sample_1 = formatted[0]
  self.assertAllClose(sample_1.population_size, 10.0)
  self.assertAllClose(sample_1.selection_probability, 0.1)
  self.assertAllClose(sample_1.queries, expected_queries)

  # Second sample.
  tf.compat.v1.assign(population_size, 20)
  tf.compat.v1.assign(selection_probability, 0.2)
  test_utils.run_query(query, [record1, record2], global_state)

  formatted = query.ledger.get_formatted_ledger_eager()
  sample_1, sample_2 = formatted
  self.assertAllClose(sample_1.population_size, 10.0)
  self.assertAllClose(sample_1.selection_probability, 0.1)
  self.assertAllClose(sample_1.queries, expected_queries)

  expected_queries_2 = [[9.0, 9.0], [0.5, 0.0]]
  self.assertAllClose(sample_2.population_size, 20.0)
  self.assertAllClose(sample_2.selection_probability, 0.2)
  self.assertAllClose(sample_2.queries, expected_queries_2)
def testClippingNorm(self, cls):
  with self.cached_session() as sess:
    var0 = tf.Variable([0.0, 0.0])
    data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])

    dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
    dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)

    opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    # Fetch params to validate initial values.
    self.assertAllClose([0.0, 0.0], self.evaluate(var0))

    # Expected gradient is sum of differences.
    gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0])
    grads_and_vars = sess.run(gradient_op)
    self.assertAllCloseAccordingToType([-0.6, -0.8], grads_and_vars[0][0])
def test_nested_query(self):
  population_size = tf.Variable(0)
  selection_probability = tf.Variable(1.0)

  query1 = gaussian_query.GaussianAverageQuery(
      l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0)
  query2 = gaussian_query.GaussianAverageQuery(
      l2_norm_clip=5.0, sum_stddev=1.0, denominator=5.0)

  query = nested_query.NestedQuery([query1, query2])
  query = privacy_ledger.QueryWithLedger(query, population_size,
                                         selection_probability)

  record1 = [1.0, [12.0, 9.0]]
  record2 = [5.0, [1.0, 2.0]]

  # First sample.
  tf.compat.v1.assign(population_size, 10)
  tf.compat.v1.assign(selection_probability, 0.1)
  test_utils.run_query(query, [record1, record2])

  expected_queries = [[4.0, 2.0], [5.0, 1.0]]
  formatted = query.ledger.get_formatted_ledger_eager()
  sample_1 = formatted[0]
  self.assertAllClose(sample_1.population_size, 10.0)
  self.assertAllClose(sample_1.selection_probability, 0.1)
  self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries))

  # Second sample.
  tf.compat.v1.assign(population_size, 20)
  tf.compat.v1.assign(selection_probability, 0.2)
  test_utils.run_query(query, [record1, record2])

  formatted = query.ledger.get_formatted_ledger_eager()
  sample_1, sample_2 = formatted
  self.assertAllClose(sample_1.population_size, 10.0)
  self.assertAllClose(sample_1.selection_probability, 0.1)
  self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries))
  self.assertAllClose(sample_2.population_size, 20.0)
  self.assertAllClose(sample_2.selection_probability, 0.2)
  self.assertAllClose(sorted(sample_2.queries), sorted(expected_queries))
def __init__(
    self,
    l2_norm_clip,
    exponents,
    noise_variance,
    num_microbatches=None,
    ledger=None,
    unroll_microbatches=False,
    *args,  # pylint: disable=keyword-arg-before-vararg
    **kwargs):
  dp_sum_query = dp_fixedvariance_query.GeneralSumQuery(
      l2_norm_clip, exponents, noise_variance)

  if ledger:
    dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query,
                                                  ledger=ledger)

  super(DP_fixedvarianceoptimizerClass, self).__init__(
      dp_sum_query, num_microbatches, unroll_microbatches, *args, **kwargs)
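# Hedged instantiation sketch: assuming `DP_fixedvarianceoptimizerClass` is
# produced by a factory analogous to TF Privacy's
# make_gaussian_optimizer_class. The factory name
# `make_fixedvariance_optimizer_class`, the form of `exponents`, and all
# hyperparameter values below are assumptions.
import tensorflow as tf

DPFixedVarianceGradientDescent = make_fixedvariance_optimizer_class(
    tf.compat.v1.train.GradientDescentOptimizer)

opt = DPFixedVarianceGradientDescent(
    l2_norm_clip=1.0,
    exponents=[1.0, 2.0],  # assumed form of the exponents argument
    noise_variance=0.5,
    num_microbatches=4,
    learning_rate=0.1)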
def testClippingNorm(self, cls):
  with tf.GradientTape(persistent=True) as gradient_tape:
    var0 = tf.Variable([0.0, 0.0])
    data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])

    dp_sum_query = gaussian_query.GaussianSumQuery(1.0, 0.0)
    dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)

    opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    # Fetch params to validate initial values.
    self.assertAllClose([0.0, 0.0], self.evaluate(var0))

    # Expected gradient is sum of differences.
    grads_and_vars = opt.compute_gradients(
        lambda: self._loss_fn(var0, data0), [var0],
        gradient_tape=gradient_tape)
    self.assertAllCloseAccordingToType([-0.6, -0.8], grads_and_vars[0][0])
def __init__(
    self,
    l2_norm_clip,
    noise_multiplier,
    num_microbatches=None,
    ledger=None,
    unroll_microbatches=False,
    *args,  # pylint: disable=keyword-arg-before-vararg
    **kwargs):
  """Initializes the `DPGaussianOptimizerClass`.

  Args:
    l2_norm_clip: Clipping norm (max L2 norm of per-microbatch gradients).
    noise_multiplier: Ratio of the standard deviation to the clipping norm.
    num_microbatches: Number of microbatches into which each minibatch is
      split. If `None`, defaults to the size of the minibatch, and
      per-example gradients will be computed.
    ledger: An instance of `tf_privacy.PrivacyLedger`. Defaults to `None`.
    unroll_microbatches: If true, processes microbatches within a Python
      loop instead of a `tf.while_loop`. Can be used if using a
      `tf.while_loop` raises an exception.
    *args: These will be passed on to the base class `__init__` method.
    **kwargs: These will be passed on to the base class `__init__` method.
  """
  self._l2_norm_clip = l2_norm_clip
  self._noise_multiplier = noise_multiplier
  self._num_microbatches = num_microbatches
  # `cls` is the base optimizer class captured from the enclosing factory
  # function (e.g. make_gaussian_optimizer_class(cls)).
  self._base_optimizer_class = cls

  dp_sum_query = gaussian_query.GaussianSumQuery(
      l2_norm_clip, l2_norm_clip * noise_multiplier)

  if ledger:
    dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query,
                                                  ledger=ledger)

  super(DPGaussianOptimizerClass, self).__init__(
      dp_sum_query, num_microbatches, unroll_microbatches, *args, **kwargs)
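# Usage sketch, assuming this `__init__` belongs to the class returned by a
# factory like TF Privacy's make_gaussian_optimizer_class(cls), which is
# where the free variable `cls` would be captured. The hyperparameter
# values below are illustrative.
import tensorflow as tf

DPGradientDescentGaussianOptimizer = make_gaussian_optimizer_class(
    tf.compat.v1.train.GradientDescentOptimizer)

opt = DPGradientDescentGaussianOptimizer(
    l2_norm_clip=1.0,      # max L2 norm of each microbatch gradient
    noise_multiplier=1.1,  # noise stddev = l2_norm_clip * noise_multiplier
    num_microbatches=4,
    learning_rate=0.1)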
def testNoiseMultiplier(self, cls):
  with self.cached_session() as sess:
    var0 = tf.Variable([0.0])
    data0 = tf.Variable([[0.0]])

    dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0)
    dp_sum_query = privacy_ledger.QueryWithLedger(dp_sum_query, 1e6, 1 / 1e6)

    opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    # Fetch params to validate initial values.
    self.assertAllClose([0.0], self.evaluate(var0))

    gradient_op = opt.compute_gradients(self._loss(data0, var0), [var0])
    grads = []
    for _ in range(1000):
      grads_and_vars = sess.run(gradient_op)
      grads.append(grads_and_vars[0][0])

    # Test standard deviation is close to l2_norm_clip * noise_multiplier.
    self.assertNear(np.std(grads), 2.0 * 4.0, 0.5)
def test_sum_query(self):
  record1 = tf.constant([2.0, 0.0])
  record2 = tf.constant([-1.0, 1.0])

  population_size = tf.Variable(0)
  selection_probability = tf.Variable(1.0)

  query = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=0.0)
  query = privacy_ledger.QueryWithLedger(query, population_size,
                                         selection_probability)

  # First sample.
  tf.compat.v1.assign(population_size, 10)
  tf.compat.v1.assign(selection_probability, 0.1)
  test_utils.run_query(query, [record1, record2])

  expected_queries = [[10.0, 0.0]]
  formatted = query.ledger.get_formatted_ledger_eager()
  sample_1 = formatted[0]
  self.assertAllClose(sample_1.population_size, 10.0)
  self.assertAllClose(sample_1.selection_probability, 0.1)
  self.assertAllClose(sample_1.queries, expected_queries)

  # Second sample.
  tf.compat.v1.assign(population_size, 20)
  tf.compat.v1.assign(selection_probability, 0.2)
  test_utils.run_query(query, [record1, record2])

  formatted = query.ledger.get_formatted_ledger_eager()
  sample_1, sample_2 = formatted
  self.assertAllClose(sample_1.population_size, 10.0)
  self.assertAllClose(sample_1.selection_probability, 0.1)
  self.assertAllClose(sample_1.queries, expected_queries)
  self.assertAllClose(sample_2.population_size, 20.0)
  self.assertAllClose(sample_2.selection_probability, 0.2)
  self.assertAllClose(sample_2.queries, expected_queries)
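# Accounting sketch: once a QueryWithLedger has recorded samples, the
# formatted ledger can be fed to an RDP accountant. This assumes the
# compute_rdp_from_ledger and get_privacy_spent helpers from TF Privacy's
# rdp_accountant module (present in releases contemporary with this code),
# a QueryWithLedger instance named `query` as in the test above, and an
# illustrative target_delta.
from tensorflow_privacy.privacy.analysis import rdp_accountant

orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))
formatted = query.ledger.get_formatted_ledger_eager()
rdp = rdp_accountant.compute_rdp_from_ledger(formatted, orders)
eps, _, _ = rdp_accountant.get_privacy_spent(orders, rdp, target_delta=1e-5)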
def testNoiseMultiplier(self, cls):
  with tf.GradientTape(persistent=True) as gradient_tape:
    var0 = tf.Variable([0.0])
    data0 = tf.Variable([[0.0]])

    dp_sum_query = gaussian_query.GaussianSumQuery(4.0, 8.0)
    dp_sum_query = privacy_ledger.QueryWithLedger(
        dp_sum_query, 1e6, 1 / 1e6)

    opt = cls(dp_sum_query, num_microbatches=1, learning_rate=2.0)

    self.evaluate(tf.compat.v1.global_variables_initializer())
    # Fetch params to validate initial values.
    self.assertAllClose([0.0], self.evaluate(var0))

    grads = []
    for _ in range(1000):
      grads_and_vars = opt.compute_gradients(
          lambda: self._loss_fn(var0, data0), [var0],
          gradient_tape=gradient_tape)
      grads.append(grads_and_vars[0][0])

    # Test standard deviation is close to l2_norm_clip * noise_multiplier.
    self.assertNear(np.std(grads), 2.0 * 4.0, 0.5)
def __init__(
    self,
    l2_norm_clip,
    noise_multiplier,
    scalars,
    num_microbatches=None,
    ledger=None,
    unroll_microbatches=False,
    *args,  # pylint: disable=keyword-arg-before-vararg
    **kwargs):
  # One Gaussian average query per variable group; `l2_norm_clip`,
  # `noise_multiplier` and `scalars` are parallel lists of length 12.
  dp_nested_query = nested_query.NestedQuery([
      gaussian_query.GaussianAverageQuery(
          l2_norm_clip[i], l2_norm_clip[i] * noise_multiplier[i], scalars[i])
      for i in range(12)
  ])

  if ledger:
    dp_nested_query = privacy_ledger.QueryWithLedger(
        dp_nested_query, ledger=ledger)

  super(DPMultiGaussianOptimizerClass, self).__init__(
      dp_nested_query, num_microbatches, unroll_microbatches, *args, **kwargs)
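# Usage sketch for the multi-query constructor: three parallel lists of
# length 12, one entry per variable group. The factory
# `make_multi_gaussian_optimizer_class` and all values below are
# assumptions.
import tensorflow as tf

num_groups = 12
l2_norm_clips = [1.0] * num_groups
noise_multipliers = [1.1] * num_groups
scalars = [1.0] * num_groups  # denominators for the per-group averages

DPMultiGaussianGradientDescent = make_multi_gaussian_optimizer_class(
    tf.compat.v1.train.GradientDescentOptimizer)

opt = DPMultiGaussianGradientDescent(
    l2_norm_clips, noise_multipliers, scalars,
    num_microbatches=1, learning_rate=0.1)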