def test_sum(self, records_num, record_dim, noise_multiplier, seed,
             total_steps, clip, use_efficient):
  record_specs = tf.TensorSpec(shape=[record_dim])
  query = tree_aggregation_query.TreeResidualSumQuery.build_l2_gaussian_query(
      clip_norm=clip,
      noise_multiplier=noise_multiplier,
      record_specs=record_specs,
      noise_seed=seed,
      use_efficient=use_efficient)
  sum_query = tree_aggregation_query.TreeCumulativeSumQuery.build_l2_gaussian_query(
      clip_norm=clip,
      noise_multiplier=noise_multiplier,
      record_specs=record_specs,
      noise_seed=seed,
      use_efficient=use_efficient)
  global_state = query.initial_global_state()
  sum_global_state = sum_query.initial_global_state()
  cumsum_result = tf.zeros(shape=[record_dim])
  for _ in range(total_steps):
    records = [
        tf.random.uniform(shape=[record_dim], maxval=records_num)
        for _ in range(records_num)
    ]
    query_result, global_state = test_utils.run_query(query, records,
                                                      global_state)
    sum_query_result, sum_global_state = test_utils.run_query(
        sum_query, records, sum_global_state)
    cumsum_result += query_result
    self.assertAllClose(cumsum_result, sum_query_result, rtol=1e-6)
def test_build_l2_gaussian_query(self, records_num, record_dim,
                                 noise_multiplier, seed, total_steps, clip,
                                 use_efficient):
  record_specs = tf.TensorSpec(shape=[record_dim])
  query = tree_aggregation_query.TreeCumulativeSumQuery.build_l2_gaussian_query(
      clip_norm=clip,
      noise_multiplier=noise_multiplier,
      record_specs=record_specs,
      noise_seed=seed,
      use_efficient=use_efficient)
  reference_query = tree_aggregation_query.TreeCumulativeSumQuery(
      clip_fn=_get_l2_clip_fn(),
      clip_value=clip,
      noise_generator=_get_noise_generator(record_specs,
                                           clip * noise_multiplier, seed),
      record_specs=record_specs,
      use_efficient=use_efficient)
  global_state = query.initial_global_state()
  reference_global_state = reference_query.initial_global_state()
  for _ in range(total_steps):
    records = [
        tf.random.uniform(shape=[record_dim], maxval=records_num)
        for _ in range(records_num)
    ]
    query_result, global_state = test_utils.run_query(query, records,
                                                      global_state)
    reference_query_result, reference_global_state = test_utils.run_query(
        reference_query, records, reference_global_state)
    self.assertAllClose(query_result, reference_query_result, rtol=1e-6)
def test_adaptation_target_zero(self):
  record1 = tf.constant([8.5])
  record2 = tf.constant([-7.25])

  query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
      initial_l2_norm_clip=10.0,
      noise_multiplier=0.0,
      target_unclipped_quantile=0.0,
      learning_rate=1.0,
      clipped_count_stddev=0.0,
      expected_num_records=2.0,
      geometric_update=False)

  global_state = query.initial_global_state()

  initial_clip = global_state.sum_state.l2_norm_clip
  self.assertAllClose(initial_clip, 10.0)

  # On the first two iterations, nothing is clipped, so the clip goes down
  # by 1.0 (the learning rate). When the clip reaches 8.0, one record is
  # clipped, so the clip goes down by only 0.5. After two more iterations,
  # both records are clipped, and the clip norm stays there (at 7.0).
  expected_sums = [1.25, 1.25, 0.75, 0.25, 0.0]
  expected_clips = [9.0, 8.0, 7.5, 7.0, 7.0]

  for expected_sum, expected_clip in zip(expected_sums, expected_clips):
    actual_sum, global_state = test_utils.run_query(
        query, [record1, record2], global_state)

    actual_clip = global_state.sum_state.l2_norm_clip

    self.assertAllClose(actual_clip.numpy(), expected_clip)
    self.assertAllClose(actual_sum.numpy(), (expected_sum,))
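# The expected clips above follow the linear update rule of adaptive
# clipping: each step moves the clip by
# learning_rate * (unclipped_fraction - target_unclipped_quantile).
# A minimal standalone sketch of that arithmetic (an illustration with a
# hypothetical helper, not the library implementation):
def _linear_clip_trajectory(clip, norms, target, learning_rate, steps):
  trajectory = []
  for _ in range(steps):
    # Fraction of records whose L2 norm falls within the current clip.
    unclipped = sum(n <= clip for n in norms) / len(norms)
    clip -= learning_rate * (unclipped - target)
    trajectory.append(clip)
  return trajectory

# With the records above, _linear_clip_trajectory(10.0, [8.5, 7.25], 0.0,
# 1.0, 5) reproduces the expected clips [9.0, 8.0, 7.5, 7.0, 7.0].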
def test_tree_noise_restart(self):
  sample_num, tolerance, stddev = 1000, 0.3, 0.1
  initial_estimate, expected_num_records = 5., 2.
  record1 = tf.constant(1.)
  record2 = tf.constant(10.)

  query = _make_quantile_estimator_query(
      initial_estimate=initial_estimate,
      target_quantile=.5,
      learning_rate=1.,
      below_estimate_stddev=stddev,
      expected_num_records=expected_num_records,
      geometric_update=False,
      tree_aggregation=True)

  global_state = query.initial_global_state()
  self.assertAllClose(global_state.current_estimate, initial_estimate)

  # As the target quantile is accurate, there is no signal and only noise.
  samples = []
  for _ in range(sample_num):
    noised_estimate, global_state = test_utils.run_query(
        query, [record1, record2], global_state)
    samples.append(noised_estimate.numpy())
    # Resetting moves the estimate off its initial value; restore it so
    # that every round measures noise around the same point.
    global_state = query.reset_state(noised_estimate, global_state)
    self.assertNotEqual(global_state.current_estimate, initial_estimate)
    global_state = global_state._replace(current_estimate=initial_estimate)

  self.assertAllClose(
      np.std(samples), stddev / expected_num_records, rtol=tolerance)
def test_adaptation_linspace(self):
  # 21 records equally spaced from 0 to 10 in 0.5 increments.
  # Test that with a decaying learning rate we converge to the correct
  # median with error at most 0.25.
  records = [
      tf.constant(x) for x in np.linspace(0.0, 10.0, num=21, dtype=np.float32)
  ]

  learning_rate = tf.Variable(1.0)

  query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
      initial_l2_norm_clip=0.0,
      noise_multiplier=0.0,
      target_unclipped_quantile=0.5,
      learning_rate=learning_rate,
      clipped_count_stddev=0.0,
      expected_num_records=2.0)

  global_state = query.initial_global_state()

  for t in range(50):
    tf.compat.v1.assign(learning_rate, 1.0 / np.sqrt(t + 1))
    _, global_state = test_utils.run_query(query, records, global_state)

    actual_clip = global_state.sum_state.l2_norm_clip

    if t > 40:
      self.assertNear(actual_clip, 5.0, 0.25)
def test_target_one_geometric(self, exact):
  record1 = tf.constant(1.5)
  record2 = tf.constant(3.0)

  query = _make_quantile_estimator_query(
      initial_estimate=0.5,
      target_quantile=1.0,
      learning_rate=np.log(2.0),  # Geometric steps in powers of 2.
      below_estimate_stddev=0.0,
      expected_num_records=(None if exact else 2.0),
      geometric_update=True)

  global_state = query.initial_global_state()

  initial_estimate = global_state.current_estimate
  self.assertAllClose(initial_estimate, 0.5)

  # On the first two iterations, both records are above, so the estimate is
  # doubled. When the estimate reaches 2.0, only one record is above, so the
  # estimate is multiplied by sqrt(2.0). Still only one is above, so it
  # increases to 4.0. Now both records are below, and the estimate stays
  # there (at 4.0).
  two_times_root_two = 2 * np.sqrt(2.0)  # Approximately 2.828.
  expected_estimates = [1.0, 2.0, two_times_root_two, 4.0, 4.0]

  for expected_estimate in expected_estimates:
    actual_estimate, global_state = test_utils.run_query(
        query, [record1, record2], global_state)

    self.assertAllClose(actual_estimate.numpy(), expected_estimate)
def test_complex_nested_query(self):
  with self.cached_session() as sess:
    query_ab = gaussian_query.GaussianSumQuery(l2_norm_clip=1.0, stddev=0.0)
    query_c = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=10.0, sum_stddev=0.0, denominator=2.0)
    query_d = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=0.0)
    query = nested_query.NestedSumQuery(
        [query_ab, {'c': query_c, 'd': [query_d]}])

    record1 = [{'a': 0.0, 'b': 2.71828}, {'c': (-4.0, 6.0), 'd': [-4.0]}]
    record2 = [{'a': 3.14159, 'b': 0.0}, {'c': (6.0, -4.0), 'd': [5.0]}]

    query_result, _ = test_utils.run_query(query, [record1, record2])
    result = sess.run(query_result)
    expected = [{'a': 1.0, 'b': 1.0}, {'c': (1.0, 1.0), 'd': [1.0]}]
    self.assertAllClose(result, expected)
def test_target_zero_geometric(self, exact):
  record1 = tf.constant(5.0)
  record2 = tf.constant(2.5)

  query = _make_quantile_estimator_query(
      initial_estimate=16.0,
      target_quantile=0.0,
      learning_rate=np.log(2.0),  # Geometric steps in powers of 2.
      below_estimate_stddev=0.0,
      expected_num_records=(None if exact else 2.0),
      geometric_update=True)

  global_state = query.initial_global_state()

  initial_estimate = global_state.current_estimate
  self.assertAllClose(initial_estimate, 16.0)

  # For two iterations, both records are below, so the estimate is halved.
  # Then only one record is below, so the estimate goes down by only a
  # factor of sqrt(2.0), to 4 / sqrt(2.0). Still only one record is below,
  # so it reduces to 2.0. Now no records are below, and the estimate stays
  # there (at 2.0).
  four_div_root_two = 4 / np.sqrt(2.0)  # Approximately 2.828.
  expected_estimates = [8.0, 4.0, four_div_root_two, 2.0, 2.0]

  for expected_estimate in expected_estimates:
    actual_estimate, global_state = test_utils.run_query(
        query, [record1, record2], global_state)

    self.assertAllClose(actual_estimate.numpy(), expected_estimate)
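# The geometric variant moves the estimate multiplicatively rather than
# additively: each step scales it by
# exp(-learning_rate * (below_fraction - target_quantile)), so with
# learning_rate = log(2) the steps are powers of 2 (a factor of sqrt(2.0)
# when exactly half the records are below). A minimal sketch of that
# arithmetic (an illustration with a hypothetical helper, not the library
# implementation; the <= boundary convention is an assumption):
def _geometric_estimate_trajectory(estimate, records, target, steps):
  trajectory = []
  for _ in range(steps):
    below = sum(r <= estimate for r in records) / len(records)
    estimate *= np.exp(-np.log(2.0) * (below - target))
    trajectory.append(estimate)
  return trajectory

# With the records above, _geometric_estimate_trajectory(16.0, [5.0, 2.5],
# 0.0, 5) reproduces [8.0, 4.0, 4 / sqrt(2.0), 2.0, 2.0].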
def test_calls_inner_query(self):
  t = tf.constant([1, 2], dtype=tf.float32)
  record = [t, t]
  sample_size = 2
  sample = [record] * sample_size

  # Mock inner query to check that the methods get called.
  in_q = ddg_sum_query(
      l2_norm_bound=self.DEFAULT_L2_NORM_BOUND, local_scale=0.0)
  in_q.initial_sample_state = mock.MagicMock(wraps=in_q.initial_sample_state)
  in_q.initial_global_state = mock.MagicMock(wraps=in_q.initial_global_state)
  in_q.derive_sample_params = mock.MagicMock(wraps=in_q.derive_sample_params)
  in_q.preprocess_record = mock.MagicMock(wraps=in_q.preprocess_record)
  in_q.get_noised_result = mock.MagicMock(wraps=in_q.get_noised_result)

  comp_query = compression_query.CompressionSumQuery(
      quantization_params=self.DEFAULT_QUANTIZATION_PARAMS,
      inner_query=in_q,
      record_template=record)

  query_result, _ = test_utils.run_query(comp_query, sample)
  result = self.evaluate(query_result)
  expected = self.evaluate([t * sample_size, t * sample_size])
  self.assertAllClose(result, expected, atol=0)

  # Check calls.
  self.assertEqual(in_q.initial_sample_state.call_count, 1)
  self.assertEqual(in_q.initial_global_state.call_count, 1)
  self.assertEqual(in_q.derive_sample_params.call_count, 1)
  self.assertEqual(in_q.preprocess_record.call_count, sample_size)
  self.assertEqual(in_q.get_noised_result.call_count, 1)
def test_all_equal(self, exact, start_low, geometric):
  # 20 equal records. Test that we converge to that record and bounce around
  # it. Unlike the linspace test, the quantile-matching objective is very
  # sharp at the optimum so a decaying learning rate is necessary.
  num_records = 20
  records = [tf.constant(5.0)] * num_records

  learning_rate = tf.Variable(1.0)

  query = _make_quantile_estimator_query(
      initial_estimate=(1.0 if start_low else 10.0),
      target_quantile=0.5,
      learning_rate=learning_rate,
      below_estimate_stddev=(0.0 if exact else 1e-2),
      expected_num_records=(None if exact else num_records),
      geometric_update=geometric)

  global_state = query.initial_global_state()

  for t in range(50):
    tf.compat.v1.assign(learning_rate, 1.0 / np.sqrt(t + 1))
    _, global_state = test_utils.run_query(query, records, global_state)

    actual_estimate = global_state.current_estimate

    if t > 40:
      self.assertNear(actual_estimate, 5.0, 0.5)
def test_noisy_sum(self, stddev):
  num_trials = 1000
  record_1 = tf.zeros([num_trials], dtype=tf.int32)
  record_2 = tf.ones([num_trials], dtype=tf.int32)
  sample = [record_1, record_2]
  query = dg_sum_query(l2_norm_bound=num_trials, stddev=stddev)
  result, _ = test_utils.run_query(query, sample)

  sampled_noise = discrete_gaussian_utils.sample_discrete_gaussian(
      scale=tf.cast(stddev, tf.int32), shape=[num_trials], dtype=tf.int32)
  result, sampled_noise = self.evaluate([result, sampled_noise])

  # The standard error of the stddev should be roughly sigma / sqrt(2N - 2)
  # (https://stats.stackexchange.com/questions/156518), so set an rtol that
  # gives < 0.01% probability of failure (within ~4 standard errors).
  rtol = 4 / np.sqrt(2 * num_trials - 2)
  self.assertAllClose(np.std(result), stddev, rtol=rtol)

  # Use the standard error of the mean to compare percentiles.
  stderr = stddev / np.sqrt(num_trials)
  self.assertAllClose(
      np.percentile(result, [25, 50, 75]),
      np.percentile(sampled_noise, [25, 50, 75]),
      atol=4 * stderr)
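# The tolerances above follow standard large-sample approximations: the
# standard error of a sample standard deviation is roughly
# sigma / sqrt(2N - 2), and of a sample mean sigma / sqrt(N). A tiny sketch
# of the arithmetic (a hypothetical helper for illustration only):
def _four_sigma_tolerances(stddev, num_trials):
  # Relative tolerance on the stddev estimate: four standard errors of the
  # sample stddev, expressed as a fraction of the true stddev.
  rtol = 4 / np.sqrt(2 * num_trials - 2)
  # Absolute tolerance for comparing percentiles: four standard errors of
  # the sample mean.
  atol = 4 * stddev / np.sqrt(num_trials)
  return rtol, atol

# For example, _four_sigma_tolerances(10, 1000) gives rtol ~ 0.089 and
# atol ~ 1.26; four standard errors of a roughly normal statistic
# corresponds to a two-sided failure probability under 0.01%.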
def test_adaptation_all_equal(self, start_low, geometric):
  # 20 equal records. Test that we converge to that record and bounce around
  # it. Unlike the linspace test, the quantile-matching objective is very
  # sharp at the optimum so a decaying learning rate is necessary.
  num_records = 20
  records = [tf.constant(5.0)] * num_records

  learning_rate = tf.Variable(1.0)

  query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
      initial_l2_norm_clip=(1.0 if start_low else 10.0),
      noise_multiplier=0.0,
      target_unclipped_quantile=0.5,
      learning_rate=learning_rate,
      clipped_count_stddev=0.0,
      expected_num_records=num_records,
      geometric_update=geometric)

  global_state = query.initial_global_state()

  for t in range(50):
    tf.compat.v1.assign(learning_rate, 1.0 / np.sqrt(t + 1))
    _, global_state = test_utils.run_query(query, records, global_state)

    actual_clip = global_state.sum_state.l2_norm_clip

    if t > 40:
      self.assertNear(actual_clip, 5.0, 0.5)
def test_linspace(self, exact, start_low, geometric):
  # 21 records equally spaced from 0 to 10 in 0.5 increments.
  # Test that we converge to the correct median value and bounce around it.
  num_records = 21
  records = [
      tf.constant(x)
      for x in np.linspace(0.0, 10.0, num=num_records, dtype=np.float32)
  ]

  query = _make_quantile_estimator_query(
      initial_estimate=(1.0 if start_low else 10.0),
      target_quantile=0.5,
      learning_rate=1.0,
      below_estimate_stddev=(0.0 if exact else 1e-2),
      expected_num_records=(None if exact else num_records),
      geometric_update=geometric)

  global_state = query.initial_global_state()

  for t in range(50):
    _, global_state = test_utils.run_query(query, records, global_state)

    actual_estimate = global_state.current_estimate

    if t > 40:
      self.assertNear(actual_estimate, 5.0, 0.25)
def test_target_one(self, exact):
  record1 = tf.constant(1.5)
  record2 = tf.constant(2.75)

  query = _make_quantile_estimator_query(
      initial_estimate=0.0,
      target_quantile=1.0,
      learning_rate=1.0,
      below_estimate_stddev=0.0,
      expected_num_records=(None if exact else 2.0),
      geometric_update=False)

  global_state = query.initial_global_state()

  initial_estimate = global_state.current_estimate
  self.assertAllClose(initial_estimate, 0.0)

  # On the first two iterations, both records are above, so the estimate
  # goes up by 1.0 (the learning rate). When it reaches 2.0, only one record
  # is above, so the estimate goes up by only 0.5. After two more
  # iterations, both records are below, and the estimate stays there
  # (at 3.0).
  expected_estimates = [1.0, 2.0, 2.5, 3.0, 3.0]

  for expected_estimate in expected_estimates:
    actual_estimate, global_state = test_utils.run_query(
        query, [record1, record2], global_state)

    self.assertAllClose(actual_estimate.numpy(), expected_estimate)
def test_adaptation_target_one_geometric(self):
  record1 = tf.constant([-1.5])
  record2 = tf.constant([3.0])

  query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
      initial_l2_norm_clip=0.5,
      noise_multiplier=0.0,
      target_unclipped_quantile=1.0,
      learning_rate=np.log(2.0),  # Geometric steps in powers of 2.
      clipped_count_stddev=0.0,
      expected_num_records=2.0,
      geometric_update=True)

  global_state = query.initial_global_state()

  initial_clip = global_state.sum_state.l2_norm_clip
  self.assertAllClose(initial_clip, 0.5)

  # On the first two iterations, both records are clipped, so the clip is
  # doubled. When the clip reaches 2.0, only one record is clipped, so the
  # clip is multiplied by sqrt(2.0). Still only one is clipped, so it
  # increases to 4.0. Now neither record is clipped, and the clip norm
  # stays there (at 4.0).
  two_times_root_two = 2 * np.sqrt(2.0)  # Approximately 2.828.
  expected_sums = [0.0, 0.0, 0.5, two_times_root_two - 1.5, 1.5]
  expected_clips = [1.0, 2.0, two_times_root_two, 4.0, 4.0]

  for expected_sum, expected_clip in zip(expected_sums, expected_clips):
    actual_sum, global_state = test_utils.run_query(
        query, [record1, record2], global_state)

    actual_clip = global_state.sum_state.l2_norm_clip

    self.assertAllClose(actual_clip.numpy(), expected_clip)
    self.assertAllClose(actual_sum.numpy(), (expected_sum,))
def test_adaptation_linspace(self, start_low, geometric):
  # 21 records equally spaced from 0 to 10 in 0.5 increments.
  # Test that we converge to the correct median value and bounce around it.
  num_records = 21
  records = [
      tf.constant(x)
      for x in np.linspace(0.0, 10.0, num=num_records, dtype=np.float32)
  ]

  query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
      initial_l2_norm_clip=(1.0 if start_low else 10.0),
      noise_multiplier=0.0,
      target_unclipped_quantile=0.5,
      learning_rate=1.0,
      clipped_count_stddev=0.0,
      expected_num_records=num_records,
      geometric_update=geometric)

  global_state = query.initial_global_state()

  for t in range(50):
    _, global_state = test_utils.run_query(query, records, global_state)

    actual_clip = global_state.sum_state.l2_norm_clip

    if t > 40:
      self.assertNear(actual_clip, 5.0, 0.25)
def test_adaptation_target_one(self):
  record1 = tf.constant([-1.5])
  record2 = tf.constant([2.75])

  query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
      initial_l2_norm_clip=0.0,
      noise_multiplier=0.0,
      target_unclipped_quantile=1.0,
      learning_rate=1.0,
      clipped_count_stddev=0.0,
      expected_num_records=2.0)

  global_state = query.initial_global_state()

  initial_clip = global_state.sum_state.l2_norm_clip
  self.assertAllClose(initial_clip, 0.0)

  # On the first two iterations, both records are clipped, so the clip goes
  # up by 1.0 (the learning rate). When the clip reaches 2.0, only one
  # record is clipped, so the clip goes up by only 0.5. After two more
  # iterations, neither record is clipped, and the clip norm stays there
  # (at 3.0).
  expected_sums = [0.0, 0.0, 0.5, 1.0, 1.25]
  expected_clips = [1.0, 2.0, 2.5, 3.0, 3.0]

  for expected_sum, expected_clip in zip(expected_sums, expected_clips):
    actual_sum, global_state = test_utils.run_query(
        query, [record1, record2], global_state)

    actual_clip = global_state.sum_state.l2_norm_clip

    self.assertAllClose(actual_clip.numpy(), expected_clip)
    self.assertAllClose(actual_sum.numpy(), (expected_sum,))
def test_adaptation_target_zero_geometric(self):
  record1 = tf.constant([5.0])
  record2 = tf.constant([-2.5])

  query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
      initial_l2_norm_clip=16.0,
      noise_multiplier=0.0,
      target_unclipped_quantile=0.0,
      learning_rate=np.log(2.0),  # Geometric steps in powers of 2.
      clipped_count_stddev=0.0,
      expected_num_records=2.0,
      geometric_update=True)

  global_state = query.initial_global_state()

  initial_clip = global_state.sum_state.l2_norm_clip
  self.assertAllClose(initial_clip, 16.0)

  # For two iterations, nothing is clipped, so the clip is cut in half.
  # Then one record is clipped, so the clip goes down by only a factor of
  # sqrt(2.0), to 4 / sqrt(2.0). Still only one record is clipped, so it
  # reduces to 2.0. Now both records are clipped, and the clip norm stays
  # there (at 2.0).
  four_div_root_two = 4 / np.sqrt(2.0)  # Approximately 2.828.
  expected_sums = [2.5, 2.5, 1.5, four_div_root_two - 2.5, 0.0]
  expected_clips = [8.0, 4.0, four_div_root_two, 2.0, 2.0]

  for expected_sum, expected_clip in zip(expected_sums, expected_clips):
    actual_sum, global_state = test_utils.run_query(
        query, [record1, record2], global_state)

    actual_clip = global_state.sum_state.l2_norm_clip

    self.assertAllClose(actual_clip.numpy(), expected_clip)
    self.assertAllClose(actual_sum.numpy(), (expected_sum,))
def test_adaptation_all_equal(self, geometric):
  # 20 equal records. Test that with a decaying learning rate we converge to
  # that record and bounce around it.
  records = [tf.constant(5.0)] * 20

  learning_rate = tf.Variable(1.0)

  query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
      initial_l2_norm_clip=1.0,
      noise_multiplier=0.0,
      target_unclipped_quantile=0.5,
      learning_rate=learning_rate,
      clipped_count_stddev=0.0,
      expected_num_records=2.0,
      geometric_update=geometric)

  global_state = query.initial_global_state()

  for t in range(50):
    tf.compat.v1.assign(learning_rate, 1.0 / np.sqrt(t + 1))
    _, global_state = test_utils.run_query(query, records, global_state)

    actual_clip = global_state.sum_state.l2_norm_clip

    if t > 40:
      self.assertNear(actual_clip, 5.0, 0.5)
def test_nested_query_with_noise(self):
  with self.cached_session() as sess:
    sum_stddev = 2.71828
    denominator = 3.14159

    query1 = gaussian_query.GaussianSumQuery(
        l2_norm_clip=1.5, stddev=sum_stddev)
    query2 = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=0.5, sum_stddev=sum_stddev, denominator=denominator)
    query = nested_query.NestedSumQuery((query1, query2))

    record1 = (3.0, [2.0, 1.5])
    record2 = (0.0, [-1.0, -3.5])
    query_result, _ = test_utils.run_query(query, [record1, record2])

    noised_averages = []
    for _ in range(1000):
      noised_averages.append(tf.nest.flatten(sess.run(query_result)))

    result_stddev = np.std(noised_averages, 0)
    avg_stddev = sum_stddev / denominator
    expected_stddev = [sum_stddev, avg_stddev, avg_stddev]
    self.assertArrayNear(result_stddev, expected_stddev, 0.1)
def test_target_zero(self, exact):
  record1 = tf.constant(8.5)
  record2 = tf.constant(7.25)

  query = _make_quantile_estimator_query(
      initial_estimate=10.0,
      target_quantile=0.0,
      learning_rate=1.0,
      below_estimate_stddev=0.0,
      expected_num_records=(None if exact else 2.0),
      geometric_update=False)

  global_state = query.initial_global_state()

  initial_estimate = global_state.current_estimate
  self.assertAllClose(initial_estimate, 10.0)

  # On the first two iterations, both records are below, so the estimate
  # goes down by 1.0 (the learning rate). When the estimate reaches 8.0,
  # only one record is below, so the estimate goes down by only 0.5. After
  # two more iterations, both records are above, and the estimate stays
  # there (at 7.0).
  expected_estimates = [9.0, 8.0, 7.5, 7.0, 7.0]

  for expected_estimate in expected_estimates:
    actual_estimate, global_state = test_utils.run_query(
        query, [record1, record2], global_state)

    self.assertAllClose(actual_estimate.numpy(), expected_estimate)
def test_ledger(self):
  record1 = tf.constant([8.5])
  record2 = tf.constant([-7.25])

  population_size = tf.Variable(0)
  selection_probability = tf.Variable(1.0)

  query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
      initial_l2_norm_clip=10.0,
      noise_multiplier=1.0,
      target_unclipped_quantile=0.0,
      learning_rate=1.0,
      clipped_count_stddev=0.0,
      expected_num_records=2.0,
      geometric_update=False)

  query = privacy_ledger.QueryWithLedger(query, population_size,
                                         selection_probability)

  # First sample.
  tf.compat.v1.assign(population_size, 10)
  tf.compat.v1.assign(selection_probability, 0.1)
  _, global_state = test_utils.run_query(query, [record1, record2])

  expected_queries = [[10.0, 10.0], [0.5, 0.0]]
  formatted = query.ledger.get_formatted_ledger_eager()
  sample_1 = formatted[0]
  self.assertAllClose(sample_1.population_size, 10.0)
  self.assertAllClose(sample_1.selection_probability, 0.1)
  self.assertAllClose(sample_1.queries, expected_queries)

  # Second sample.
  tf.compat.v1.assign(population_size, 20)
  tf.compat.v1.assign(selection_probability, 0.2)
  test_utils.run_query(query, [record1, record2], global_state)

  formatted = query.ledger.get_formatted_ledger_eager()
  sample_1, sample_2 = formatted
  self.assertAllClose(sample_1.population_size, 10.0)
  self.assertAllClose(sample_1.selection_probability, 0.1)
  self.assertAllClose(sample_1.queries, expected_queries)

  expected_queries_2 = [[9.0, 9.0], [0.5, 0.0]]
  self.assertAllClose(sample_2.population_size, 20.0)
  self.assertAllClose(sample_2.selection_probability, 0.2)
  self.assertAllClose(sample_2.queries, expected_queries_2)
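# How the expected ledger entries above derive from the query parameters (a
# reading of this test's own numbers, not a specification of the ledger
# format): each entry is an [l2_norm_bound, noise_stddev] pair. The sum
# sub-query starts at clip 10.0 with noise_multiplier 1.0, giving
# [10.0, 10.0], and the clipped-count sub-query contributes [0.5, 0.0]
# since clipped_count_stddev is 0.0. Because neither record exceeds the
# initial clip and the target unclipped quantile is 0.0, the clip adapts
# from 10.0 down to 9.0 after the first sample, so the second sample logs
# [[9.0, 9.0], [0.5, 0.0]].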
def test_sum_raise_on_l2_norm_excess(self, l2_norm_bound):
  with self.cached_session() as sess:
    record = tf.constant([10, 10], dtype=tf.int32)
    query = dg_sum_query(l2_norm_bound=l2_norm_bound, stddev=0.0)

    with self.assertRaises(tf.errors.InvalidArgumentError):
      query_result, _ = test_utils.run_query(query, [record])
      sess.run(query_result)
def test_nested_query(self):
  population_size = tf.Variable(0)
  selection_probability = tf.Variable(1.0)

  query1 = gaussian_query.GaussianAverageQuery(
      l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0)
  query2 = gaussian_query.GaussianAverageQuery(
      l2_norm_clip=5.0, sum_stddev=1.0, denominator=5.0)
  query = nested_query.NestedQuery([query1, query2])
  query = privacy_ledger.QueryWithLedger(query, population_size,
                                         selection_probability)

  record1 = [1.0, [12.0, 9.0]]
  record2 = [5.0, [1.0, 2.0]]

  # First sample.
  tf.compat.v1.assign(population_size, 10)
  tf.compat.v1.assign(selection_probability, 0.1)
  test_utils.run_query(query, [record1, record2])

  expected_queries = [[4.0, 2.0], [5.0, 1.0]]
  formatted = query.ledger.get_formatted_ledger_eager()
  sample_1 = formatted[0]
  self.assertAllClose(sample_1.population_size, 10.0)
  self.assertAllClose(sample_1.selection_probability, 0.1)
  self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries))

  # Second sample.
  tf.compat.v1.assign(population_size, 20)
  tf.compat.v1.assign(selection_probability, 0.2)
  test_utils.run_query(query, [record1, record2])

  formatted = query.ledger.get_formatted_ledger_eager()
  sample_1, sample_2 = formatted
  self.assertAllClose(sample_1.population_size, 10.0)
  self.assertAllClose(sample_1.selection_probability, 0.1)
  self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries))
  self.assertAllClose(sample_2.population_size, 20.0)
  self.assertAllClose(sample_2.selection_probability, 0.2)
  self.assertAllClose(sorted(sample_2.queries), sorted(expected_queries))
def test_gaussian_sum_no_clip_no_noise(self):
  with self.cached_session() as sess:
    record1 = tf.constant([2.0, 0.0])
    record2 = tf.constant([-1.0, 1.0])

    query = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0, stddev=0.0)
    query_result, _ = test_utils.run_query(query, [record1, record2])
    result = sess.run(query_result)
    expected = [1.0, 1.0]
    self.assertAllClose(result, expected)
def test_sum_raise_on_float_inputs(self):
  with self.cached_session() as sess:
    record1 = tf.constant([2, 0], dtype=tf.float32)
    record2 = tf.constant([-1, 1], dtype=tf.float32)
    query = dg_sum_query(l2_norm_bound=10, stddev=0.0)

    with self.assertRaises(TypeError):
      query_result, _ = test_utils.run_query(query, [record1, record2])
      sess.run(query_result)
def test_sum_no_noise(self):
  with self.cached_session() as sess:
    record1 = tf.constant([2, 0], dtype=tf.int32)
    record2 = tf.constant([-1, 1], dtype=tf.int32)

    query = dg_sum_query(l2_norm_bound=10, stddev=0.0)
    query_result, _ = test_utils.run_query(query, [record1, record2])
    result = sess.run(query_result)
    expected = [1, 1]
    self.assertAllEqual(result, expected)
def test_gaussian_sum_with_clip_no_noise(self):
  with self.cached_session() as sess:
    record1 = tf.constant([-6.0, 8.0])  # Clipped to [-3.0, 4.0].
    record2 = tf.constant([4.0, -3.0])  # Not clipped.

    query = gaussian_query.GaussianSumQuery(l2_norm_clip=5.0, stddev=0.0)
    query_result, _ = test_utils.run_query(query, [record1, record2])
    result = sess.run(query_result)
    expected = [1.0, 1.0]
    self.assertAllClose(result, expected)
def test_no_privacy_average(self):
  with self.cached_session() as sess:
    record1 = tf.constant([5.0, 0.0])
    record2 = tf.constant([-1.0, 2.0])

    query = no_privacy_query.NoPrivacyAverageQuery()
    query_result, _ = test_utils.run_query(query, [record1, record2])
    result = sess.run(query_result)
    expected = [2.0, 1.0]
    self.assertAllClose(result, expected)
def test_sum_float_norm_not_rounded(self):
  """Tests that the float L2 norm bound doesn't get rounded or cast to an integer."""
  with self.cached_session() as sess:
    # A norm bound cast or rounded down to an integer would be insufficient
    # for this record, whose L2 norm is sqrt(200) ~ 14.14.
    l2_norm_bound = 14.2
    record = tf.constant([10, 10], dtype=tf.int32)
    query = dg_sum_query(l2_norm_bound=l2_norm_bound, stddev=0.0)
    query_result, _ = test_utils.run_query(query, [record])
    result = sess.run(query_result)
    expected = [10, 10]
    self.assertAllEqual(result, expected)