def test_adaptation_linspace(self):
    # 21 records equally spaced from 0 to 10 (0.5 increments). With a
    # learning rate decaying as 1/sqrt(t+1), the adapted clip is expected to
    # settle within 0.25 of the median (5.0) over the last iterations.
    samples = np.linspace(0.0, 10.0, num=21, dtype=np.float32)
    records = [tf.constant(sample) for sample in samples]

    learning_rate = tf.Variable(1.0)

    query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
        initial_l2_norm_clip=0.0,
        noise_multiplier=0.0,
        target_unclipped_quantile=0.5,
        learning_rate=learning_rate,
        clipped_count_stddev=0.0,
        expected_num_records=2.0)

    global_state = query.initial_global_state()

    for step in range(50):
        tf.assign(learning_rate, 1.0 / np.sqrt(step + 1))
        _, global_state = test_utils.run_query(query, records, global_state)

        # Only the tail of the run (steps 41..49) is checked.
        if step > 40:
            self.assertNear(global_state.l2_norm_clip, 5.0, 0.25)
def test_adaptation_all_equal(self):
    # 20 equal records (all 5.0). With a learning rate decaying as
    # 1/sqrt(t+1), the adapted clip is expected to end up within 0.25 of
    # that common value over the last iterations.
    records = 20 * [tf.constant(5.0)]

    learning_rate = tf.Variable(1.0)

    query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
        initial_l2_norm_clip=0.0,
        noise_multiplier=0.0,
        target_unclipped_quantile=0.5,
        learning_rate=learning_rate,
        clipped_count_stddev=0.0,
        expected_num_records=2.0)

    global_state = query.initial_global_state()

    for step in range(50):
        tf.assign(learning_rate, 1.0 / np.sqrt(step + 1))
        _, global_state = test_utils.run_query(query, records, global_state)

        # Only the tail of the run (steps 41..49) is checked.
        if step > 40:
            self.assertNear(global_state.l2_norm_clip, 5.0, 0.25)
def test_adaptation_target_one(self):
    # Tracks the clip and the clipped sum round by round when the target
    # unclipped quantile is 1.0 and the learning rate is fixed at 1.0.
    record1 = tf.constant([-1.5])
    record2 = tf.constant([2.75])

    query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
        initial_l2_norm_clip=0.0,
        noise_multiplier=0.0,
        target_unclipped_quantile=1.0,
        learning_rate=1.0,
        clipped_count_stddev=0.0,
        expected_num_records=2.0)

    global_state = query.initial_global_state()
    self.assertAllClose(global_state.l2_norm_clip, 0.0)

    # On the first two iterations both records are clipped, so the clip goes
    # up by 1.0 (the learning rate). Once the clip reaches 2.0 only one
    # record is clipped, so the clip goes up by only 0.5. After two such
    # iterations the clip is 3.0, neither record is clipped any more, and
    # the clip stays at 3.0.
    expected_rounds = [
        # (expected_sum, expected_clip)
        (0.0, 1.0),
        (0.0, 2.0),
        (0.5, 2.5),
        (1.0, 3.0),
        (1.25, 3.0),
    ]

    for expected_sum, expected_clip in expected_rounds:
        actual_sum, global_state = test_utils.run_query(
            query, [record1, record2], global_state)

        actual_clip = global_state.l2_norm_clip
        self.assertAllClose(actual_clip.numpy(), expected_clip)
        self.assertAllClose(actual_sum.numpy(), (expected_sum,))
def test_ledger(self):
    # Runs the adaptive-clip query twice through a ledger-recording wrapper
    # and checks the ledger entries written for each sample.
    def check_sample(sample, population, probability, queries):
        self.assertAllClose(sample.population_size, population)
        self.assertAllClose(sample.selection_probability, probability)
        self.assertAllClose(sample.queries, queries)

    record1 = tf.constant([8.5])
    record2 = tf.constant([-7.25])

    population_size = tf.Variable(0)
    selection_probability = tf.Variable(0.0)
    ledger = privacy_ledger.PrivacyLedger(
        population_size, selection_probability, 50, 50)

    inner_query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
        initial_l2_norm_clip=10.0,
        noise_multiplier=1.0,
        target_unclipped_quantile=0.0,
        learning_rate=1.0,
        clipped_count_stddev=0.0,
        expected_num_records=2.0,
        ledger=ledger)
    query = privacy_ledger.QueryWithLedger(inner_query, ledger)

    # First sample.
    tf.assign(population_size, 10)
    tf.assign(selection_probability, 0.1)
    _, global_state = test_utils.run_query(query, [record1, record2])

    expected_queries = [[10.0, 10.0], [0.5, 0.0]]
    sample_1 = ledger.get_formatted_ledger_eager()[0]
    check_sample(sample_1, 10.0, 0.1, expected_queries)

    # Second sample.
    tf.assign(population_size, 20)
    tf.assign(selection_probability, 0.2)
    test_utils.run_query(query, [record1, record2], global_state)

    # Unpacking also requires the ledger to hold exactly two samples.
    sample_1, sample_2 = ledger.get_formatted_ledger_eager()
    check_sample(sample_1, 10.0, 0.1, expected_queries)

    # The second entry is expected to record 9.0 where the first had 10.0.
    expected_queries_2 = [[9.0, 9.0], [0.5, 0.0]]
    check_sample(sample_2, 20.0, 0.2, expected_queries_2)
def test_no_privacy_average(self):
    # The expected result is the element-wise mean of the two records.
    with self.cached_session() as sess:
        records = [
            tf.constant([5.0, 0.0]),
            tf.constant([-1.0, 2.0]),
        ]

        average_query = no_privacy_query.NoPrivacyAverageQuery()
        query_result, _ = test_utils.run_query(average_query, records)

        self.assertAllClose(sess.run(query_result), [2.0, 1.0])
def test_nested_query(self):
    # Runs a nested pair of Gaussian average queries twice through a
    # ledger-recording wrapper and checks the entries for each sample.
    expected_queries = [[4.0, 2.0], [5.0, 1.0]]
    expected_sorted = sorted(expected_queries)

    def check_sample(sample, population, probability):
        self.assertAllClose(sample.population_size, population)
        self.assertAllClose(sample.selection_probability, probability)
        # Recorded queries are compared after sorting both sides.
        self.assertAllClose(sorted(sample.queries), expected_sorted)

    population_size = tf.Variable(0)
    selection_probability = tf.Variable(0.0)
    ledger = privacy_ledger.PrivacyLedger(
        population_size, selection_probability, 50, 50)

    query1 = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=4.0, sum_stddev=2.0, denominator=5.0, ledger=ledger)
    query2 = gaussian_query.GaussianAverageQuery(
        l2_norm_clip=5.0, sum_stddev=1.0, denominator=5.0, ledger=ledger)

    nested = nested_query.NestedQuery([query1, query2])
    query = privacy_ledger.QueryWithLedger(nested, ledger)

    record1 = [1.0, [12.0, 9.0]]
    record2 = [5.0, [1.0, 2.0]]

    # First sample.
    tf.assign(population_size, 10)
    tf.assign(selection_probability, 0.1)
    test_utils.run_query(query, [record1, record2])

    sample_1 = ledger.get_formatted_ledger_eager()[0]
    check_sample(sample_1, 10.0, 0.1)

    # Second sample.
    tf.assign(population_size, 20)
    tf.assign(selection_probability, 0.2)
    test_utils.run_query(query, [record1, record2])

    # Unpacking also requires the ledger to hold exactly two samples.
    sample_1, sample_2 = ledger.get_formatted_ledger_eager()
    check_sample(sample_1, 10.0, 0.1)
    check_sample(sample_2, 20.0, 0.2)
def test_gaussian_sum_with_clip_no_noise(self):
    # With l2_norm_clip=5.0 and zero stddev, the first record (norm 10) is
    # scaled down before summing while the second (norm 5) is left as is.
    with self.cached_session() as sess:
        clipped_record = tf.constant([-6.0, 8.0])  # Clipped to [-3.0, 4.0].
        unclipped_record = tf.constant([4.0, -3.0])  # Not clipped.

        sum_query = gaussian_query.GaussianSumQuery(
            l2_norm_clip=5.0, stddev=0.0)
        query_result, _ = test_utils.run_query(
            sum_query, [clipped_record, unclipped_record])

        self.assertAllClose(sess.run(query_result), [1.0, 1.0])
def test_gaussian_sum_no_clip_no_noise(self):
    # With l2_norm_clip=10.0 and zero stddev, the expected result is the
    # plain element-wise sum of the two records.
    with self.cached_session() as sess:
        records = [
            tf.constant([2.0, 0.0]),
            tf.constant([-1.0, 1.0]),
        ]

        sum_query = gaussian_query.GaussianSumQuery(
            l2_norm_clip=10.0, stddev=0.0)
        query_result, _ = test_utils.run_query(sum_query, records)

        self.assertAllClose(sess.run(query_result), [1.0, 1.0])
def test_gaussian_average_no_noise(self):
    # Clipped sum is [3.0, 0.0] + [-1.0, 2.0] = [2.0, 2.0]; dividing by the
    # denominator (2.0) gives the expected average.
    with self.cached_session() as sess:
        clipped_record = tf.constant([5.0, 0.0])  # Clipped to [3.0, 0.0].
        unclipped_record = tf.constant([-1.0, 2.0])  # Not clipped.

        average_query = gaussian_query.GaussianAverageQuery(
            l2_norm_clip=3.0, sum_stddev=0.0, denominator=2.0)
        query_result, _ = test_utils.run_query(
            average_query, [clipped_record, unclipped_record])

        self.assertAllClose(sess.run(query_result), [1.0, 1.0])
def test_sum_query(self):
    # Runs a Gaussian sum query twice through a ledger-recording wrapper and
    # checks the ledger entries written for each sample.
    record1 = tf.constant([2.0, 0.0])
    record2 = tf.constant([-1.0, 1.0])

    population_size = tf.Variable(0)
    selection_probability = tf.Variable(0.0)
    ledger = privacy_ledger.PrivacyLedger(
        population_size, selection_probability, 50, 50)

    sum_query = gaussian_query.GaussianSumQuery(
        l2_norm_clip=10.0, stddev=0.0, ledger=ledger)
    query = privacy_ledger.QueryWithLedger(sum_query, ledger)

    expected_queries = [[10.0, 0.0]]

    # First sample.
    tf.assign(population_size, 10)
    tf.assign(selection_probability, 0.1)
    test_utils.run_query(query, [record1, record2])

    sample_1 = ledger.get_formatted_ledger_eager()[0]
    self.assertAllClose(sample_1.population_size, 10.0)
    self.assertAllClose(sample_1.selection_probability, 0.1)
    self.assertAllClose(sample_1.queries, expected_queries)

    # Second sample.
    tf.assign(population_size, 20)
    tf.assign(selection_probability, 0.2)
    test_utils.run_query(query, [record1, record2])

    # Unpacking also requires the ledger to hold exactly two samples.
    sample_1, sample_2 = ledger.get_formatted_ledger_eager()
    expected_samples = (
        (sample_1, 10.0, 0.1),
        (sample_2, 20.0, 0.2),
    )
    for sample, population, probability in expected_samples:
        self.assertAllClose(sample.population_size, population)
        self.assertAllClose(sample.selection_probability, probability)
        self.assertAllClose(sample.queries, expected_queries)
def test_no_privacy_weighted_average(self):
    # Weighted sum is 1 * [4.0, 0.0] + 3 * [-1.0, 1.0] = [1.0, 3.0]; dividing
    # by the total weight (4) gives the expected average.
    with self.cached_session() as sess:
        records = [
            tf.constant([4.0, 0.0]),
            tf.constant([-1.0, 1.0]),
        ]
        weights = [1, 3]

        average_query = no_privacy_query.NoPrivacyAverageQuery()
        query_result, _ = test_utils.run_query(
            average_query, records, weights=weights)

        self.assertAllClose(sess.run(query_result), [0.25, 0.75])
def test_normalization(self):
    # Clipped sum is [-3.0, 4.0] + [4.0, -3.0] = [1.0, 1.0]; the normalized
    # query divides it by the denominator (2.0).
    with self.cached_session() as sess:
        clipped_record = tf.constant([-6.0, 8.0])  # Clipped to [-3.0, 4.0].
        unclipped_record = tf.constant([4.0, -3.0])  # Not clipped.

        sum_query = gaussian_query.GaussianSumQuery(
            l2_norm_clip=5.0, stddev=0.0)
        normalized = normalized_query.NormalizedQuery(
            numerator_query=sum_query, denominator=2.0)

        query_result, _ = test_utils.run_query(
            normalized, [clipped_record, unclipped_record])

        self.assertAllClose(sess.run(query_result), [0.5, 0.5])
def test_gaussian_sum_with_noise(self):
    # Evaluates the same noised-sum tensor 1000 times and checks that the
    # empirical standard deviation of the samples is within 0.1 of the
    # configured stddev.
    with self.cached_session() as sess:
        record1, record2 = 2.71828, 3.14159
        stddev = 1.0

        query = gaussian_query.GaussianSumQuery(
            l2_norm_clip=5.0, stddev=stddev)
        query_result, _ = test_utils.run_query(query, [record1, record2])

        # `range` rather than the Python 2-only `xrange`, matching the other
        # noise tests in this file.
        noised_sums = [sess.run(query_result) for _ in range(1000)]

        result_stddev = np.std(noised_sums)
        self.assertNear(result_stddev, stddev, 0.1)
def test_sum_no_clip_no_noise(self):
    # With an initial clip of 10.0, zero noise and a zero learning rate, the
    # expected result is the plain element-wise sum of the two records.
    records = [
        tf.constant([2.0, 0.0]),
        tf.constant([-1.0, 1.0]),
    ]

    query_kwargs = dict(
        initial_l2_norm_clip=10.0,
        noise_multiplier=0.0,
        target_unclipped_quantile=1.0,
        learning_rate=0.0,
        clipped_count_stddev=0.0,
        expected_num_records=2.0)
    query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
        **query_kwargs)

    query_result, _ = test_utils.run_query(query, records)

    self.assertAllClose(query_result.numpy(), [1.0, 1.0])
def test_nested_gaussian_sum_no_clip_no_noise(self):
    # Two noise-free sum queries are nested; the expected result is the
    # element-wise sum of the records with the nesting structure preserved.
    with self.cached_session() as sess:
        scalar_query = gaussian_query.GaussianSumQuery(
            l2_norm_clip=10.0, stddev=0.0)
        vector_query = gaussian_query.GaussianSumQuery(
            l2_norm_clip=10.0, stddev=0.0)
        query = nested_query.NestedQuery([scalar_query, vector_query])

        records = [
            [1.0, [2.0, 3.0]],
            [4.0, [3.0, 2.0]],
        ]

        query_result, _ = test_utils.run_query(query, records)

        self.assertAllClose(sess.run(query_result), [5.0, [5.0, 5.0]])
def test_nested_gaussian_average_with_clip_no_noise(self):
    # Each sub-query clips its own part of the record. The clipped parts sum
    # to [5.0, [5.0, 5.0]], and both denominators are 5.0.
    with self.cached_session() as sess:
        scalar_query = gaussian_query.GaussianAverageQuery(
            l2_norm_clip=4.0, sum_stddev=0.0, denominator=5.0)
        vector_query = gaussian_query.GaussianAverageQuery(
            l2_norm_clip=5.0, sum_stddev=0.0, denominator=5.0)
        query = nested_query.NestedQuery([scalar_query, vector_query])

        record1 = [1.0, [12.0, 9.0]]  # Clipped to [1.0, [4.0, 3.0]]
        record2 = [5.0, [1.0, 2.0]]  # Clipped to [4.0, [1.0, 2.0]]

        query_result, _ = test_utils.run_query(query, [record1, record2])

        self.assertAllClose(sess.run(query_result), [1.0, [1.0, 1.0]])
def test_average_no_noise(self):
    # Clipped sum is [3.0, 0.0] + [-1.0, 2.0] = [2.0, 2.0]; dividing by the
    # denominator (2.0) gives the expected average.
    clipped_record = tf.constant([5.0, 0.0])  # Clipped to [3.0, 0.0].
    unclipped_record = tf.constant([-1.0, 2.0])  # Not clipped.

    query_kwargs = dict(
        initial_l2_norm_clip=3.0,
        noise_multiplier=0.0,
        denominator=2.0,
        target_unclipped_quantile=1.0,
        learning_rate=0.0,
        clipped_count_stddev=0.0,
        expected_num_records=2.0)
    query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipAverageQuery(
        **query_kwargs)

    query_result, _ = test_utils.run_query(
        query, [clipped_record, unclipped_record])

    self.assertAllClose(query_result.numpy(), [1.0, 1.0])
def test_gaussian_average_with_noise(self):
    # Evaluates the same noised-average tensor 1000 times and checks that
    # the empirical standard deviation is within 0.1 of
    # sum_stddev / denominator.
    with self.cached_session() as sess:
        record1, record2 = 2.71828, 3.14159
        sum_stddev = 1.0
        denominator = 2.0

        query = gaussian_query.GaussianAverageQuery(
            l2_norm_clip=5.0, sum_stddev=sum_stddev, denominator=denominator)
        query_result, _ = test_utils.run_query(query, [record1, record2])

        noised_averages = [sess.run(query_result) for _ in range(1000)]

        expected_stddev = sum_stddev / denominator
        self.assertNear(np.std(noised_averages), expected_stddev, 0.1)
def test_complex_nested_query(self):
    # Nests a sum query over a dict record next to a dict of sub-queries
    # (an average over a tuple and a sum over a one-element list), then
    # checks that the result mirrors the structure of the records.
    with self.cached_session() as sess:
        query_ab = gaussian_query.GaussianSumQuery(
            l2_norm_clip=1.0, stddev=0.0)
        query_c = gaussian_query.GaussianAverageQuery(
            l2_norm_clip=10.0, sum_stddev=0.0, denominator=2.0)
        query_d = gaussian_query.GaussianSumQuery(
            l2_norm_clip=10.0, stddev=0.0)

        query_structure = [query_ab, {'c': query_c, 'd': [query_d]}]
        query = nested_query.NestedQuery(query_structure)

        records = [
            [{'a': 0.0, 'b': 2.71828}, {'c': (-4.0, 6.0), 'd': [-4.0]}],
            [{'a': 3.14159, 'b': 0.0}, {'c': (6.0, -4.0), 'd': [5.0]}],
        ]

        query_result, _ = test_utils.run_query(query, records)
        result = sess.run(query_result)

        expected = [{'a': 1.0, 'b': 1.0}, {'c': (1.0, 1.0), 'd': [1.0]}]
        self.assertAllClose(result, expected)
def test_sum_with_noise(self):
    # Runs the query 1000 times and checks that the empirical standard
    # deviation of the noised sums is within 0.1 of `stddev`. The noise
    # multiplier is set to stddev / clip.
    record1, record2 = 2.71828, 3.14159
    stddev = 1.0
    clip = 5.0

    query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
        initial_l2_norm_clip=clip,
        noise_multiplier=stddev / clip,
        target_unclipped_quantile=1.0,
        learning_rate=0.0,
        clipped_count_stddev=0.0,
        expected_num_records=2.0)

    noised_sums = []
    # `range` rather than the Python 2-only `xrange`, matching the other
    # noise tests in this file.
    for _ in range(1000):
        query_result, _ = test_utils.run_query(query, [record1, record2])
        noised_sums.append(query_result.numpy())

    result_stddev = np.std(noised_sums)
    self.assertNear(result_stddev, stddev, 0.1)
def test_gaussian_sum_with_changing_clip_no_noise(self):
    # The clip is held in a variable so that it can be reassigned between
    # evaluations of the same result tensor.
    with self.cached_session() as sess:
        record1 = tf.constant([-6.0, 8.0])  # Clipped to [-3.0, 4.0].
        record2 = tf.constant([4.0, -3.0])  # Not clipped.

        l2_norm_clip = tf.Variable(5.0)
        new_clip = tf.placeholder(tf.float32)
        update_clip = tf.assign(l2_norm_clip, new_clip)

        query = gaussian_query.GaussianSumQuery(
            l2_norm_clip=l2_norm_clip, stddev=0.0)
        query_result, _ = test_utils.run_query(query, [record1, record2])

        self.evaluate(tf.global_variables_initializer())

        # With the initial clip of 5.0 only the first record is clipped.
        self.assertAllClose(sess.run(query_result), [1.0, 1.0])

        # After the clip is set to 0.0 the expected sum is all zeros.
        sess.run(update_clip, feed_dict={new_clip: 0.0})
        self.assertAllClose(sess.run(query_result), [0.0, 0.0])
def test_average_with_noise(self):
    # Runs the query 1000 times and checks that the empirical standard
    # deviation of the noised averages is within 0.1 of
    # sum_stddev / denominator. The noise multiplier is sum_stddev / clip.
    record1, record2 = 2.71828, 3.14159
    sum_stddev = 1.0
    denominator = 2.0
    clip = 3.0

    query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipAverageQuery(
        initial_l2_norm_clip=clip,
        noise_multiplier=sum_stddev / clip,
        denominator=denominator,
        target_unclipped_quantile=1.0,
        learning_rate=0.0,
        clipped_count_stddev=0.0,
        expected_num_records=2.0)

    def sample_once():
        query_result, _ = test_utils.run_query(query, [record1, record2])
        return query_result.numpy()

    noised_averages = [sample_once() for _ in range(1000)]

    expected_stddev = sum_stddev / denominator
    self.assertNear(np.std(noised_averages), expected_stddev, 0.1)
def test_nested_query_with_noise(self):
    # Evaluates the nested result 1000 times, flattens each sample, and
    # checks the per-component empirical standard deviation: sum_stddev for
    # the sum query and sum_stddev / denominator for each component of the
    # average query.
    with self.cached_session() as sess:
        sum_stddev = 2.71828
        denominator = 3.14159

        query1 = gaussian_query.GaussianSumQuery(
            l2_norm_clip=1.5, stddev=sum_stddev)
        query2 = gaussian_query.GaussianAverageQuery(
            l2_norm_clip=0.5, sum_stddev=sum_stddev, denominator=denominator)
        query = nested_query.NestedQuery((query1, query2))

        record1 = (3.0, [2.0, 1.5])
        record2 = (0.0, [-1.0, -3.5])

        query_result, _ = test_utils.run_query(query, [record1, record2])

        noised_averages = [
            nest.flatten(sess.run(query_result)) for _ in range(1000)
        ]

        # Standard deviation across samples, one value per flattened slot.
        result_stddev = np.std(noised_averages, axis=0)

        avg_stddev = sum_stddev / denominator
        expected_stddev = [sum_stddev, avg_stddev, avg_stddev]
        self.assertArrayNear(result_stddev, expected_stddev, 0.1)
def test_incompatible_records(self, record1, record2, error_type):
    # Running a Gaussian sum query over the two given records is expected to
    # raise `error_type`. The query itself is built outside the checked call.
    query = gaussian_query.GaussianSumQuery(1.0, 0.0)
    records = [record1, record2]
    self.assertRaises(error_type, test_utils.run_query, query, records)
def test_record_incompatible_with_query(self, queries, record, error_type):
    # Building a NestedQuery from `queries` and running it on `record` is
    # expected to raise `error_type`. Construction stays inside the
    # assertRaises block, so an error raised there counts as well.
    with self.assertRaises(error_type):
        query = nested_query.NestedQuery(queries)
        test_utils.run_query(query, [record])
def test_incompatible_records(self, record1, record2, error_type):
    # Running a no-privacy sum query over the two given records is expected
    # to raise `error_type`. The query itself is built outside the checked
    # call.
    query = no_privacy_query.NoPrivacySumQuery()
    records = [record1, record2]
    self.assertRaises(error_type, test_utils.run_query, query, records)