def test_dp_sum_structure_complex(self):
  query = tensorflow_privacy.GaussianSumQuery(5.0, 0.0)

  def datapoint(a, b, c):
    return collections.OrderedDict(a=(a,), bc=([b], (c,)))

  data = [
      datapoint(1.0, 2.0, 1.0),
      datapoint(2.0, 3.0, 1.0),
      datapoint(6.0, 8.0, 0.0),  # Clipped to 3.0, 4.0, 0.0
  ]

  value_type = type_conversions.type_from_tensors(data[0])
  dp_aggregate_process = differential_privacy.build_dp_aggregate_process(
      value_type, query)

  global_state = dp_aggregate_process.initialize()
  output = dp_aggregate_process.next(global_state, data, [1.0, 1.0, 1.0])

  self.assertEqual(output.state.l2_norm_clip, 5.0)
  self.assertEqual(output.state.stddev, 0.0)
  self.assertEqual(output.result['a'][0], 6.0)
  self.assertEqual(output.result['bc'][0][0], 9.0)
  self.assertEqual(output.result['bc'][1][0], 2.0)
@classmethod
def gaussian_fixed(cls, noise_multiplier: float, clients_per_round: float,
                   clip: float) -> factory.UnweightedAggregationFactory:
  """`DifferentiallyPrivateFactory` with fixed clipping and Gaussian noise.

  Performs fixed clipping and addition of Gaussian noise for differentially
  private learning. For details of the DP algorithm see McMahan et al. (2017),
  https://arxiv.org/abs/1710.06963.

  Args:
    noise_multiplier: A float specifying the noise multiplier for the Gaussian
      mechanism for model updates. A value of 1.0 or higher may be needed for
      strong privacy. See the above-mentioned paper to compute an
      (epsilon, delta) privacy guarantee.
    clients_per_round: A float specifying the expected number of clients per
      round. Must be positive.
    clip: The value of the clipping norm.

  Returns:
    A `DifferentiallyPrivateFactory` with fixed clipping and Gaussian noise.
  """
  if isinstance(clients_per_round, int):
    clients_per_round = float(clients_per_round)

  _check_float_nonnegative(noise_multiplier, 'noise_multiplier')
  _check_float_positive(clients_per_round, 'clients_per_round')
  _check_float_positive(clip, 'clip')

  query = tfp.NormalizedQuery(
      tfp.GaussianSumQuery(l2_norm_clip=clip, stddev=clip * noise_multiplier),
      denominator=clients_per_round)

  return cls(query)
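# A minimal usage sketch for `gaussian_fixed` (the parameter values below are
# illustrative assumptions, not from the implementation above). With clip=0.3
# and noise_multiplier=1.0, each client update is clipped to L2 norm 0.3,
# Gaussian noise with stddev 0.3 * 1.0 is added to the sum, and the result is
# divided by the expected 100 clients per round.
import tensorflow_federated as tff

dp_factory = tff.aggregators.DifferentiallyPrivateFactory.gaussian_fixed(
    noise_multiplier=1.0, clients_per_round=100.0, clip=0.3)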
def test_process_type_signature(self, value_template):
  query = tensorflow_privacy.GaussianSumQuery(4.0, 0.0)
  value_type = type_conversions.type_from_tensors(value_template)
  dp_aggregate_process = differential_privacy.build_dp_aggregate_process(
      value_type, query)

  server_state_type = computation_types.FederatedType(
      computation_types.NamedTupleType([('l2_norm_clip', tf.float32),
                                        ('stddev', tf.float32)]),
      placements.SERVER)
  self.assertEqual(
      dp_aggregate_process.initialize.type_signature,
      computation_types.FunctionType(
          parameter=None, result=server_state_type))

  client_value_type = computation_types.FederatedType(value_type,
                                                      placements.CLIENTS)
  client_value_weight_type = computation_types.FederatedType(
      tf.float32, placements.CLIENTS)
  server_result_type = computation_types.FederatedType(
      value_type, placements.SERVER)
  server_metrics_type = computation_types.FederatedType((), placements.SERVER)
  self.assertEqual(
      dp_aggregate_process.next.type_signature,
      computation_types.FunctionType(
          parameter=computation_types.NamedTupleType([
              (None, server_state_type), (None, client_value_type),
              (None, client_value_weight_type)
          ]),
          result=computation_types.NamedTupleType([
              ('state', server_state_type), ('result', server_result_type),
              ('measurements', server_metrics_type)
          ])))
def test_process_type_signature(self, value_template):
  query = tensorflow_privacy.GaussianSumQuery(4.0, 0.0)
  value_type = type_conversions.type_from_tensors(value_template)
  dp_aggregate_process = differential_privacy.build_dp_aggregate_process(
      value_type, query)

  global_state = query.initial_global_state()
  server_state_type = computation_types.FederatedType(
      type_conversions.type_from_tensors(global_state), placements.SERVER)
  self.assertEqual(
      dp_aggregate_process.initialize.type_signature,
      computation_types.FunctionType(
          parameter=None, result=server_state_type))

  metrics_type = type_conversions.type_from_tensors(
      query.derive_metrics(global_state))
  client_value_type = computation_types.FederatedType(value_type,
                                                      placements.CLIENTS)
  client_value_weight_type = computation_types.FederatedType(
      tf.float32, placements.CLIENTS)
  server_result_type = computation_types.FederatedType(
      value_type, placements.SERVER)
  server_metrics_type = computation_types.FederatedType(
      metrics_type, placements.SERVER)
  self.assertEqual(
      dp_aggregate_process.next.type_signature,
      computation_types.FunctionType(
          parameter=(server_state_type, client_value_type,
                     client_value_weight_type),
          result=collections.OrderedDict(
              state=server_state_type,
              result=server_result_type,
              measurements=server_metrics_type)))
def test_execution_with_custom_dp_query(self):
  client_data = create_emnist_client_data()
  train_data = [client_data(), client_data()]

  def loss_fn():
    return tf.keras.losses.SparseCategoricalCrossentropy()

  def metrics_fn():
    return [
        NumExamplesCounter(),
        NumBatchesCounter(),
        tf.keras.metrics.SparseCategoricalAccuracy()
    ]

  # No values should be changed, but working with inf directly zeroes out all
  # updates. Prefer a very large value that can still be handled in
  # multiplication/division.
  gaussian_sum_query = tfp.GaussianSumQuery(l2_norm_clip=1e10, stddev=0)
  dp_sum_factory = tff.aggregators.DifferentiallyPrivateFactory(
      query=gaussian_sum_query,
      record_aggregation_factory=tff.aggregators.SumFactory())
  dp_mean_factory = _DPMean(dp_sum_factory)

  # Disable reconstruction via 0 learning rate to ensure the post-recon loss
  # matches exact expectations in round 0 and decreases by the next round.
  trainer = training_process.build_federated_reconstruction_process(
      MnistModel,
      loss_fn=loss_fn,
      metrics_fn=metrics_fn,
      server_optimizer_fn=functools.partial(tf.keras.optimizers.SGD, 0.01),
      client_optimizer_fn=functools.partial(tf.keras.optimizers.SGD, 0.001),
      reconstruction_optimizer_fn=functools.partial(tf.keras.optimizers.SGD,
                                                    0.0),
      aggregation_factory=dp_mean_factory,
  )
  state = trainer.initialize()

  outputs = []
  states = []
  for _ in range(2):
    state, output = trainer.next(state, train_data)
    outputs.append(output)
    states.append(state)

  # All weights and biases are initialized to 0, so initial logits are all 0
  # and softmax probabilities are uniform over 10 classes. So the negative log
  # likelihood is -ln(1/10) = ln(10). This holds only in expectation, so
  # increase the tolerance.
  self.assertAllClose(outputs[0]['loss'], tf.math.log(10.0), rtol=1e-4)
  self.assertLess(outputs[1]['loss'], outputs[0]['loss'])
  self.assertNotAllClose(states[0].model.trainable, states[1].model.trainable)

  # Expect 6 reconstruction examples and 6 training examples. Only training
  # examples are included in metrics.
  self.assertEqual(outputs[0]['num_examples_total'], 6.0)
  self.assertEqual(outputs[1]['num_examples_total'], 6.0)

  # Expect 4 reconstruction batches and 4 training batches. Only training
  # batches are included in metrics.
  self.assertEqual(outputs[0]['num_batches_total'], 4.0)
  self.assertEqual(outputs[1]['num_batches_total'], 4.0)
def test_dp_sum_structure_list(self):
  query = tensorflow_privacy.GaussianSumQuery(5.0, 0.0)

  def _value_type_fn(value):
    del value
    return [
        computation_types.TensorType(tf.float32),
        computation_types.TensorType(tf.float32),
    ]

  dp_aggregate_fn, _ = differential_privacy.build_dp_aggregate(
      query, value_type_fn=_value_type_fn)

  def datapoint(a, b):
    return [tf.Variable(a, name='a'), tf.Variable(b, name='b')]

  data = [
      datapoint(1.0, 2.0),
      datapoint(2.0, 3.0),
      datapoint(6.0, 8.0),  # Clipped to 3.0, 4.0
  ]

  initialize, aggregate = wrap_aggregate_fn(dp_aggregate_fn, data[0])
  global_state = initialize()
  global_state, result = aggregate(global_state, data)

  self.assertEqual(global_state.l2_norm_clip, 5.0)
  self.assertEqual(global_state.stddev, 0.0)

  result = list(result)
  self.assertEqual(result[0], 6.0)
  self.assertEqual(result[1], 9.0)
def test_bad_query(self):
  non_quantile_estimator_query = tfp.GaussianSumQuery(
      l2_norm_clip=1.0, stddev=1.0)

  with self.assertRaises(TypeError):
    quantile_estimation.PrivateQuantileEstimationProcess(
        non_quantile_estimator_query)
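# For contrast, a sketch of a query of the expected type (parameter values
# are illustrative assumptions): `PrivateQuantileEstimationProcess` accepts a
# `tfp.QuantileEstimatorQuery` rather than a generic `DPQuery` such as
# `GaussianSumQuery`.
quantile_estimator_query = tfp.QuantileEstimatorQuery(
    initial_estimate=1.0,
    target_quantile=0.5,
    learning_rate=0.2,
    below_estimate_stddev=0.5,
    expected_num_records=100,
    geometric_update=True)
estimation_process = quantile_estimation.PrivateQuantileEstimationProcess(
    quantile_estimator_query)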
def test_dp_global_state_type(self):
  query = tensorflow_privacy.GaussianSumQuery(5.0, 0.0)
  _, dp_global_state_type = differential_privacy.build_dp_aggregate(query)

  self.assertEqual(dp_global_state_type.__class__.__name__,
                   'NamedTupleTypeWithPyContainerType')
def test_iterative_process_fails_with_dp_agg_and_none_client_weighting(self):

  def loss_fn():
    return tf.keras.losses.SparseCategoricalCrossentropy()

  def metrics_fn():
    return [
        NumExamplesCounter(),
        NumBatchesCounter(),
        tf.keras.metrics.SparseCategoricalAccuracy()
    ]

  # No values should be changed, but working with inf directly zeroes out all
  # updates. Prefer a very large value that can still be handled in
  # multiplication/division.
  gaussian_sum_query = tfp.GaussianSumQuery(l2_norm_clip=1e10, stddev=0)
  dp_sum_factory = differential_privacy.DifferentiallyPrivateFactory(
      query=gaussian_sum_query,
      record_aggregation_factory=sum_factory.SumFactory())
  dp_mean_factory = _DPMean(dp_sum_factory)

  with self.assertRaisesRegex(ValueError, 'unweighted aggregator'):
    training_process.build_training_process(
        MnistModel,
        loss_fn=loss_fn,
        metrics_fn=metrics_fn,
        server_optimizer_fn=_get_keras_optimizer_fn(0.01),
        client_optimizer_fn=_get_keras_optimizer_fn(0.001),
        reconstruction_optimizer_fn=_get_keras_optimizer_fn(0.0),
        aggregation_factory=dp_mean_factory,
        client_weighting=None,
        dataset_split_fn=reconstruction_utils.simple_dataset_split_fn)
def test_dp_sum_structure_nested_odict(self):
  query = tensorflow_privacy.GaussianSumQuery(5.0, 0.0)

  def datapoint(a, b, c):
    return collections.OrderedDict([('a', (a,)),
                                    ('bc',
                                     collections.OrderedDict([('b', [b]),
                                                              ('c', (c,))]))])

  data = [
      datapoint(1.0, 2.0, 1.0),
      datapoint(2.0, 3.0, 1.0),
      datapoint(6.0, 8.0, 0.0),  # Clipped to 3.0, 4.0, 0.0
  ]

  value_type = type_conversions.type_from_tensors(data[0])
  dp_aggregate_process = differential_privacy.build_dp_aggregate_process(
      value_type, query)

  global_state = dp_aggregate_process.initialize()
  output = dp_aggregate_process.next(global_state, data, [1.0, 1.0, 1.0])

  self.assertEqual(output.state.l2_norm_clip, 5.0)
  self.assertEqual(output.state.stddev, 0.0)
  self.assertEqual(output.result['a'][0], 6.0)
  self.assertEqual(output.result['bc']['b'][0], 9.0)
  self.assertEqual(output.result['bc']['c'][0], 2.0)
def test_dp_sum_structure_list(self):
  query = tensorflow_privacy.GaussianSumQuery(5.0, 0.0)

  def datapoint(a, b):
    return [tf.Variable(a, name='a'), tf.Variable(b, name='b')]

  data = [
      datapoint(1.0, 2.0),
      datapoint(2.0, 3.0),
      datapoint(6.0, 8.0),  # Clipped to 3.0, 4.0
  ]

  value_type = type_conversions.type_from_tensors(data[0])
  dp_aggregate_process = differential_privacy.build_dp_aggregate_process(
      value_type, query)

  global_state = dp_aggregate_process.initialize()
  output = dp_aggregate_process.next(global_state, data, [1.0, 1.0, 1.0])

  self.assertEqual(output.state.l2_norm_clip, 5.0)
  self.assertEqual(output.state.stddev, 0.0)

  result = list(output.result)
  self.assertEqual(result[0], 6.0)
  self.assertEqual(result[1], 9.0)
def test_dp_global_state_type(self):
  query = tensorflow_privacy.GaussianSumQuery(5.0, 0.0)
  _, dp_global_state_type = differential_privacy.build_dp_aggregate(query)

  self.assertIsInstance(dp_global_state_type,
                        computation_types.StructWithPythonType)
def test_dp_sum(self):
  query = tensorflow_privacy.GaussianSumQuery(4.0, 0.0)
  dp_aggregate_fn, _ = differential_privacy.build_dp_aggregate(query)

  initialize, aggregate = wrap_aggregate_fn(dp_aggregate_fn, 0.0)
  global_state = initialize()
  global_state, result = aggregate(global_state, [1.0, 3.0, 5.0])

  self.assertEqual(global_state.l2_norm_clip, 4.0)
  self.assertEqual(global_state.stddev, 0.0)
  self.assertEqual(result, 8.0)
def test_dp_sum(self):
  query = tensorflow_privacy.GaussianSumQuery(4.0, 0.0)
  value_type = type_conversions.type_from_tensors(0.0)
  dp_aggregate_process = differential_privacy.build_dp_aggregate_process(
      value_type, query)

  global_state = dp_aggregate_process.initialize()
  output = dp_aggregate_process.next(global_state, [1.0, 3.0, 5.0],
                                     [1.0, 1.0, 1.0])

  self.assertEqual(output.state.l2_norm_clip, 4.0)
  self.assertEqual(output.state.stddev, 0.0)
  # The record 5.0 is clipped to 4.0, so the sum is 1.0 + 3.0 + 4.0 = 8.0.
  self.assertEqual(output.result, 8.0)
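# A sketch of the raw DPQuery protocol that the aggregate processes above are
# built on, reproducing the same clipped sum without TFF. This assumes the
# two-value `get_noised_result` signature of the tensorflow_privacy version
# contemporary with these tests; later versions also return a DPEvent.
import tensorflow as tf
import tensorflow_privacy

query = tensorflow_privacy.GaussianSumQuery(4.0, 0.0)
global_state = query.initial_global_state()
params = query.derive_sample_params(global_state)
sample_state = query.initial_sample_state(tf.constant(0.0))
for record in [1.0, 3.0, 5.0]:
  sample_state = query.accumulate_record(params, sample_state,
                                         tf.constant(record))
result, global_state = query.get_noised_result(sample_state, global_state)
# With stddev 0.0 no noise is added, and 5.0 is clipped to 4.0, so `result`
# is 8.0, matching the test expectation above.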
def test_iterative_process_builds_with_dp_agg_and_client_weight_fn(self):

  def loss_fn():
    return tf.keras.losses.SparseCategoricalCrossentropy()

  def metrics_fn():
    return [
        NumExamplesCounter(),
        NumBatchesCounter(),
        tf.keras.metrics.SparseCategoricalAccuracy()
    ]

  # No values should be changed, but working with inf directly zeroes out all
  # updates. Prefer a very large value that can still be handled in
  # multiplication/division.
  gaussian_sum_query = tfp.GaussianSumQuery(l2_norm_clip=1e10, stddev=0)
  dp_sum_factory = tff.aggregators.DifferentiallyPrivateFactory(
      query=gaussian_sum_query,
      record_aggregation_factory=tff.aggregators.SumFactory())
  dp_mean_factory = _DPMean(dp_sum_factory)

  def client_weight_fn(local_outputs):
    del local_outputs  # Unused
    return 1.0

  # Ensure this builds, as some builders raise if an unweighted aggregation
  # is specified with a client_weight_fn.
  trainer = training_process.build_federated_reconstruction_process(
      MnistModel,
      loss_fn=loss_fn,
      metrics_fn=metrics_fn,
      server_optimizer_fn=functools.partial(tf.keras.optimizers.SGD, 0.01),
      client_optimizer_fn=functools.partial(tf.keras.optimizers.SGD, 0.001),
      reconstruction_optimizer_fn=functools.partial(tf.keras.optimizers.SGD,
                                                    0.0),
      aggregation_factory=dp_mean_factory,
      client_weight_fn=client_weight_fn,
  )
  self.assertIsInstance(trainer, tff.templates.IterativeProcess)
def get_dp_query(mechanism, l2_norm_bound, noise_scale):
  """Factory for DPQuery instances.

  Args:
    mechanism: The mechanism name string.
    l2_norm_bound: The L2 norm bound to be checked by the DPQueries. Note that
      for discrete queries, these are the bounds after scaling.
    noise_scale: The noise scale (stddev) for the mechanism. Note that for
      central queries, this is the central stddev; for distributed queries,
      this is the local stddev for each client.

  Returns:
    A DPQuery object.
  """
  mechanism = mechanism.lower()
  if mechanism == 'gauss':
    return tfp.GaussianSumQuery(l2_norm_clip=l2_norm_bound, stddev=noise_scale)
  elif mechanism == 'distributed_dgauss':
    return distributed_discrete_gaussian_query.DistributedDiscreteGaussianSumQuery(
        l2_norm_bound=l2_norm_bound, local_scale=noise_scale)
  else:
    raise ValueError(f'Not yet implemented: {mechanism}')
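# Example invocations of the factory above (the numeric values are
# illustrative assumptions): 'gauss' yields a central Gaussian query, while
# 'distributed_dgauss' yields the distributed discrete Gaussian query, whose
# l2_norm_bound is interpreted after scaling.
central_query = get_dp_query('gauss', l2_norm_bound=1.0, noise_scale=0.5)
distributed_query = get_dp_query(
    'distributed_dgauss', l2_norm_bound=120.0, noise_scale=1.0)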
def test_dp_sum_structure_odict(self):
  query = tensorflow_privacy.GaussianSumQuery(5.0, 0.0)
  dp_aggregate_fn, _ = differential_privacy.build_dp_aggregate(query)

  def datapoint(a, b):
    return collections.OrderedDict([('a', (a,)), ('b', [b])])

  data = [
      datapoint(1.0, 2.0),
      datapoint(2.0, 3.0),
      datapoint(6.0, 8.0),  # Clipped to 3.0, 4.0
  ]

  initialize, aggregate = wrap_aggregate_fn(dp_aggregate_fn, data[0])
  global_state = initialize()
  global_state, result = aggregate(global_state, data)

  self.assertEqual(global_state.l2_norm_clip, 5.0)
  self.assertEqual(global_state.stddev, 0.0)
  self.assertEqual(result['a'][0], 6.0)
  self.assertEqual(result['b'][0], 9.0)
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
import tensorflow_privacy as tfp

from tensorflow_federated.python.aggregators import differential_privacy
from tensorflow_federated.python.aggregators import factory
from tensorflow_federated.python.aggregators import test_utils
from tensorflow_federated.python.core.api import computation_types
from tensorflow_federated.python.core.api import placements
from tensorflow_federated.python.core.api import test_case
from tensorflow_federated.python.core.backends.native import execution_contexts
from tensorflow_federated.python.core.impl.types import type_conversions
from tensorflow_federated.python.core.templates import aggregation_process
from tensorflow_federated.python.core.templates import measured_process

_test_dp_query = tfp.GaussianSumQuery(l2_norm_clip=1.0, stddev=0.0)

_test_struct_type = [(tf.float32, (2,)), tf.float32]
_test_inner_agg_factory = test_utils.SumPlusOneFactory()


class DPFactoryComputationTest(test_case.TestCase, parameterized.TestCase):

  @parameterized.named_parameters(
      ('float_simple', tf.float32, None),
      ('struct_simple', _test_struct_type, None),
      ('float_inner', tf.float32, _test_inner_agg_factory),
      ('struct_inner', _test_struct_type, _test_inner_agg_factory))
  def test_type_properties(self, value_type, inner_agg_factory):
    factory_ = differential_privacy.DifferentiallyPrivateFactory(
        _test_dp_query, inner_agg_factory)