def test_adaptive_query(self):
    """Checks two aggregation rounds with an adaptively updated clip norm.

    The query starts with a clip of 1.0 and adds no noise
    (`noise_multiplier=0.0`, `clipped_count_stddev=0.0`), so the expected
    sums can be written out exactly.
    """
    adaptive_query = tfp.QuantileAdaptiveClipSumQuery(
        initial_l2_norm_clip=1.0,
        noise_multiplier=0.0,
        target_unclipped_quantile=1.0,
        learning_rate=1.0,
        clipped_count_stddev=0.0,
        expected_num_records=3.0,
        geometric_update=False)
    dp_factory = differential_privacy.DifferentiallyPrivateFactory(
        adaptive_query)
    float_type = computation_types.to_type(tf.float32)
    aggregation_process = dp_factory.create(float_type)

    initial_state = aggregation_process.initialize()
    client_values = [0.5, 1.5, 2.0]

    # Round 1: with the clip at 1.0, two of the three values (1.5 and 2.0)
    # are clipped.
    first_round = aggregation_process.next(initial_state, client_values)
    self.assertAllClose(0.5 + 1.0 + 1.0, first_round.result)

    # Round 2: the clip has been increased by 2/3 to 5/3, so only the
    # largest value (2.0) is still clipped.
    second_round = aggregation_process.next(first_round.state, client_values)
    self.assertAllClose(0.5 + 1.5 + 5 / 3, second_round.result)
def gaussian_adaptive(
    cls,
    noise_multiplier: float,
    clients_per_round: float,
    initial_l2_norm_clip: float = 0.1,
    target_unclipped_quantile: float = 0.5,
    learning_rate: float = 0.2,
    clipped_count_stddev: Optional[float] = None
) -> factory.UnweightedAggregationFactory:
    """Builds a `DifferentiallyPrivateFactory` with adaptive clipping.

    The resulting factory clips adaptively and adds Gaussian noise for
    differentially private learning. The DP algorithm is described in
    McMahan et. al (2017) https://arxiv.org/abs/1710.06963, and the adaptive
    clipping follows the geometric method of Thakkar et al. (2019)
    https://arxiv.org/abs/1905.03871.

    The default adaptive clipping parameters were chosen so that the process
    starts small and adapts relatively quickly to the median, without using
    much of the privacy budget. This works well on most problems.

    Args:
      noise_multiplier: A float giving the noise multiplier of the Gaussian
        mechanism for model updates. A value of 1.0 or higher may be needed
        for strong privacy; see the papers above for computing the
        (epsilon, delta) guarantee. This is the effective total noise
        multiplier, accounting for the privacy loss due to adaptive clipping,
        so the noise actually added to the aggregated values will be slightly
        higher.
      clients_per_round: A float giving the expected number of clients per
        round. Must be positive. An `int` is accepted and converted to
        `float` before validation.
      initial_l2_norm_clip: The initial value of the adaptive clipping norm.
      target_unclipped_quantile: The quantile to which the clipping norm
        should adapt.
      learning_rate: The learning rate for the adaptive clipping process.
      clipped_count_stddev: The stddev of the noise added to the clipped
        counts in the adaptive clipping algorithm. If None, defaults to
        `0.05 * clients_per_round` (unless `noise_multiplier` is 0, in which
        case it is also 0).

    Returns:
      A `DifferentiallyPrivateFactory` with adaptive clipping and Gaussian
      noise.
    """
    # Integer client counts are promoted so the float checks below apply.
    if isinstance(clients_per_round, int):
        clients_per_round = float(clients_per_round)

    # Argument validation, delegated to the module's `_check_float_*` helpers.
    _check_float_nonnegative(noise_multiplier, 'noise_multiplier')
    _check_float_positive(clients_per_round, 'clients_per_round')
    _check_float_positive(initial_l2_norm_clip, 'initial_l2_norm_clip')
    _check_float_probability(target_unclipped_quantile,
                             'target_unclipped_quantile')
    _check_float_nonnegative(learning_rate, 'learning_rate')
    # A value of None is passed through to `adaptive_clip_noise_params`
    # unvalidated; only an explicit value is checked here.
    if clipped_count_stddev is not None:
        _check_float_nonnegative(clipped_count_stddev, 'clipped_count_stddev')

    noise_params = adaptive_clip_noise_params(
        noise_multiplier, clients_per_round, clipped_count_stddev)
    value_noise_multiplier, count_stddev = noise_params

    sum_query_kwargs = dict(
        initial_l2_norm_clip=initial_l2_norm_clip,
        noise_multiplier=value_noise_multiplier,
        target_unclipped_quantile=target_unclipped_quantile,
        learning_rate=learning_rate,
        clipped_count_stddev=count_stddev,
        expected_num_records=clients_per_round,
        geometric_update=True,
    )
    adaptive_sum_query = tfp.QuantileAdaptiveClipSumQuery(**sum_query_kwargs)

    # Wrap the sum query so its result is normalized by the expected number
    # of clients per round.
    normalized_query = tfp.NormalizedQuery(
        adaptive_sum_query, denominator=clients_per_round)
    return cls(normalized_query)
def gaussian_adaptive(
    cls,
    noise_multiplier: float,
    clients_per_round: float,
    initial_l2_norm_clip: float = 0.1,
    target_unclipped_quantile: float = 0.5,
    learning_rate: float = 0.2,
    clipped_count_stddev: Optional[float] = None
) -> factory.UnweightedAggregationFactory:
    """Builds a `DifferentiallyPrivateFactory` with adaptive clipping.

    The resulting factory clips adaptively and adds Gaussian noise for
    differentially private learning. The DP algorithm is described in
    McMahan et. al (2017) https://arxiv.org/abs/1710.06963, and the adaptive
    clipping follows the geometric method of Thakkar et al. (2019)
    https://arxiv.org/abs/1905.03871.

    The default adaptive clipping parameters were chosen so that the process
    starts small and adapts relatively quickly to the median, without using
    much of the privacy budget. This works well on most problems.

    Args:
      noise_multiplier: A float giving the noise multiplier of the Gaussian
        mechanism for model updates. A value of 1.0 or higher may be needed
        for strong privacy; see the papers above for computing the
        (epsilon, delta) guarantee. This is the effective total noise
        multiplier, accounting for the privacy loss due to adaptive clipping,
        so the noise actually added to the aggregated values will be slightly
        higher.
      clients_per_round: A float giving the expected number of clients per
        round. Must be positive. An `int` is accepted and converted to
        `float` before validation.
      initial_l2_norm_clip: The initial value of the adaptive clipping norm.
      target_unclipped_quantile: The quantile to which the clipping norm
        should adapt.
      learning_rate: The learning rate for the adaptive clipping process.
      clipped_count_stddev: The stddev of the noise added to the clipped
        counts in the adaptive clipping algorithm. If None, defaults to
        `0.05 * clients_per_round`.

    Returns:
      A `DifferentiallyPrivateFactory` with adaptive clipping and Gaussian
      noise.

    Raises:
      ValueError: If `noise_multiplier >= 2 * clipped_count_stddev`, in which
        case the desired effective noise multiplier cannot be achieved. The
        message differs depending on whether `clipped_count_stddev` was
        defaulted or supplied by the caller.
    """
    # Integer client counts are promoted so the float checks below apply.
    if isinstance(clients_per_round, int):
        clients_per_round = float(clients_per_round)

    # Argument validation, delegated to the module's `_check_float_*` helpers.
    _check_float_positive(noise_multiplier, 'noise_multiplier')
    _check_float_positive(clients_per_round, 'clients_per_round')
    _check_float_positive(initial_l2_norm_clip, 'initial_l2_norm_clip')
    _check_float_probability(target_unclipped_quantile,
                             'target_unclipped_quantile')
    _check_float_nonnegative(learning_rate, 'learning_rate')

    stddev_was_defaulted = clipped_count_stddev is None
    if stddev_was_defaulted:
        # Defaults to 0.05 * clients_per_round. The noised fraction of
        # unclipped updates will be within 0.1 of the true fraction with
        # 95.4% probability, and will be within 0.15 of the true fraction
        # with 99.7% probability. Even in this unlikely case, the error on
        # the update would be a factor of exp(0.2 * 0.15) = 1.03, a small
        # deviation. So this default gives maximal privacy for acceptable
        # probability of deviation.
        clipped_count_stddev = 0.05 * clients_per_round

    # The value noise multiplier computed below is only defined while
    # noise_multiplier < 2 * clipped_count_stddev.
    if noise_multiplier >= 2 * clipped_count_stddev:
        if stddev_was_defaulted:
            raise ValueError(
                f'Default value of `clipped_count_stddev` ({clipped_count_stddev}) '
                f'is too low to achieve the desired effective noise multiplier '
                f'({noise_multiplier}). You may increase `clients_per_round`, '
                f'specify a larger value of `clipped_count_stddev`, or decrease '
                f'`noise_multiplier`.')
        raise ValueError(
            f'`clipped_count_stddev` ({clipped_count_stddev}) is too low to '
            f'achieve the desired effective noise multiplier '
            f'({noise_multiplier}). You must either increase '
            f'`clipped_count_stddev` or decrease `noise_multiplier`.')

    # Runs after the threshold comparison above, as in the original ordering.
    _check_float_nonnegative(clipped_count_stddev, 'clipped_count_stddev')

    # value_noise_multiplier =
    #     (noise_multiplier**-2 - (2 * clipped_count_stddev)**-2)**-0.5
    total_term = noise_multiplier**-2
    count_term = (2 * clipped_count_stddev)**-2
    value_noise_multiplier = (total_term - count_term)**-0.5

    # Warn when the value noise is at least twice the requested multiplier.
    added_noise_factor = value_noise_multiplier / noise_multiplier
    if added_noise_factor >= 2:
        warnings.warn(
            f'A significant amount of noise ({added_noise_factor:.2f}x) has to '
            f'be added to achieve the desired effective noise multiplier '
            f'({noise_multiplier}). If you are manually specifying '
            f'`clipped_count_stddev` you may want to increase it. Or you may '
            f'need more `clients_per_round`.')

    adaptive_sum_query = tfp.QuantileAdaptiveClipSumQuery(
        initial_l2_norm_clip=initial_l2_norm_clip,
        noise_multiplier=value_noise_multiplier,
        target_unclipped_quantile=target_unclipped_quantile,
        learning_rate=learning_rate,
        clipped_count_stddev=clipped_count_stddev,
        expected_num_records=clients_per_round,
        geometric_update=True)

    # Wrap the sum query so its result is normalized by the expected number
    # of clients per round.
    normalized_query = tfp.NormalizedQuery(
        adaptive_sum_query, denominator=clients_per_round)
    return cls(normalized_query)