示例#1
0
    def test_adaptive_query(self):
        """Runs two aggregation rounds and checks the clip adapts in between.

        The query is configured with `noise_multiplier=0.0`,
        `clipped_count_stddev=0.0` and `geometric_update=False`, and the same
        three client values are aggregated in both rounds.
        """
        adaptive_query = tfp.QuantileAdaptiveClipSumQuery(
            initial_l2_norm_clip=1.0,
            noise_multiplier=0.0,
            target_unclipped_quantile=1.0,
            learning_rate=1.0,
            clipped_count_stddev=0.0,
            expected_num_records=3.0,
            geometric_update=False,
        )
        dp_factory = differential_privacy.DifferentiallyPrivateFactory(
            adaptive_query)
        float_type = computation_types.to_type(tf.float32)
        process = dp_factory.create(float_type)

        initial_state = process.initialize()
        client_values = [0.5, 1.5, 2.0]

        # Round 1: with the initial clip of 1.0, two of the three values
        # (1.5 and 2.0) are clipped down to 1.0.
        first_round = process.next(initial_state, client_values)
        self.assertAllClose(0.5 + 1.0 + 1.0, first_round.result)

        # Round 2: the clip has been increased by 2/3 to 5/3, so only 2.0 is
        # clipped this time.
        second_round = process.next(first_round.state, client_values)
        self.assertAllClose(0.5 + 1.5 + 5 / 3, second_round.result)
示例#2
0
    def gaussian_adaptive(
        cls,
        noise_multiplier: float,
        clients_per_round: float,
        initial_l2_norm_clip: float = 0.1,
        target_unclipped_quantile: float = 0.5,
        learning_rate: float = 0.2,
        clipped_count_stddev: Optional[float] = None
    ) -> factory.UnweightedAggregationFactory:
        """Builds a `DifferentiallyPrivateFactory` using adaptive clipping.

        The resulting factory clips adaptively and adds Gaussian noise for
        differentially private learning. The DP algorithm is described in
        McMahan et al. (2017) https://arxiv.org/abs/1710.06963, and the
        adaptive clipping follows the geometric method of Thakkar et al. (2019)
        https://arxiv.org/abs/1905.03871.

        The default adaptive clipping parameters are intended to give a process
        that starts small and adapts relatively quickly to the median without
        spending much of the privacy budget, which works well on most problems.

        Args:
          noise_multiplier: Noise multiplier of the Gaussian mechanism applied
            to model updates. Must be nonnegative. A value of 1.0 or higher may
            be needed for strong privacy; see the papers above for computing
            the (epsilon, delta) guarantee. This is the effective total noise
            multiplier, accounting for the privacy loss due to adaptive
            clipping, so the noise actually added to the aggregated values
            will be slightly higher.
          clients_per_round: Expected number of clients per round. Must be
            positive. An `int` is accepted and converted to `float`.
          initial_l2_norm_clip: Starting value of the adaptive clipping norm.
            Must be positive.
          target_unclipped_quantile: Quantile to which the clipping norm should
            adapt. Must be a probability.
          learning_rate: Learning rate of the adaptive clipping process. Must
            be nonnegative.
          clipped_count_stddev: Stddev of the noise added to the clipped counts
            in the adaptive clipping algorithm. If None, defaults to `0.05 *
            clients_per_round` (unless `noise_multiplier` is 0, in which case
            it is also 0).

        Returns:
          A `DifferentiallyPrivateFactory` with adaptive clipping and Gaussian
            noise.
        """
        # Accept integer client counts by promoting them before validation.
        if isinstance(clients_per_round, int):
            clients_per_round = float(clients_per_round)

        # Validate the always-present arguments in signature order.
        argument_checks = (
            (_check_float_nonnegative, noise_multiplier, 'noise_multiplier'),
            (_check_float_positive, clients_per_round, 'clients_per_round'),
            (_check_float_positive, initial_l2_norm_clip,
             'initial_l2_norm_clip'),
            (_check_float_probability, target_unclipped_quantile,
             'target_unclipped_quantile'),
            (_check_float_nonnegative, learning_rate, 'learning_rate'),
        )
        for check, value, label in argument_checks:
            check(value, label)

        # The clipped-count stddev is optional; only validate it when given.
        if clipped_count_stddev is not None:
            _check_float_nonnegative(clipped_count_stddev,
                                     'clipped_count_stddev')

        # Derive the noise multiplier applied to the values and the final
        # clipped-count stddev from the requested effective noise multiplier.
        noise_params = adaptive_clip_noise_params(
            noise_multiplier, clients_per_round, clipped_count_stddev)
        value_noise_multiplier, count_stddev = noise_params

        adaptive_sum_query = tfp.QuantileAdaptiveClipSumQuery(
            initial_l2_norm_clip=initial_l2_norm_clip,
            noise_multiplier=value_noise_multiplier,
            target_unclipped_quantile=target_unclipped_quantile,
            learning_rate=learning_rate,
            clipped_count_stddev=count_stddev,
            expected_num_records=clients_per_round,
            geometric_update=True)

        # Divide the noised sum by the expected number of clients.
        normalized_query = tfp.NormalizedQuery(
            adaptive_sum_query, denominator=clients_per_round)
        return cls(normalized_query)
示例#3
0
    def gaussian_adaptive(
        cls,
        noise_multiplier: float,
        clients_per_round: float,
        initial_l2_norm_clip: float = 0.1,
        target_unclipped_quantile: float = 0.5,
        learning_rate: float = 0.2,
        clipped_count_stddev: Optional[float] = None
    ) -> factory.UnweightedAggregationFactory:
        """`DifferentiallyPrivateFactory` with adaptive clipping and Gaussian noise.

        Performs adaptive clipping and addition of Gaussian noise for
        differentially private learning. For details of the DP algorithm see
        McMahan et al. (2017) https://arxiv.org/abs/1710.06963. The adaptive
        clipping uses the geometric method described in Thakkar et al. (2019)
        https://arxiv.org/abs/1905.03871.

        The adaptive clipping parameters have been chosen to yield a process
        that starts small and adapts relatively quickly to the median, without
        using much of the privacy budget. This works well on most problems.

        Args:
          noise_multiplier: A float specifying the noise multiplier for the
            Gaussian mechanism for model updates. Must be positive. A value of
            1.0 or higher may be needed for strong privacy. See above mentioned
            papers to compute (epsilon, delta) privacy guarantee. Note that
            this is the effective total noise multiplier, accounting for the
            privacy loss due to adaptive clipping. The noise actually added to
            the aggregated values will be slightly higher.
          clients_per_round: A float specifying the expected number of clients
            per round. Must be positive. An `int` is converted to `float`.
          initial_l2_norm_clip: The initial value of the adaptive clipping
            norm. Must be positive.
          target_unclipped_quantile: The quantile to which the clipping norm
            should adapt.
          learning_rate: The learning rate for the adaptive clipping process.
            Must be nonnegative.
          clipped_count_stddev: The stddev of the noise added to the clipped
            counts in the adaptive clipping algorithm. If None, defaults to
            `0.05 * clients_per_round`.

        Returns:
          A `DifferentiallyPrivateFactory` with adaptive clipping and Gaussian
            noise.

        Raises:
          ValueError: If `noise_multiplier >= 2 * clipped_count_stddev`, i.e.
            the clipped-count noise (default or caller-supplied) is too low to
            reach the requested effective noise multiplier.

        Arguments are additionally validated by the module's `_check_float_*`
        helpers, which are defined outside this function; whatever they raise
        on invalid input propagates to the caller.
        """

        # Integer client counts are accepted and promoted before validation.
        if isinstance(clients_per_round, int):
            clients_per_round = float(clients_per_round)

        _check_float_positive(noise_multiplier, 'noise_multiplier')
        _check_float_positive(clients_per_round, 'clients_per_round')
        _check_float_positive(initial_l2_norm_clip, 'initial_l2_norm_clip')
        _check_float_probability(target_unclipped_quantile,
                                 'target_unclipped_quantile')
        _check_float_nonnegative(learning_rate, 'learning_rate')

        using_default_stddev = clipped_count_stddev is None
        if using_default_stddev:
            # Defaults to 0.05 * clients_per_round. The noised fraction of unclipped
            # updates will be within 0.1 of the true fraction with 95.4% probability,
            # and will be within 0.15 of the true fraction with 99.7% probability.
            # Even in this unlikely case, the error on the update would be a factor of
            # exp(0.2 * 0.15) = 1.03, a small deviation. So this default gives maximal
            # privacy for acceptable probability of deviation.
            clipped_count_stddev = 0.05 * clients_per_round

        # Validate before the value is used in any arithmetic or comparison, so
        # that an invalid argument is reported as such rather than surfacing as
        # the "too low" error below (which any non-positive value would trigger,
        # since `noise_multiplier` is known to be positive here).
        _check_float_nonnegative(clipped_count_stddev, 'clipped_count_stddev')

        if noise_multiplier >= 2 * clipped_count_stddev:
            if using_default_stddev:
                raise ValueError(
                    f'Default value of `clipped_count_stddev` ({clipped_count_stddev}) '
                    f'is too low to achieve the desired effective noise multiplier '
                    f'({noise_multiplier}). You may increase `clients_per_round`, '
                    f'specify a larger value of `clipped_count_stddev`, or decrease '
                    f'`noise_multiplier`.')
            raise ValueError(
                f'`clipped_count_stddev` ({clipped_count_stddev}) is too low to '
                f'achieve the desired effective noise multiplier '
                f'({noise_multiplier}). You must either increase '
                f'`clipped_count_stddev` or decrease `noise_multiplier`.')

        # Solve
        #   noise_multiplier**-2
        #       == value_noise_multiplier**-2 + (2 * clipped_count_stddev)**-2
        # for value_noise_multiplier. The check above guarantees that
        # noise_multiplier < 2 * clipped_count_stddev, so the difference below is
        # strictly positive and the root is well defined.
        value_noise_multiplier = (noise_multiplier**-2 -
                                  (2 * clipped_count_stddev)**-2)**-0.5

        # Ratio of the noise multiplier applied to the values to the requested
        # effective one; warn when the values need at least twice the noise.
        added_noise_factor = value_noise_multiplier / noise_multiplier
        if added_noise_factor >= 2:
            warnings.warn(
                f'A significant amount of noise ({added_noise_factor:.2f}x) has to '
                f'be added to achieve the desired effective noise multiplier '
                f'({noise_multiplier}). If you are manually specifying '
                f'`clipped_count_stddev` you may want to increase it. Or you may '
                f'need more `clients_per_round`.')

        query = tfp.QuantileAdaptiveClipSumQuery(
            initial_l2_norm_clip=initial_l2_norm_clip,
            noise_multiplier=value_noise_multiplier,
            target_unclipped_quantile=target_unclipped_quantile,
            learning_rate=learning_rate,
            clipped_count_stddev=clipped_count_stddev,
            expected_num_records=clients_per_round,
            geometric_update=True)
        # Divide the noised sum by the expected number of clients.
        query = tfp.NormalizedQuery(query, denominator=clients_per_round)

        return cls(query)