Example #1
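All of these snippets assume the TensorFlow Privacy query and test modules are importable. A typical preamble (module paths assumed from the tensorflow/privacy repository; verify against your installed version):

import numpy as np
import tensorflow as tf

from tensorflow_privacy.privacy.analysis import privacy_ledger
from tensorflow_privacy.privacy.dp_query import dp_query
from tensorflow_privacy.privacy.dp_query import gaussian_query
from tensorflow_privacy.privacy.dp_query import nested_query
from tensorflow_privacy.privacy.dp_query import test_utils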
    def test_nested_query_with_noise(self):
        with self.cached_session() as sess:
            sum_stddev = 2.71828
            denominator = 3.14159

            query1 = gaussian_query.GaussianSumQuery(l2_norm_clip=1.5,
                                                     stddev=sum_stddev)
            query2 = gaussian_query.GaussianAverageQuery(
                l2_norm_clip=0.5,
                sum_stddev=sum_stddev,
                denominator=denominator)
            query = nested_query.NestedSumQuery((query1, query2))

            record1 = (3.0, [2.0, 1.5])
            record2 = (0.0, [-1.0, -3.5])

            query_result, _ = test_utils.run_query(query, [record1, record2])

            noised_averages = []
            for _ in range(1000):
                noised_averages.append(tf.nest.flatten(sess.run(query_result)))

            result_stddev = np.std(noised_averages, axis=0)
            avg_stddev = sum_stddev / denominator
            expected_stddev = [sum_stddev, avg_stddev, avg_stddev]
            self.assertArrayNear(result_stddev, expected_stddev, 0.1)
Example #2
    def test_complex_nested_query(self):
        with self.cached_session() as sess:
            query_ab = gaussian_query.GaussianSumQuery(l2_norm_clip=1.0,
                                                       stddev=0.0)
            query_c = gaussian_query.GaussianAverageQuery(l2_norm_clip=10.0,
                                                          sum_stddev=0.0,
                                                          denominator=2.0)
            query_d = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0,
                                                      stddev=0.0)

            query = nested_query.NestedSumQuery(
                [query_ab, {
                    'c': query_c,
                    'd': [query_d]
                }])

            record1 = [{
                'a': 0.0,
                'b': 2.71828
            }, {
                'c': (-4.0, 6.0),
                'd': [-4.0]
            }]
            record2 = [{
                'a': 3.14159,
                'b': 0.0
            }, {
                'c': (6.0, -4.0),
                'd': [5.0]
            }]

            query_result, _ = test_utils.run_query(query, [record1, record2])
            result = sess.run(query_result)
            expected = [{'a': 1.0, 'b': 1.0}, {'c': (1.0, 1.0), 'd': [1.0]}]
            self.assertAllClose(result, expected)
Example #3
  def test_nested_gaussian_average_with_clip_no_noise(self):
    with self.cached_session() as sess:
      query1 = gaussian_query.GaussianAverageQuery(
          l2_norm_clip=4.0, sum_stddev=0.0, denominator=5.0)
      query2 = gaussian_query.GaussianAverageQuery(
          l2_norm_clip=5.0, sum_stddev=0.0, denominator=5.0)

      query = nested_query.NestedQuery([query1, query2])

      record1 = [1.0, [12.0, 9.0]]  # Clipped to [1.0, [4.0, 3.0]]
      record2 = [5.0, [1.0, 2.0]]   # Clipped to [4.0, [1.0, 2.0]]

      query_result, _ = test_utils.run_query(query, [record1, record2])
      result = sess.run(query_result)
      expected = [1.0, [1.0, 1.0]]
      self.assertAllClose(result, expected)
Example #4
    def test_nested_query(self):
        population_size = tf.Variable(0)
        selection_probability = tf.Variable(1.0)

        query1 = gaussian_query.GaussianAverageQuery(l2_norm_clip=4.0,
                                                     sum_stddev=2.0,
                                                     denominator=5.0)
        query2 = gaussian_query.GaussianAverageQuery(l2_norm_clip=5.0,
                                                     sum_stddev=1.0,
                                                     denominator=5.0)

        query = nested_query.NestedQuery([query1, query2])
        query = privacy_ledger.QueryWithLedger(query, population_size,
                                               selection_probability)

        record1 = [1.0, [12.0, 9.0]]
        record2 = [5.0, [1.0, 2.0]]

        # First sample.
        tf.compat.v1.assign(population_size, 10)
        tf.compat.v1.assign(selection_probability, 0.1)
        test_utils.run_query(query, [record1, record2])

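        # Each ledger entry records the (l2_norm_clip, noise_stddev) pair of
        # one Gaussian sum query executed during the sample.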
        expected_queries = [[4.0, 2.0], [5.0, 1.0]]
        formatted = query.ledger.get_formatted_ledger_eager()
        sample_1 = formatted[0]
        self.assertAllClose(sample_1.population_size, 10.0)
        self.assertAllClose(sample_1.selection_probability, 0.1)
        self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries))

        # Second sample.
        tf.compat.v1.assign(population_size, 20)
        tf.compat.v1.assign(selection_probability, 0.2)
        test_utils.run_query(query, [record1, record2])

        formatted = query.ledger.get_formatted_ledger_eager()
        sample_1, sample_2 = formatted
        self.assertAllClose(sample_1.population_size, 10.0)
        self.assertAllClose(sample_1.selection_probability, 0.1)
        self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries))

        self.assertAllClose(sample_2.population_size, 20.0)
        self.assertAllClose(sample_2.selection_probability, 0.2)
        self.assertAllClose(sorted(sample_2.queries), sorted(expected_queries))
Example #5
  def test_gaussian_average_no_noise(self):
    with self.cached_session() as sess:
      record1 = tf.constant([5.0, 0.0])   # Clipped to [3.0, 0.0].
      record2 = tf.constant([-1.0, 2.0])  # Not clipped.

      query = gaussian_query.GaussianAverageQuery(
          l2_norm_clip=3.0, sum_stddev=0.0, denominator=2.0)
      query_result, _ = test_utils.run_query(query, [record1, record2])
      result = sess.run(query_result)
      expected_average = [1.0, 1.0]
      self.assertAllClose(result, expected_average)
Example #6
    def __init__(self,
                 initial_l2_norm_clip,
                 noise_multiplier,
                 target_unclipped_quantile,
                 learning_rate,
                 clipped_count_stddev,
                 expected_num_records,
                 geometric_update=False):
        """Initializes the QuantileAdaptiveClipSumQuery.

    Args:
      initial_l2_norm_clip: The initial value of clipping norm.
      noise_multiplier: The multiplier of the l2_norm_clip to make the stddev of
        the noise added to the output of the sum query.
      target_unclipped_quantile: The desired quantile of updates which should be
        unclipped. I.e., a value of 0.8 means a value of l2_norm_clip should be
        found for which approximately 20% of updates are clipped each round.
      learning_rate: The learning rate for the clipping norm adaptation. A
        rate of r means that the clipping norm will change by a maximum of r at
        each step. This maximum is attained when |clip - target| is 1.0.
      clipped_count_stddev: The stddev of the noise added to the clipped_count.
        Since the sensitivity of the clipped count is 0.5, as a rule of thumb it
        should be about 0.5 for reasonable privacy.
      expected_num_records: The expected number of records per round, used to
        estimate the clipped count quantile.
      geometric_update: If True, use geometric updating of clip.
    """
        self._initial_l2_norm_clip = initial_l2_norm_clip
        self._noise_multiplier = noise_multiplier
        self._target_unclipped_quantile = target_unclipped_quantile
        self._learning_rate = learning_rate

        # Initialize sum query's global state with None, to be set later.
        self._sum_query = gaussian_query.GaussianSumQuery(None, None)

        # self._clipped_fraction_query is a DPQuery used to estimate the fraction of
        # records that are clipped. It accumulates an indicator 0/1 of whether each
        # record is clipped, and normalizes by the expected number of records. In
        # practice, we accumulate clipped counts shifted by -0.5 so they are
        # centered at zero. This makes the sensitivity of the clipped count query
        # 0.5 instead of 1.0, since the maximum that a single record could affect
        # the count is 0.5. Note that although the l2_norm_clip of the clipped
        # fraction query is 0.5, no clipping will ever actually occur because the
        # value of each record is always +/-0.5.
        self._clipped_fraction_query = gaussian_query.GaussianAverageQuery(
            l2_norm_clip=0.5,
            sum_stddev=clipped_count_stddev,
            denominator=expected_num_records)

        self._geometric_update = geometric_update
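The docstring above describes the adaptation rule only in words. A minimal sketch of that rule, assuming the clip moves against the error between the measured unclipped fraction and the target (an illustration of the idea, not the library implementation):

import math

def updated_clip(clip, unclipped_fraction, target_unclipped_quantile,
                 learning_rate, geometric=False):
    # Error is in [-1, 1], so an arithmetic step changes the clip by at most
    # learning_rate; a geometric step scales it by at most exp(learning_rate).
    error = unclipped_fraction - target_unclipped_quantile
    if geometric:
        return clip * math.exp(-learning_rate * error)
    return clip - learning_rate * error

If too many records were unclipped (error > 0), the clip was too large and is decreased; if too few, it is increased.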
Example #7
    def _construct_below_estimate_query(self, below_estimate_stddev,
                                        expected_num_records):
        # A DPQuery used to estimate the fraction of records that are less
        # than the current quantile estimate. It accumulates an indicator 0/1
        # of whether each record is below the estimate, and normalizes by the
        # expected number of records. In practice, we accumulate counts
        # shifted by -0.5 so they are centered at zero. This makes the
        # sensitivity of the below_estimate count query 0.5 instead of 1.0,
        # since the maximum that a single record could affect the count is
        # 0.5. Note that although the l2_norm_clip of the below_estimate query
        # is 0.5, no clipping will ever actually occur because the value of
        # each record is always +/-0.5.
        return gaussian_query.GaussianAverageQuery(
            l2_norm_clip=0.5,
            sum_stddev=below_estimate_stddev,
            denominator=expected_num_records)
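The +/-0.5 shifting trick described in the comment above (and in the clipped-fraction comment of Example #6) can be checked with plain arithmetic. A minimal noiseless sketch, not the library code (the real query operates on tensors and adds Gaussian noise):

def shifted_indicator(value, estimate):
    # +0.5 if the record is below the estimate, -0.5 otherwise: adding or
    # removing one record moves the sum by at most 0.5 (sensitivity 0.5),
    # versus 1.0 for an unshifted 0/1 indicator.
    return 0.5 if value <= estimate else -0.5

def below_estimate_fraction(values, estimate, expected_num_records):
    shifted_sum = sum(shifted_indicator(v, estimate) for v in values)
    # Undo the -0.5 shift after normalizing to recover the fraction.
    return shifted_sum / expected_num_records + 0.5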
Example #8
    def __init__(self,
                 initial_estimate,
                 target_quantile,
                 learning_rate,
                 below_estimate_stddev,
                 expected_num_records,
                 geometric_update=False):
        """Initializes the QuantileAdaptiveClipSumQuery.

    Args:
      initial_estimate: The initial estimate of the quantile.
      target_quantile: The target quantile. I.e., a value of 0.8 means a value
        should be found for which approximately 80% of updates are
        less than the estimate each round.
      learning_rate: The learning rate. A rate of r means that the estimate
        will change by a maximum of r at each step (for arithmetic updating) or
        by a maximum factor of exp(r) (for geometric updating).
      below_estimate_stddev: The stddev of the noise added to the count of
        records currently below the estimate. Since the sensitivity of the count
        query is 0.5, as a rule of thumb it should be about 0.5 for reasonable
        privacy.
      expected_num_records: The expected number of records per round.
      geometric_update: If True, use geometric updating of estimate. Geometric
        updating is preferred for non-negative records like vector norms that
        could potentially be very large or very close to zero.
    """
        self._initial_estimate = initial_estimate
        self._target_quantile = target_quantile
        self._learning_rate = learning_rate

        # A DPQuery used to estimate the fraction of records that are less than the
        # current quantile estimate. It accumulates an indicator 0/1 of whether each
        # record is below the estimate, and normalizes by the expected number of
        # records. In practice, we accumulate counts shifted by -0.5 so they are
        # centered at zero. This makes the sensitivity of the below_estimate count
        # query 0.5 instead of 1.0, since the maximum that a single record could
        # affect the count is 0.5. Note that although the l2_norm_clip of the
        # below_estimate query is 0.5, no clipping will ever actually occur
        # because the value of each record is always +/-0.5.
        self._below_estimate_query = gaussian_query.GaussianAverageQuery(
            l2_norm_clip=0.5,
            sum_stddev=below_estimate_stddev,
            denominator=expected_num_records)

        self._geometric_update = geometric_update

        assert isinstance(self._below_estimate_query,
                          dp_query.SumAggregationDPQuery)
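A hedged usage sketch: the module and class names (quantile_estimator_query.QuantileEstimatorQuery) are assumptions inferred from the parameter list above, not shown in the snippet itself.

# Hypothetical usage: track the median of per-round record values with
# geometric updates. With learning_rate=0.2 and geometric_update=True, the
# estimate scales by at most a factor of exp(0.2) ~= 1.22 per step.
quantile_query = quantile_estimator_query.QuantileEstimatorQuery(
    initial_estimate=1.0,
    target_quantile=0.5,
    learning_rate=0.2,
    below_estimate_stddev=0.5,  # rule of thumb from the docstring
    expected_num_records=100,
    geometric_update=True)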
Example #9
  def test_gaussian_average_with_noise(self):
    with self.cached_session() as sess:
      record1, record2 = 2.71828, 3.14159
      sum_stddev = 1.0
      denominator = 2.0

      query = gaussian_query.GaussianAverageQuery(
          l2_norm_clip=5.0, sum_stddev=sum_stddev, denominator=denominator)
      query_result, _ = test_utils.run_query(query, [record1, record2])

      noised_averages = []
      for _ in range(1000):
        noised_averages.append(sess.run(query_result))

      result_stddev = np.std(noised_averages)
      avg_stddev = sum_stddev / denominator
      self.assertNear(result_stddev, avg_stddev, 0.1)
Example #10
        def __init__(
                self,
                l2_norm_clip,
                noise_multiplier,
                scalars,
                num_microbatches=None,
                ledger=None,
                unroll_microbatches=False,
                *args,  # pylint: disable=keyword-arg-before-vararg
                **kwargs):

            # Build one Gaussian average query per variable group; each
            # group's noise stddev is its clipping norm times its noise
            # multiplier.
            queries = [
                gaussian_query.GaussianAverageQuery(
                    l2_norm_clip[i], l2_norm_clip[i] * noise_multiplier[i],
                    scalars[i]) for i in range(12)
            ]

            dp_nested_query = nested_query.NestedQuery(queries)

            if ledger:
                dp_nested_query = privacy_ledger.QueryWithLedger(
                    dp_nested_query, ledger=ledger)

            super(DPMultiGaussianOptimizerClass,
                  self).__init__(dp_nested_query, num_microbatches,
                                 unroll_microbatches, *args, **kwargs)