Example #1
 def obj(sigma):
     rdp1 = compute_rdp(1.0, sigma / sens1, 1, orders)
     rdp2 = compute_rdp(1.0, sigma / sens2, 1, orders)
     rdp = rdp1 + rdp2
     if sens1 <= 1e-9:
         rdp = rdp2
     privacy = get_privacy_spent(orders, rdp, target_delta=delta)
     return privacy[0] - eps + 1e-8
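obj is written to cross zero at the noise level that just meets the (eps, delta) target, so it is intended to be handed to a scalar root finder. A minimal usage sketch, assuming scipy is available and that orders, delta, eps, sens1 and sens2 are defined as in the snippet above; the bracketing interval is purely illustrative:

from scipy.optimize import brentq

# Find the sigma at which obj(sigma) crosses zero, i.e. the noise level that
# just meets the target epsilon. Widen the bracket if obj has the same sign
# at both ends.
sigma = brentq(obj, 0.1, 100.0)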
Example #2
 def test_compute_rdp_sequence(self):
     rdp_vec = rdp_accountant.compute_rdp(0.01, 2.5, 50,
                                          [1.5, 2.5, 5, 50, 100, np.inf])
     self.assertSequenceAlmostEqual(
         rdp_vec,
         [0.00065, 0.001085, 0.00218075, 0.023846, 167.416307, np.inf],
         delta=1e-5)
Example #3
def find_eps(multiplier):
    rdp = compute_rdp(q=sampling_probability,
                      noise_multiplier=multiplier,
                      steps=steps,
                      orders=orders)
    return (get_privacy_spent(orders, rdp, target_delta=delta)[0] -
            float(target_eps))
Example #4
 def test_get_privacy_spent_check_target_eps(self):
     orders = range(2, 33)
     rdp = rdp_accountant.compute_rdp(0.01, 4, 10000, orders)
     _, delta, opt_order = rdp_accountant.get_privacy_spent(
         orders, rdp, target_eps=1.258575)
     self.assertAlmostEqual(delta, 1e-5)
     self.assertEqual(opt_order, 20)
Example #5
def compute_epsilon(steps, num_examples=60000, target_delta=1e-5):
  if num_examples * target_delta > 1.:
    warnings.warn('Your delta might be too high.')
  q = FLAGS.batch_size / float(num_examples)
  orders = list(np.linspace(1.1, 10.9, 99)) + list(range(11, 64))
  rdp_const = compute_rdp(q, FLAGS.noise_multiplier, steps, orders)
  eps, _, _ = get_privacy_spent(orders, rdp_const, target_delta=target_delta)
  return eps
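A hedged call site for compute_epsilon, assuming the FLAGS.epochs and FLAGS.batch_size flags used in the neighbouring examples; steps is the total number of optimizer steps over the run:

steps = FLAGS.epochs * 60000 // FLAGS.batch_size
eps = compute_epsilon(steps, num_examples=60000, target_delta=1e-5)
print('For delta=1e-5, the current epsilon is: %.2f' % eps)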
Example #6
 def test_get_privacy_spent_check_target_delta(self):
     orders = range(2, 33)
     rdp = rdp_accountant.compute_rdp(0.01, 4, 10000, orders)
     eps, _, opt_order = rdp_accountant.get_privacy_spent(orders,
                                                          rdp,
                                                          target_delta=1e-5)
     self.assertAlmostEqual(eps, 1.258575, places=5)
     self.assertEqual(opt_order, 20)
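Examples #4 and #6 are two views of the same conversion: get_privacy_spent returns a (eps, delta, opt_order) tuple, and the caller fixes exactly one of target_delta or target_eps. A minimal round-trip sketch, assuming the same orders and rdp as in the tests above:

eps, _, _ = rdp_accountant.get_privacy_spent(orders, rdp, target_delta=1e-5)
_, delta, _ = rdp_accountant.get_privacy_spent(orders, rdp, target_eps=eps)
# delta recovered here should come out close to the 1e-5 we started from.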
Example #7
def compute_epsilon(steps):
    """Computes epsilon value for given hyperparameters."""
    if FLAGS.noise_multiplier == 0.0:
        return float('inf')
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    sampling_probability = FLAGS.batch_size / 60000
    rdp = compute_rdp(q=sampling_probability,
                      noise_multiplier=FLAGS.noise_multiplier,
                      steps=steps,
                      orders=orders)
    return get_privacy_spent(orders, rdp, target_delta=FLAGS.delta)[0]
Example #8
def compute_epsilon(steps, sample_prob):
    """Computes epsilon value for given hyperparameters."""
    if FLAGS.noise_multiplier == 0.0:
        return float('inf')
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    rdp = compute_rdp(q=sample_prob,
                      noise_multiplier=FLAGS.noise_multiplier,
                      steps=steps,
                      orders=orders)
    # Delta is set to 1e-5 because MNIST has 60000 training points.
    return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
Example #9
def compute_epsilon(steps, n):
    """Computes epsilon value for given hyperparameters."""
    if FLAGS.noise_multiplier == 0.0:
        return float('inf')
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    sampling_probability = FLAGS.batch_size / n
    rdp = compute_rdp(q=sampling_probability,
                      noise_multiplier=FLAGS.noise_multiplier,
                      steps=steps,
                      orders=orders)
    # Delta is set to approximate 1 / (number of training points).
    return get_privacy_spent(orders, rdp, target_delta=1 / n)[0]
Example #10
def compute_epsilon(steps):
  """Computes epsilon value for given hyperparameters."""
  if FLAGS.noise_multiplier == 0.0:
    return float('inf')
  orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
  sampling_probability = FLAGS.batch_size / NB_TRAIN
  rdp = compute_rdp(q=sampling_probability,
                    noise_multiplier=FLAGS.noise_multiplier,
                    steps=steps,
                    orders=orders)
  # Delta is set to 1e-5 because Penn TreeBank has 60000 training points.
  return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
Example #11
 def compute_epsilon(steps):
     """Computes epsilon value for given hyperparameters."""
     if FLAGS.noise_multiplier == 0.0:
         return float('inf')
     orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
     sampling_probability = FLAGS.batch_size / 60000
     rdp = compute_rdp(q=sampling_probability,
                       stddev_to_sensitivity_ratio=FLAGS.noise_multiplier,
                       steps=steps,
                       orders=orders)
     # Delta is set to 1e-5 because MNIST has 60000 training points.
     return get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
Example #12
    def test_check_composition(self):
        orders = (1.25, 1.5, 1.75, 2., 2.5, 3., 4., 5., 6., 7., 8., 10., 12.,
                  14., 16., 20., 24., 28., 32., 64., 256.)

        rdp = rdp_accountant.compute_rdp(q=1e-4,
                                         stddev_to_sensitivity_ratio=.4,
                                         steps=40000,
                                         orders=orders)

        eps, _, opt_order = rdp_accountant.get_privacy_spent(orders,
                                                             rdp,
                                                             target_delta=1e-6)

        rdp += rdp_accountant.compute_rdp(q=0.1,
                                          stddev_to_sensitivity_ratio=2,
                                          steps=100,
                                          orders=orders)
        eps, _, opt_order = rdp_accountant.get_privacy_spent(orders,
                                                             rdp,
                                                             target_delta=1e-5)
        self.assertAlmostEqual(eps, 8.509656, places=5)
        self.assertEqual(opt_order, 2.5)
Example #13
def apply_dp_sgd_analysis(q, sigma, steps, orders, delta):
  """Compute and print results of DP-SGD analysis."""

  rdp = compute_rdp(q, sigma, steps, orders)

  eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta)

  print('DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated'
        ' over {} steps satisfies'.format(100 * q, sigma, steps), end=' ')
  print('differential privacy with eps = {:.3g} and delta = {}.'.format(
      eps, delta))
  print('The optimal RDP order is {}.'.format(opt_order))

  if opt_order == max(orders) or opt_order == min(orders):
    print('The privacy estimate is likely to be improved by expanding '
          'the set of orders.')
Example #14
    def test_compute_rdp_from_ledger(self):
        orders = range(2, 33)
        q = 0.1
        n = 1000
        l2_norm_clip = 3.14159
        noise_stddev = 2.71828
        steps = 3

        query_entry = privacy_ledger.GaussianSumQueryEntry(
            l2_norm_clip, noise_stddev)
        ledger = [privacy_ledger.SampleEntry(n, q, [query_entry])] * steps

        z = noise_stddev / l2_norm_clip
        rdp = rdp_accountant.compute_rdp(q, z, steps, orders)
        rdp_from_ledger = rdp_accountant.compute_rdp_from_ledger(
            ledger, orders)
        self.assertSequenceAlmostEqual(rdp, rdp_from_ledger)
Example #15
def apply_dp_sgd_analysis(q, sigma, steps, orders, delta):
  """Compute and print results of DP-SGD analysis."""

  # compute_rdp requires that sigma be the ratio of the standard deviation of
  # the Gaussian noise to the l2-sensitivity of the function to which it is
  # added. Hence, sigma here corresponds to the `noise_multiplier` parameter
  # in the DP-SGD implementation found in privacy.optimizers.dp_optimizer
  rdp = compute_rdp(q, sigma, steps, orders)

  eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta)

  print('DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated'
        ' over {} steps satisfies'.format(100 * q, sigma, steps), end=' ')
  print('differential privacy with eps = {:.3g} and delta = {}.'.format(
      eps, delta))
  print('The optimal RDP order is {}.'.format(opt_order))

  if opt_order == max(orders) or opt_order == min(orders):
    print('The privacy estimate is likely to be improved by expanding '
          'the set of orders.')
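A hedged invocation of apply_dp_sgd_analysis with illustrative DP-SGD hyperparameters (batch size 256 on a 60000-point dataset, noise_multiplier 1.1, one epoch of steps):

orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
apply_dp_sgd_analysis(q=256 / 60000, sigma=1.1, steps=60000 // 256,
                      orders=orders, delta=1e-5)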
Example #16
def apply_dp_sgd_analysis_biscotti(q, sigma, steps, orders, delta, prev_rdp):

    rdp = compute_rdp(q, sigma, 1, orders)

    new_rdp = prev_rdp + rdp

    eps, _, opt_order = get_privacy_spent(orders, new_rdp, target_delta=delta)

    # print('DP-SGD with sampling rate = {:.3g}% and noise_multiplier = {} iterated'
    #     ' over {} steps satisfies'.format(100 * q, sigma, steps), end=' ')

    # print('differential privacy with eps = {:.3g} and delta = {}.'.format(
    #     eps, delta))
    # print('The optimal RDP order is {}.'.format(opt_order))

    if opt_order == max(orders) or opt_order == min(orders):
        print('The privacy estimate is likely to be improved by expanding '
              'the set of orders.')

    return eps, new_rdp
Example #17
def print_privacy_guarantees(epochs, batch_size, samples, noise_multiplier):
    """Tabulating position-dependent privacy guarantees."""
    if noise_multiplier == 0:
        print('No differential privacy (additive noise is 0).')
        return

    print(
        'In the conditions of Theorem 34 (https://arxiv.org/abs/1808.06651) '
        'the training procedure results in the following privacy guarantees.')

    print('Out of the total of {} samples:'.format(samples))

    steps_per_epoch = samples // batch_size
    orders = np.concatenate(
        [np.linspace(2, 20, num=181),
         np.linspace(20, 100, num=81)])
    delta = 1e-5
    for p in (.5, .9, .99):
        steps = math.ceil(steps_per_epoch * p)  # Steps in the last epoch.
        coef = 2 * (noise_multiplier * batch_size)**-2 * (
            # Accounting for privacy loss
            (epochs - 1) / steps_per_epoch +  # ... from all-but-last epochs
            1 / (steps_per_epoch - steps + 1))  # ... due to the last epoch
        # Using RDP accountant to compute eps. Doing computation analytically is
        # an option.
        rdp = [order * coef for order in orders]
        eps, _, _ = get_privacy_spent(orders, rdp, target_delta=delta)
        print('\t{:g}% enjoy at least ({:.2f}, {})-DP'.format(
            p * 100, eps, delta))

    # Compute privacy guarantees for the Sampled Gaussian Mechanism.
    rdp_sgm = compute_rdp(batch_size / samples, noise_multiplier,
                          epochs * steps_per_epoch, orders)
    eps_sgm, _, _ = get_privacy_spent(orders, rdp_sgm, target_delta=delta)
    print('By comparison, DP-SGD analysis for training done with the same '
          'parameters and random shuffling in each epoch guarantees '
          '({:.2f}, {})-DP for all samples.'.format(eps_sgm, delta))
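For reference, a hedged call with MNIST-like settings; all values are illustrative:

print_privacy_guarantees(epochs=60, batch_size=256, samples=60000,
                         noise_multiplier=1.1)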
Example #18
def train(dataset,
          n_hidden=50,
          batch_size=100,
          epochs=100,
          learning_rate=0.01,
          model='nn',
          l2_ratio=1e-7,
          silent=True,
          non_linearity='relu',
          privacy='no_privacy',
          dp='dp',
          epsilon=0.5,
          delta=1e-5):
    train_x, train_y, test_x, test_y = dataset

    n_in = train_x.shape[1]
    n_out = len(np.unique(train_y))

    if batch_size > len(train_y):
        batch_size = len(train_y)

    classifier = tf.estimator.Estimator(model_fn=get_model,
                                        params=[
                                            train_x.shape[0], n_in, n_hidden,
                                            n_out, non_linearity, model,
                                            privacy, dp, epsilon, delta,
                                            batch_size, learning_rate,
                                            l2_ratio, epochs
                                        ])

    train_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': train_x},
                                                        y=train_y,
                                                        batch_size=batch_size,
                                                        num_epochs=epochs,
                                                        shuffle=True)
    test_eval_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': test_x},
                                                            y=test_y,
                                                            num_epochs=1,
                                                            shuffle=False)
    train_eval_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': train_x},
                                                             y=train_y,
                                                             num_epochs=1,
                                                             shuffle=False)
    pred_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': test_x},
                                                       num_epochs=1,
                                                       shuffle=False)

    steps_per_epoch = train_x.shape[0] // batch_size
    orders = [1 + x / 100.0 for x in range(1, 1000)] + list(range(12, 1200))
    rdp = compute_rdp(batch_size / train_x.shape[0], noise_multiplier[epsilon],
                      epochs * steps_per_epoch, orders)
    eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta)
    print('\nFor delta = %.5f, the epsilon is: %.2f\n' % (delta, eps))

    if not os.path.exists(LOG_DIR):
        os.makedirs(LOG_DIR)
    for epoch in range(1, epochs + 1):
        hooks = []
        if LOGGING:
            hooks.append(
                tf.train.ProfilerHook(output_dir=LOG_DIR, save_steps=30))
        # This hook will save traces of what tensorflow is doing
        # during the training of each model. View the combined trace
        # by running `combine_traces.py`

        classifier.train(input_fn=train_input_fn,
                         steps=steps_per_epoch,
                         hooks=hooks)

        if not silent:
            eval_results = classifier.evaluate(input_fn=train_eval_input_fn)
            print('Train loss after %d epochs is: %.3f' %
                  (epoch, eval_results['loss']))

    eval_results = classifier.evaluate(input_fn=train_eval_input_fn)
    train_acc = eval_results['accuracy']
    train_loss = eval_results['loss']
    if not silent:
        print('Train accuracy is: %.3f' % (train_acc))

    eval_results = classifier.evaluate(input_fn=test_eval_input_fn)
    test_acc = eval_results['accuracy']
    if not silent:
        print('Test accuracy is: %.3f' % (test_acc))

    predictions = classifier.predict(input_fn=pred_input_fn)

    pred_y, pred_scores = get_predictions(predictions)

    return classifier, pred_y, pred_scores, train_loss, train_acc, test_acc
Example #19
 def test_compute_rdp_no_sampling(self):
     # q = 1 (no subsampling): Gaussian mechanism RDP at order alpha is
     # alpha / (2 * sigma^2) = 20 / (2 * 10^2) = 0.1
     self.assertEqual(rdp_accountant.compute_rdp(1, 10, 1, 20), 0.1)
Example #20
 def test_compute_rdp_scalar(self):
     rdp_scalar = rdp_accountant.compute_rdp(0.1, 2, 10, 5)
     self.assertAlmostEqual(rdp_scalar, 0.07737, places=5)
Example #21
 def test_compute_rdp_no_data(self):
     # q = 0
     self.assertEqual(rdp_accountant.compute_rdp(0, 10, 1, 20), 0)
Example #22
 def obj(sigma):
     rdp1 = compute_rdp(1.0, sigma / round1, 1, orders)
     rdp2 = compute_rdp(1.0, sigma / round2, 1, orders)
     rdp = rdp1 + rdp2
     privacy = get_privacy_spent(orders, rdp, target_delta=delta)
     return privacy[0] - eps + 1e-8
def train_private(dataset,
                  hold_out_train_data=None,
                  n_hidden=50,
                  batch_size=100,
                  epochs=100,
                  learning_rate=0.01,
                  model='nn',
                  l2_ratio=1e-7,
                  silent=True,
                  non_linearity='relu',
                  privacy='no_privacy',
                  dp='dp',
                  epsilon=0.5,
                  delta=1e-5):
    train_x, train_y, test_x, test_y = dataset

    if hold_out_train_data is not None:
        hold_out_x, hold_out_y, _, _ = hold_out_train_data

    n_in = train_x.shape[1]
    n_out = len(np.unique(train_y))

    if batch_size > len(train_y):
        batch_size = len(train_y)

    classifier = tf.estimator.Estimator(model_fn=get_model,
                                        params=[
                                            train_x.shape[0], n_in, n_hidden,
                                            n_out, non_linearity, model,
                                            privacy, dp, epsilon, delta,
                                            batch_size, learning_rate,
                                            l2_ratio, epochs
                                        ])

    train_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': train_x},
                                                        y=train_y,
                                                        batch_size=batch_size,
                                                        num_epochs=epochs,
                                                        shuffle=True)
    test_eval_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': test_x},
                                                            y=test_y,
                                                            num_epochs=1,
                                                            shuffle=False)
    train_eval_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': train_x},
                                                             y=train_y,
                                                             num_epochs=1,
                                                             shuffle=False)
    pred_input_fn = tf.estimator.inputs.numpy_input_fn(x={'x': test_x},
                                                       num_epochs=1,
                                                       shuffle=False)

    steps_per_epoch = train_x.shape[0] // batch_size
    orders = [1 + x / 100.0 for x in range(1, 1000)] + list(range(12, 1200))
    rdp = compute_rdp(batch_size / train_x.shape[0], noise_multiplier[epsilon],
                      epochs * steps_per_epoch, orders)
    eps, _, opt_order = get_privacy_spent(orders, rdp, target_delta=delta)
    print('\nFor delta = %.5f, the epsilon is: %.2f\n' % (delta, eps))

    for epoch in range(1, epochs + 1):
        classifier.train(input_fn=train_input_fn, steps=steps_per_epoch)

        if not silent:
            eval_results = classifier.evaluate(input_fn=train_eval_input_fn)
            print('Train loss after %d epochs is: %.3f' %
                  (epoch, eval_results['loss']))

    eval_results = classifier.evaluate(input_fn=train_eval_input_fn)
    train_acc = eval_results['accuracy']
    train_loss = eval_results['loss']
    if not silent:
        print('Train accuracy is: %.3f' % (train_acc))

    eval_results = classifier.evaluate(input_fn=test_eval_input_fn)
    test_acc = eval_results['accuracy']
    if not silent:
        print('Test accuracy is: %.3f' % (test_acc))

    predictions = classifier.predict(input_fn=pred_input_fn)

    pred_y, pred_scores = get_predictions(predictions)

    return classifier, pred_y, pred_scores, train_loss, train_acc, test_acc
delta = 1e-5


def find_eps(multiplier):
    rdp = compute_rdp(q=sampling_probability,
                      noise_multiplier=multiplier,
                      steps=steps,
                      orders=orders)
    return (get_privacy_spent(orders, rdp, target_delta=delta)[0] -
            float(target_eps))


noise_multiplier = bisect(find_eps, 0.5, 3.0)

rdp = compute_rdp(q=sampling_probability,
                  noise_multiplier=noise_multiplier,
                  steps=steps,
                  orders=orders)

epsilon = get_privacy_spent(orders, rdp, target_delta=delta)[0]

# In[3]:

dev = 0.5

# In[5]:


def lrelu(x, th=0.2):
    return tf.maximum(th * x, x)

Example #25
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    if FLAGS.batch_size % FLAGS.microbatches != 0:
        raise ValueError(
            'Number of microbatches should divide batch_size evenly')

    # Load training and test data.
    train_data, train_labels, test_data, test_labels = load_mnist()

    # Define a sequential Keras model
    model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(16,
                               8,
                               strides=2,
                               padding='same',
                               activation='relu',
                               input_shape=(28, 28, 1)),
        tf.keras.layers.MaxPool2D(2, 1),
        tf.keras.layers.Conv2D(32,
                               4,
                               strides=2,
                               padding='valid',
                               activation='relu'),
        tf.keras.layers.MaxPool2D(2, 1),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(10)
    ])

    if FLAGS.dpsgd:
        dp_average_query = GaussianAverageQuery(
            FLAGS.l2_norm_clip, FLAGS.l2_norm_clip * FLAGS.noise_multiplier,
            FLAGS.microbatches)
        optimizer = DPGradientDescentOptimizer(
            dp_average_query,
            FLAGS.microbatches,
            learning_rate=FLAGS.learning_rate,
            unroll_microbatches=True)
        # Compute vector of per-example loss rather than its mean over a minibatch.
        loss = tf.keras.losses.CategoricalCrossentropy(
            from_logits=True, reduction=tf.losses.Reduction.NONE)
    else:
        optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
        loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

    # Compile model with Keras
    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

    # Train model with Keras
    model.fit(train_data,
              train_labels,
              epochs=FLAGS.epochs,
              validation_data=(test_data, test_labels),
              batch_size=FLAGS.batch_size)

    # Compute the privacy budget expended.
    if FLAGS.noise_multiplier == 0.0:
        print('Trained with vanilla non-private SGD optimizer')
    orders = [1 + x / 10. for x in range(1, 100)] + list(range(12, 64))
    sampling_probability = FLAGS.batch_size / 60000
    rdp = compute_rdp(q=sampling_probability,
                      noise_multiplier=FLAGS.noise_multiplier,
                      steps=(FLAGS.epochs * 60000 // FLAGS.batch_size),
                      orders=orders)
    # Delta is set to 1e-5 because MNIST has 60000 training points.
    eps = get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
    print('For delta=1e-5, the current epsilon is: %.2f' % eps)