Example #1
    def testBaseline(self, cls, num_microbatches, expected_answer):
        with self.cached_session() as sess:
            var0 = tf.Variable([1.0, 2.0])
            data0 = tf.Variable([[3.0, 4.0], [5.0, 6.0], [7.0, 8.0],
                                 [-1.0, 0.0]])

            ledger = privacy_ledger.PrivacyLedger(1e6, num_microbatches / 1e6,
                                                  50, 50)
            dp_average_query = gaussian_query.GaussianAverageQuery(
                1.0e9, 0.0, num_microbatches, ledger)
            dp_average_query = privacy_ledger.QueryWithLedger(
                dp_average_query, ledger)

            opt = cls(dp_average_query,
                      num_microbatches=num_microbatches,
                      learning_rate=2.0)

            self.evaluate(tf.global_variables_initializer())
            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], self.evaluate(var0))

            # Expected gradient is sum of differences divided by number of
            # microbatches.
            gradient_op = opt.compute_gradients(loss(data0, var0), [var0])
            grads_and_vars = sess.run(gradient_op)
            self.assertAllCloseAccordingToType(expected_answer,
                                               grads_and_vars[0][0])
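The query in this test is configured so that the privacy mechanisms are effectively inert, which is what lets the test assert exact gradient values. A re-annotated sketch of the same construction, assuming the TF Privacy 0.x signature GaussianAverageQuery(l2_norm_clip, sum_stddev, denominator, ledger) visible in the call above:

# Clip each microbatch gradient to l2_norm_clip, add Gaussian noise with
# stddev sum_stddev to the sum, then divide by denominator.
dp_average_query = gaussian_query.GaussianAverageQuery(
    1.0e9,             # clip far above any real gradient norm: no clipping
    0.0,               # zero noise: the result is deterministic
    num_microbatches,  # average over the microbatches
    ledger)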
Example #2
def rnn_model_fn(features, labels, mode):  # pylint: disable=unused-argument
    """Model function for a RNN."""

    # Define RNN architecture using tf.keras.layers.
    x = features['x']
    x = tf.reshape(x, [-1, SEQ_LEN])
    input_layer = x[:, :-1]
    input_one_hot = tf.one_hot(input_layer, 256)
    # if FLAGS.float16:
    #   input_one_hot = tf.cast(input_one_hot, tf.float16)

    lstm = tf.keras.layers.LSTM(256,
                                return_sequences=True).apply(input_one_hot)
    logits = tf.keras.layers.Dense(256).apply(lstm)

    # Calculate loss as a vector (to support microbatches in DP-SGD).
    vector_loss = tf.nn.softmax_cross_entropy_with_logits(
        labels=tf.cast(tf.one_hot(x[:, 1:], 256), dtype=tf.float32),
        logits=logits)
    # Define mean of loss across minibatch (for reporting through tf.Estimator).
    scalar_loss = tf.reduce_mean(vector_loss)

    # Configure the training op (for TRAIN mode).
    if mode == tf.estimator.ModeKeys.TRAIN:
        if FLAGS.dpsgd:

            ledger = privacy_ledger.PrivacyLedger(
                population_size=NB_TRAIN,
                selection_probability=(FLAGS.batch_size / NB_TRAIN),
                max_samples=1e6,
                max_queries=1e6)

            optimizer = dp_optimizer.DPAdamGaussianOptimizer(
                l2_norm_clip=FLAGS.l2_norm_clip,
                noise_multiplier=FLAGS.noise_multiplier,
                num_microbatches=FLAGS.microbatches,
                ledger=ledger,
                learning_rate=FLAGS.learning_rate,
                unroll_microbatches=True)
            opt_loss = vector_loss
        else:
            optimizer = tf.train.AdamOptimizer(
                learning_rate=FLAGS.learning_rate)
            opt_loss = scalar_loss
        global_step = tf.train.get_global_step()
        train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=scalar_loss,
                                          train_op=train_op)

    # Add evaluation metrics (for EVAL mode).
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metric_ops = {
            'accuracy':
            tf.metrics.accuracy(labels=tf.cast(x[:, 1:], dtype=tf.int32),
                                predictions=tf.argmax(input=logits, axis=2))
        }
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=scalar_loss,
                                          eval_metric_ops=eval_metric_ops)
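Example #2 references several absl flags and module constants defined elsewhere in its tutorial. The block below gives representative definitions so the snippet is self-contained; the flag names match the example, but every default value, as well as the SEQ_LEN and NB_TRAIN numbers, is illustrative rather than taken from the original.

from absl import flags

flags.DEFINE_boolean('dpsgd', True,
                     'Train with DP-SGD if True, vanilla SGD otherwise')
flags.DEFINE_float('learning_rate', 0.001, 'Learning rate for training')
flags.DEFINE_float('l2_norm_clip', 1.0, 'Clipping norm')
flags.DEFINE_float('noise_multiplier', 1.1,
                   'Ratio of noise stddev to the clipping norm')
flags.DEFINE_integer('batch_size', 256, 'Batch size')
flags.DEFINE_integer('microbatches', 256,
                     'Number of microbatches (must evenly divide batch_size)')

FLAGS = flags.FLAGS

SEQ_LEN = 80       # sequence length assumed by the reshape in rnn_model_fn
NB_TRAIN = 50000   # training-set size, used as the ledger population_size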
Example #3
    def testNoiseMultiplier(self, cls):
        with tf.GradientTape(persistent=True) as gradient_tape:
            var0 = tf.Variable([0.0])
            data0 = tf.Variable([[0.0]])

            ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 5000, 5000)
            dp_average_query = gaussian_query.GaussianAverageQuery(4.0, 8.0, 1)
            dp_average_query = privacy_ledger.QueryWithLedger(
                dp_average_query, ledger)

            opt = cls(dp_average_query, num_microbatches=1, learning_rate=2.0)

            self.evaluate(tf.global_variables_initializer())
            # Fetch params to validate initial values
            self.assertAllClose([0.0], self.evaluate(var0))

            grads = []
            for _ in range(1000):
                grads_and_vars = opt.compute_gradients(
                    lambda: self._loss_fn(var0, data0), [var0],
                    gradient_tape=gradient_tape)
                grads.append(grads_and_vars[0][0])

            # Test standard deviation is close to l2_norm_clip * noise_multiplier.
            self.assertNear(np.std(grads), 2.0 * 4.0, 0.5)
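A note on the final assertion, for readers tracing the numbers:

# The query is GaussianAverageQuery(l2_norm_clip=4.0, sum_stddev=8.0,
# denominator=1), an implied noise_multiplier of 8.0 / 4.0 = 2.0. With
# denominator 1, each returned gradient is the clipped gradient plus
# N(0, 8.0**2) noise, so np.std over the 1000 draws should be near 2.0 * 4.0.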
Example #4
    def __init__(
        self,
        l2_norm_clip,
        noise_multiplier,
        num_microbatches,
        unroll_microbatches=False,
        *args,  # pylint: disable=keyword-arg-before-vararg
        **kwargs):
      dp_average_query = gaussian_query.GaussianAverageQuery(
          l2_norm_clip, l2_norm_clip * noise_multiplier, num_microbatches)
      if 'population_size' in kwargs:
        population_size = kwargs.pop('population_size')
        max_queries = kwargs.pop('ledger_max_queries', 1e6)
        max_samples = kwargs.pop('ledger_max_samples', 1e6)
        selection_probability = num_microbatches / population_size
        ledger = privacy_ledger.PrivacyLedger(
            population_size,
            selection_probability,
            max_samples,
            max_queries)
        dp_average_query = privacy_ledger.QueryWithLedger(
            dp_average_query, ledger)

      super(DPGaussianOptimizerClass, self).__init__(
          dp_average_query,
          num_microbatches,
          unroll_microbatches,
          *args,
          **kwargs)
Example #5
  def test_basic(self):
    ledger = privacy_ledger.PrivacyLedger(10, 0.1, 50, 50)
    ledger.record_sum_query(5.0, 1.0)
    ledger.record_sum_query(2.0, 0.5)

    ledger.finalize_sample()

    expected_queries = [[5.0, 1.0], [2.0, 0.5]]
    formatted = ledger.get_formatted_ledger_eager()

    sample = formatted[0]
    self.assertAllClose(sample.population_size, 10.0)
    self.assertAllClose(sample.selection_probability, 0.1)
    self.assertAllClose(sorted(sample.queries), sorted(expected_queries))
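For reference, the entries unpacked above are namedtuples in the TF Privacy 0.x privacy_ledger module; a sketch of their shape (field names as in that source, stated here from memory):

# Each element of get_formatted_ledger_eager() is a SampleEntry:
#   SampleEntry(population_size, selection_probability, queries)
# and each element of .queries is a GaussianSumQueryEntry:
#   GaussianSumQueryEntry(l2_norm_clip, noise_stddev)
# which is why the test compares [clip, stddev] pairs such as [5.0, 1.0].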
Example #6
def model_fn(features, labels, mode):
	logits = linear_layer(features)

	# Vector loss: each component corresponds to an individual training
	# point and label (used for per-example gradients below).
	vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
		logits=logits, labels=tf.cast(labels, dtype=tf.int64))


	scalar_loss = tf.reduce_mean(vector_loss)
	if mode == tf.estimator.ModeKeys.TRAIN:

		if FLAGS.dpsgd:
			ledger = privacy_ledger.PrivacyLedger(
				population_size=60000,
				selection_probability=(FLAGS.batch_size / 60000))

			optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer(
				l2_norm_clip=FLAGS.l2_norm_clip,
				noise_multiplier=FLAGS.noise_multiplier,
				num_microbatches=FLAGS.microbatches,
				ledger=ledger,
				learning_rate=FLAGS.learning_rate)
			training_hooks = [
				EpsilonPrintingTrainingHook(ledger)
			]
			opt_loss = vector_loss
		else:
			optimizer = tf.train.GradientDescentOptimizer(
				learning_rate=FLAGS.learning_rate)
			opt_loss = scalar_loss
			training_hooks = []
		global_step = tf.train.get_global_step()		
		train_op = optimizer.minimize(loss=opt_loss,
			global_step=global_step)
		return tf.estimator.EstimatorSpec(mode=mode,
			loss=scalar_loss,
			train_op=train_op,
			training_hooks=training_hooks)
	elif mode == tf.estimator.ModeKeys.EVAL:
		pred_classes = tf.argmax(logits, axis=1)
		acc_op = tf.metrics.accuracy(labels=labels, predictions=pred_classes)
		return tf.estimator.EstimatorSpec(mode=mode,
			loss=scalar_loss,
			eval_metric_ops={'accuracy':acc_op})
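This example (and Examples #13 and #19 below) attaches an EpsilonPrintingTrainingHook that is not defined in the snippet. Below is a minimal sketch of such a hook, modeled on the TF Privacy MNIST tutorial of the same era; treat the import paths, the RDP orders grid, and the target delta as assumptions rather than the original code.

import tensorflow as tf
from tensorflow_privacy.privacy.analysis import privacy_ledger
from tensorflow_privacy.privacy.analysis.rdp_accountant import (
    compute_rdp_from_ledger, get_privacy_spent)


class EpsilonPrintingTrainingHook(tf.train.SessionRunHook):
  """Sketch: prints the epsilon spent so far when training ends."""

  def __init__(self, ledger):
    # Tensors holding the raw ledger contents (samples and queries).
    self._samples, self._queries = ledger.get_unformatted_ledger()

  def end(self, session):
    # RDP orders at which to evaluate the accountant (illustrative grid).
    orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))
    samples = session.run(self._samples)
    queries = session.run(self._queries)
    formatted_ledger = privacy_ledger.format_ledger(samples, queries)
    rdp = compute_rdp_from_ledger(formatted_ledger, orders)
    eps = get_privacy_spent(orders, rdp, target_delta=1e-5)[0]
    print('For delta=1e-5, the current epsilon is: %.2f' % eps)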
Example #7
  def test_ledger(self):
    record1 = tf.constant([8.5])
    record2 = tf.constant([-7.25])

    population_size = tf.Variable(0)
    selection_probability = tf.Variable(0.0)
    ledger = privacy_ledger.PrivacyLedger(
        population_size, selection_probability, 50, 50)

    query = quantile_adaptive_clip_sum_query.QuantileAdaptiveClipSumQuery(
        initial_l2_norm_clip=10.0,
        noise_multiplier=1.0,
        target_unclipped_quantile=0.0,
        learning_rate=1.0,
        clipped_count_stddev=0.0,
        expected_num_records=2.0,
        ledger=ledger)

    query = privacy_ledger.QueryWithLedger(query, ledger)

    # First sample.
    tf.assign(population_size, 10)
    tf.assign(selection_probability, 0.1)
    _, global_state = test_utils.run_query(query, [record1, record2])

    expected_queries = [[10.0, 10.0], [0.5, 0.0]]
    formatted = ledger.get_formatted_ledger_eager()
    sample_1 = formatted[0]
    self.assertAllClose(sample_1.population_size, 10.0)
    self.assertAllClose(sample_1.selection_probability, 0.1)
    self.assertAllClose(sample_1.queries, expected_queries)

    # Second sample.
    tf.assign(population_size, 20)
    tf.assign(selection_probability, 0.2)
    test_utils.run_query(query, [record1, record2], global_state)

    formatted = ledger.get_formatted_ledger_eager()
    sample_1, sample_2 = formatted
    self.assertAllClose(sample_1.population_size, 10.0)
    self.assertAllClose(sample_1.selection_probability, 0.1)
    self.assertAllClose(sample_1.queries, expected_queries)

    expected_queries_2 = [[9.0, 9.0], [0.5, 0.0]]
    self.assertAllClose(sample_2.population_size, 20.0)
    self.assertAllClose(sample_2.selection_probability, 0.2)
    self.assertAllClose(sample_2.queries, expected_queries_2)
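The expected ledger entries in this test follow from how QuantileAdaptiveClipSumQuery is built in TF Privacy 0.x; the notes below are an interpretation of that implementation, not part of the original test:

# - [10.0, 10.0] is the main Gaussian sum query: clip 10.0 and noise stddev
#   noise_multiplier * clip = 1.0 * 10.0.
# - [0.5, 0.0] is the internal clipped-count estimate, a Gaussian sum over
#   +/-0.5 indicators with stddev clipped_count_stddev = 0.0.
# - Both record norms (8.5 and 7.25) fall below the clip, so the unclipped
#   fraction is 1.0; with target_unclipped_quantile=0.0 and learning_rate=1.0
#   the clip steps down by 1.0 per sample, which is why the second sample
#   records [9.0, 9.0].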
Example #8
    def test_nested_query(self):
        population_size = tf.Variable(0)
        selection_probability = tf.Variable(0.0)
        ledger = privacy_ledger.PrivacyLedger(population_size,
                                              selection_probability, 50, 50)

        query1 = gaussian_query.GaussianAverageQuery(l2_norm_clip=4.0,
                                                     sum_stddev=2.0,
                                                     denominator=5.0,
                                                     ledger=ledger)
        query2 = gaussian_query.GaussianAverageQuery(l2_norm_clip=5.0,
                                                     sum_stddev=1.0,
                                                     denominator=5.0,
                                                     ledger=ledger)

        query = nested_query.NestedQuery([query1, query2])
        query = privacy_ledger.QueryWithLedger(query, ledger)

        record1 = [1.0, [12.0, 9.0]]
        record2 = [5.0, [1.0, 2.0]]

        # First sample.
        tf.assign(population_size, 10)
        tf.assign(selection_probability, 0.1)
        test_utils.run_query(query, [record1, record2])

        expected_queries = [[4.0, 2.0], [5.0, 1.0]]
        formatted = ledger.get_formatted_ledger_eager()
        sample_1 = formatted[0]
        self.assertAllClose(sample_1.population_size, 10.0)
        self.assertAllClose(sample_1.selection_probability, 0.1)
        self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries))

        # Second sample.
        tf.assign(population_size, 20)
        tf.assign(selection_probability, 0.2)
        test_utils.run_query(query, [record1, record2])

        formatted = ledger.get_formatted_ledger_eager()
        sample_1, sample_2 = formatted
        self.assertAllClose(sample_1.population_size, 10.0)
        self.assertAllClose(sample_1.selection_probability, 0.1)
        self.assertAllClose(sorted(sample_1.queries), sorted(expected_queries))

        self.assertAllClose(sample_2.population_size, 20.0)
        self.assertAllClose(sample_2.selection_probability, 0.2)
        self.assertAllClose(sorted(sample_2.queries), sorted(expected_queries))
Example #9
        def linear_model_fn(features, labels, mode):
            preds = tf.keras.layers.Dense(1, activation='linear',
                                          name='dense').apply(features['x'])

            vector_loss = tf.squared_difference(labels, preds)
            scalar_loss = tf.reduce_mean(vector_loss)
            ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 500, 500)
            dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
            dp_average_query = privacy_ledger.QueryWithLedger(
                dp_average_query, ledger)
            optimizer = dp_optimizer.DPGradientDescentOptimizer(
                dp_average_query, num_microbatches=1, learning_rate=1.0)
            global_step = tf.train.get_global_step()
            train_op = optimizer.minimize(loss=vector_loss,
                                          global_step=global_step)
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=scalar_loss,
                                              train_op=train_op)
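A minimal usage sketch for the model function above; the Estimator wiring and the synthetic data are illustrative, not part of the original test:

import numpy as np
import tensorflow as tf

estimator = tf.estimator.Estimator(model_fn=linear_model_fn)
train_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={'x': np.random.normal(size=(100, 4)).astype(np.float32)},
    y=np.random.normal(size=(100, 1)).astype(np.float32),
    batch_size=1,  # matches num_microbatches=1 in the DP optimizer
    num_epochs=None,
    shuffle=True)
estimator.train(input_fn=train_input_fn, steps=100)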
Example #10
  def testClippingNorm(self, cls):
    with self.cached_session() as sess:
      var0 = tf.Variable([0.0, 0.0])
      data0 = tf.Variable([[3.0, 4.0], [6.0, 8.0]])

      ledger = privacy_ledger.PrivacyLedger(1e6, 1 / 1e6, 50, 50)
      dp_average_query = gaussian_query.GaussianAverageQuery(1.0, 0.0, 1)
      dp_average_query = privacy_ledger.QueryWithLedger(
          dp_average_query, ledger)

      opt = cls(dp_average_query, num_microbatches=1, learning_rate=2.0)

      self.evaluate(tf.global_variables_initializer())
      # Fetch params to validate initial values
      self.assertAllClose([0.0, 0.0], self.evaluate(var0))

      # With a single microbatch, the mean gradient is [-4.5, -6.0], whose
      # norm 7.5 exceeds l2_norm_clip=1.0, so it is scaled to [-0.6, -0.8].
      gradient_op = opt.compute_gradients(loss(data0, var0), [var0])
      grads_and_vars = sess.run(gradient_op)
      self.assertAllCloseAccordingToType([-0.6, -0.8], grads_and_vars[0][0])
Example #11
    def test_sum_query(self):
        record1 = tf.constant([2.0, 0.0])
        record2 = tf.constant([-1.0, 1.0])

        population_size = tf.Variable(0)
        selection_probability = tf.Variable(0.0)
        ledger = privacy_ledger.PrivacyLedger(population_size,
                                              selection_probability, 50, 50)

        query = gaussian_query.GaussianSumQuery(l2_norm_clip=10.0,
                                                stddev=0.0,
                                                ledger=ledger)
        query = privacy_ledger.QueryWithLedger(query, ledger)

        # First sample.
        tf.assign(population_size, 10)
        tf.assign(selection_probability, 0.1)
        test_utils.run_query(query, [record1, record2])

        expected_queries = [[10.0, 0.0]]
        formatted = ledger.get_formatted_ledger_eager()
        sample_1 = formatted[0]
        self.assertAllClose(sample_1.population_size, 10.0)
        self.assertAllClose(sample_1.selection_probability, 0.1)
        self.assertAllClose(sample_1.queries, expected_queries)

        # Second sample.
        tf.assign(population_size, 20)
        tf.assign(selection_probability, 0.2)
        test_utils.run_query(query, [record1, record2])

        formatted = ledger.get_formatted_ledger_eager()
        sample_1, sample_2 = formatted
        self.assertAllClose(sample_1.population_size, 10.0)
        self.assertAllClose(sample_1.selection_probability, 0.1)
        self.assertAllClose(sample_1.queries, expected_queries)

        self.assertAllClose(sample_2.population_size, 20.0)
        self.assertAllClose(sample_2.selection_probability, 0.2)
        self.assertAllClose(sample_2.queries, expected_queries)
Example #12
def cnn_model_fn(features, labels):
    """Model function for a CNN."""

    # Define CNN architecture using tf.keras.layers.
    if FLAGS.dataset == "mnist":
        input_layer = tf.reshape(features, [-1, 28, 28, 1])
    elif FLAGS.dataset == "cifar10":
        input_layer = features
        # input_layer = tf.reshape(features, [-1, 32, 32, 3])
    elif FLAGS.dataset == "svhn":
        input_layer = tf.reshape(features, [-1, 32, 32, 3])

    if FLAGS.model == "trival":
        logits = trival(input_layer=input_layer)
    elif FLAGS.model == "deep":
        logits = deep(input_layer=input_layer)
        # input_layer = tf.reshape(features, [-1, 32, 32, 3])
    elif FLAGS.model == "letnet":
        logits = trival(input_layer=input_layer)

    # Calculate accuracy.
    correct_pred = tf.equal(tf.argmax(logits, 1), labels)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # Calculate loss as a vector (to support microbatches in DP-SGD).
    vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                                 logits=logits)
    # Define mean of loss across minibatch (for reporting through tf.Estimator).
    scalar_loss = tf.reduce_mean(vector_loss)

    if FLAGS.dpsgd:
        ledger = privacy_ledger.PrivacyLedger(
            population_size=60000,
            selection_probability=(FLAGS.batch_size / 60000))

        # Use DP version of GradientDescentOptimizer. Other optimizers are
        # available in dp_optimizer. Most optimizers inheriting from
        # tf.train.Optimizer should be wrappable in differentially private
        # counterparts by calling dp_optimizer.optimizer_from_args().
        if FLAGS.method == 'sgd':
            optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer(
                l2_norm_clip=FLAGS.l2_norm_clip,
                noise_multiplier=FLAGS.noise_multiplier,
                num_microbatches=FLAGS.microbatches,
                ledger=ledger,
                learning_rate=FLAGS.learning_rate)
        elif FLAGS.method == 'adam':
            optimizer = dp_optimizer.DPAdamGaussianOptimizer(
                l2_norm_clip=FLAGS.l2_norm_clip,
                noise_multiplier=FLAGS.noise_multiplier,
                num_microbatches=FLAGS.microbatches,
                ledger=ledger,
                learning_rate=FLAGS.learning_rate,
                unroll_microbatches=True)
        elif FLAGS.method == 'adagrad':
            optimizer = dp_optimizer.DPAdagradGaussianOptimizer(
                l2_norm_clip=FLAGS.l2_norm_clip,
                noise_multiplier=FLAGS.noise_multiplier,
                num_microbatches=FLAGS.microbatches,
                ledger=ledger,
                learning_rate=FLAGS.learning_rate)
        elif FLAGS.method == 'momentum':
            optimizer = dp_optimizer.DPMomentumGaussianOptimizer(
                l2_norm_clip=FLAGS.l2_norm_clip,
                noise_multiplier=FLAGS.noise_multiplier,
                num_microbatches=FLAGS.microbatches,
                ledger=ledger,
                learning_rate=FLAGS.learning_rate,
                momentum=FLAGS.momentum,
                use_nesterov=FLAGS.use_nesterov)

        else:
            raise ValueError(
                'method must be sgd or adam or adagrad or momentum')
        opt_loss = vector_loss
    else:
        if FLAGS.method == 'sgd':
            optimizer = GradientDescentOptimizer(
                learning_rate=FLAGS.learning_rate)
        elif FLAGS.method == 'adam':
            optimizer = AdamOptimizer(learning_rate=FLAGS.learning_rate)
        elif FLAGS.method == 'adagrad':
            optimizer = AdagradOptimizer(learning_rate=FLAGS.learning_rate)
        elif FLAGS.method == 'momentum':
            optimizer = MomentumOptimizer(learning_rate=FLAGS.learning_rate,
                                          momentum=FLAGS.momentum,
                                          use_nesterov=FLAGS.use_nesterov)
        else:
            raise ValueError(
                'method must be sgd or adam or adagrad or momentum')
        opt_loss = scalar_loss
    global_step = tf.train.get_global_step()
    train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
    # In the following, we pass the mean of the loss (scalar_loss) rather than
    # the vector_loss because tf.estimator requires a scalar loss. This is only
    # used for evaluation and debugging by tf.estimator. The actual loss being
    # minimized is opt_loss defined above and passed to optimizer.minimize().
    return train_op, scalar_loss, accuracy
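Because this variant returns the train op directly instead of an EstimatorSpec with an epsilon-printing hook, the privacy budget can be computed offline from the same sampling parameters. A sketch using the RDP accountant from TF Privacy; FLAGS.epochs is a hypothetical flag standing in for however many passes were actually run:

from tensorflow_privacy.privacy.analysis.rdp_accountant import (
    compute_rdp, get_privacy_spent)

q = FLAGS.batch_size / 60000  # sampling probability, as in the ledger above
orders = [1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64))
steps = FLAGS.epochs * 60000 // FLAGS.batch_size  # FLAGS.epochs: hypothetical
rdp = compute_rdp(q=q,
                  noise_multiplier=FLAGS.noise_multiplier,
                  steps=steps,
                  orders=orders)
eps, _, _ = get_privacy_spent(orders, rdp, target_delta=1e-5)
print('epsilon = %.2f at delta = 1e-5' % eps)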
Example #13
def cnn_model_fn(features, labels, mode):
  """Model function for a CNN."""

  # Define CNN architecture using tf.keras.layers.
  input_layer = tf.reshape(features['x'], [-1, 28, 28, 1])
  y = tf.keras.layers.Conv2D(16, 8,
                             strides=2,
                             padding='same',
                             activation='relu').apply(input_layer)
  y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
  y = tf.keras.layers.Conv2D(32, 4,
                             strides=2,
                             padding='valid',
                             activation='relu').apply(y)
  y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
  y = tf.keras.layers.Flatten().apply(y)
  y = tf.keras.layers.Dense(32, activation='relu').apply(y)
  logits = tf.keras.layers.Dense(10).apply(y)

  # Calculate loss as a vector (to support microbatches in DP-SGD).
  vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
      labels=labels, logits=logits)
  # Define mean of loss across minibatch (for reporting through tf.Estimator).
  scalar_loss = tf.reduce_mean(vector_loss)

  # Configure the training op (for TRAIN mode).
  if mode == tf.estimator.ModeKeys.TRAIN:

    if FLAGS.dpsgd:
      ledger = privacy_ledger.PrivacyLedger(
          population_size=60000,
          selection_probability=(FLAGS.batch_size / 60000))

      # Use DP version of GradientDescentOptimizer. Other optimizers are
      # available in dp_optimizer. Most optimizers inheriting from
      # tf.train.Optimizer should be wrappable in differentially private
      # counterparts by calling dp_optimizer.optimizer_from_args().
      optimizer = dp_optimizer.DPGradientDescentGaussianOptimizer(
          l2_norm_clip=FLAGS.l2_norm_clip,
          noise_multiplier=FLAGS.noise_multiplier,
          num_microbatches=FLAGS.microbatches,
          ledger=ledger,
          learning_rate=FLAGS.learning_rate)
      training_hooks = [
          EpsilonPrintingTrainingHook(ledger)
      ]
      opt_loss = vector_loss
    else:
      optimizer = GradientDescentOptimizer(learning_rate=FLAGS.learning_rate)
      training_hooks = []
      opt_loss = scalar_loss
    global_step = tf.train.get_global_step()
    train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
    # In the following, we pass the mean of the loss (scalar_loss) rather than
    # the vector_loss because tf.estimator requires a scalar loss. This is only
    # used for evaluation and debugging by tf.estimator. The actual loss being
    # minimized is opt_loss defined above and passed to optimizer.minimize().
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=scalar_loss,
                                      train_op=train_op,
                                      training_hooks=training_hooks)

  # Add evaluation metrics (for EVAL mode).
  elif mode == tf.estimator.ModeKeys.EVAL:
    eval_metric_ops = {
        'accuracy':
            tf.metrics.accuracy(
                labels=labels,
                predictions=tf.argmax(input=logits, axis=1))
    }

    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=scalar_loss,
                                      eval_metric_ops=eval_metric_ops)
Example #14
def cnn_model_fn(features, labels, mode):
  """Model function for a CNN."""

  # Define CNN architecture using tf.keras.layers.
  input_layer = tf.reshape(features['x'], [-1, 28, 28, 1])
  y = tf.keras.layers.Conv2D(16, 8,
                             strides=2,
                             padding='same',
                             activation='relu').apply(input_layer)
  y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
  y = tf.keras.layers.Conv2D(32, 4,
                             strides=2,
                             padding='valid',
                             activation='relu').apply(y)
  y = tf.keras.layers.MaxPool2D(2, 1).apply(y)
  y = tf.keras.layers.Flatten().apply(y)
  y = tf.keras.layers.Dense(32, activation='relu').apply(y)
  logits = tf.keras.layers.Dense(10).apply(y)

  # Calculate loss as a vector and as its average across minibatch.
  vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                               logits=logits)
  scalar_loss = tf.reduce_mean(vector_loss)

  # Configure the training op (for TRAIN mode).
  if mode == tf.estimator.ModeKeys.TRAIN:

    ledger = privacy_ledger.PrivacyLedger(
        population_size=60000,
        selection_probability=(FLAGS.batch_size / 60000),
        max_samples=1e6,
        max_queries=1e6)

    optimizer = optimizers.dp_optimizer.DPGradientDescentGaussianOptimizer(
        l2_norm_clip=FLAGS.l2_norm_clip,
        noise_multiplier=FLAGS.noise_multiplier,
        num_microbatches=FLAGS.microbatches,
        ledger=ledger,
        learning_rate=FLAGS.learning_rate)
    global_step = tf.train.get_global_step()
    train_op = optimizer.minimize(loss=vector_loss, global_step=global_step)

    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=scalar_loss,
                                      train_op=train_op)

  # Add evaluation metrics (for EVAL mode).
  elif mode == tf.estimator.ModeKeys.EVAL:
    eval_metric_ops = {
        'accuracy':
            tf.metrics.accuracy(
                labels=labels,
                predictions=tf.argmax(input=logits, axis=1))
    }
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=scalar_loss,
                                      eval_metric_ops=eval_metric_ops)
Example #15
    def __init__(self,
                 sequence_length,
                 num_classes,
                 vocab_size,
                 dis_emb_dim,
                 d_rate,
                 noise_multiplier,
                 l2_norm_clip,
                 population_size,
                 delta,
                 num_microbatches,
                 filter_sizes,
                 num_filters,
                 batch_size,
                 hidden_dim,
                 start_token,
                 goal_out_size,
                 step_size,
                 l2_reg_lambda=0.0):
        self.sequence_length = sequence_length
        self.num_classes = num_classes
        self.vocab_size = vocab_size
        self.dis_emb_dim = dis_emb_dim
        self.filter_sizes = filter_sizes
        self.num_filters = num_filters
        self.batch_size = batch_size
        self.hidden_dim = hidden_dim
        self.start_token = tf.constant([start_token] * self.batch_size,
                                       dtype=tf.int32)
        self.l2_reg_lambda = l2_reg_lambda
        self.num_filters_total = sum(self.num_filters)
        self.temperature = 1.0
        self.grad_clip = 5.0  #Does not apply to d_optimizer
        self.goal_out_size = goal_out_size
        self.step_size = step_size

        self.D_input_y = tf.placeholder(tf.float32, [None, num_classes],
                                        name="input_y")
        self.D_input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                        name="input_x")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")

        self.d_rate = d_rate
        self.l2_norm_clip = l2_norm_clip
        self.noise_multiplier = noise_multiplier
        self.num_microbatches = num_microbatches
        self.population_size = population_size
        self.delta = delta

        with tf.name_scope('D_update'):
            self.D_l2_loss = tf.constant(0.0)
            self.FeatureExtractor_unit = self.FeatureExtractor()

            # Train for Discriminator
            with tf.variable_scope("feature") as self.feature_scope:
                D_feature = self.FeatureExtractor_unit(
                    self.D_input_x, self.dropout_keep_prob)
                self.feature_scope.reuse_variables()

            D_scores, D_predictions, self.ypred_for_auc = self.classification(
                D_feature)
            losses = tf.nn.softmax_cross_entropy_with_logits(
                logits=D_scores, labels=self.D_input_y)
            self.D_loss = tf.reduce_mean(
                losses) + self.l2_reg_lambda * self.D_l2_loss

            self.D_params = [
                param for param in tf.trainable_variables()
                if 'Discriminator' in param.name
                or 'FeatureExtractor' in param.name
            ]

            self.ledger = privacy_ledger.PrivacyLedger(
                population_size=self.population_size,
                selection_probability=(self.batch_size / self.population_size))

            d_optimizer = dp_optimizer.DPAdamGaussianOptimizer(
                l2_norm_clip=self.l2_norm_clip,
                noise_multiplier=self.noise_multiplier,
                num_microbatches=self.num_microbatches,
                ledger=self.ledger,
                learning_rate=self.d_rate)

            D_grads_and_vars = d_optimizer.compute_gradients(
                self.D_loss, self.D_params, aggregation_method=2)
            self.D_train_op = d_optimizer.apply_gradients(D_grads_and_vars)
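Two arguments in the last few lines deserve comment; the notes below are an interpretation of the TF1 / TF Privacy behavior, not part of the original:

# With the DP optimizer, compute_gradients() clips each microbatch gradient
# to l2_norm_clip, adds Gaussian noise (stddev l2_norm_clip *
# noise_multiplier) to the sum, and averages over num_microbatches.
# aggregation_method=2 selects TF1's
# AggregationMethod.EXPERIMENTAL_ACCUMULATE_N, which only changes how TF
# combines partial gradients, not the privacy guarantee.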
Example #16
  def test_fail_on_probability_zero(self):
    with self.assertRaisesRegexp(ValueError,
                                 'Selection probability cannot be 0.'):
      privacy_ledger.PrivacyLedger(10, 0)
Example #17
    def __init__(self,
                 sequence_length,
                 num_classes,
                 vocab_size,
                 emb_dim,
                 dis_emb_dim,
                 noise_multiplier,
                 l2_norm_clip,
                 population_size,
                 delta,
                 num_microbatches,
                 filter_sizes,
                 num_filters,
                 batch_size,
                 hidden_dim,
                 start_token,
                 goal_out_size,
                 goal_size,
                 step_size,
                 D_model,
                 LSTMlayer_num=1,
                 l2_reg_lambda=0.0,
                 learning_rate=0.001):
        self.sequence_length = sequence_length
        self.num_classes = num_classes
        self.vocab_size = vocab_size
        self.emb_dim = emb_dim
        self.dis_emb_dim = dis_emb_dim

        self.noise_multiplier = noise_multiplier
        self.l2_norm_clip = l2_norm_clip
        self.population_size = population_size
        self.delta = delta
        self.num_microbatches = num_microbatches

        self.filter_sizes = filter_sizes
        self.num_filters = num_filters
        self.batch_size = batch_size
        self.hidden_dim = hidden_dim
        self.start_token = tf.constant([start_token] * self.batch_size,
                                       dtype=tf.int32)
        self.LSTMlayer_num = LSTMlayer_num
        self.l2_reg_lambda = l2_reg_lambda
        self.learning_rate = learning_rate
        self.num_filters_total = sum(self.num_filters)
        self.grad_clip = 5.0
        self.goal_out_size = goal_out_size
        self.goal_size = goal_size
        self.step_size = step_size
        self.D_model = D_model
        self.FeatureExtractor_unit = self.D_model.FeatureExtractor_unit

        self.scope = self.D_model.feature_scope
        self.worker_params = []
        self.manager_params = []

        self.epis = 0.65
        self.tem = 0.8
        with tf.variable_scope('place_holder'):
            self.x = tf.placeholder(
                tf.int32,
                shape=[self.batch_size, self.sequence_length
                       ])  # sequence of tokens generated by generator
            self.reward = tf.placeholder(
                tf.float32,
                shape=[self.batch_size, self.sequence_length / self.step_size
                       ])  # per-segment rewards (one per step_size tokens)
            self.given_num = tf.placeholder(tf.int32)
            self.drop_out = tf.placeholder(tf.float32,
                                           name="dropout_keep_prob")
            self.train = tf.placeholder(tf.int32, None, name="train")

        with tf.variable_scope('Worker'):
            self.g_embeddings = tf.Variable(
                tf.random_normal([self.vocab_size, self.emb_dim], stddev=0.1))
            self.worker_params.append(self.g_embeddings)
            self.g_worker_recurrent_unit = self.create_Worker_recurrent_unit(
                self.worker_params)  # maps h_tm1 to h_t for generator
            self.g_worker_output_unit = self.create_Worker_output_unit(
                self.worker_params)  # maps h_t to o_t (output token logits)
            self.W_workerOut_change = tf.Variable(
                tf.random_normal([self.vocab_size, self.goal_size],
                                 stddev=0.1))

            self.g_change = tf.Variable(
                tf.random_normal([self.goal_out_size, self.goal_size],
                                 stddev=0.1))
            self.worker_params.extend([self.W_workerOut_change, self.g_change])

            self.h0_worker = tf.zeros([self.batch_size, self.hidden_dim])
            self.h0_worker = tf.stack([self.h0_worker, self.h0_worker])

        with tf.variable_scope('Manager'):
            self.g_manager_recurrent_unit = self.create_Manager_recurrent_unit(
                self.manager_params)  # maps h_tm1 to h_t for generator
            self.g_manager_output_unit = self.create_Manager_output_unit(
                self.manager_params)  # maps h_t to o_t (output token logits)
            self.h0_manager = tf.zeros([self.batch_size, self.hidden_dim])
            self.h0_manager = tf.stack([self.h0_manager, self.h0_manager])

            self.goal_init = tf.get_variable(
                "goal_init",
                initializer=tf.truncated_normal(
                    [self.batch_size, self.goal_out_size], stddev=0.1))
            self.manager_params.extend([self.goal_init])

        self.padding_array = tf.constant(
            -1, shape=[self.batch_size, self.sequence_length], dtype=tf.int32)

        with tf.name_scope("roll_out"):
            self.gen_for_reward = self.rollout(self.x, self.given_num)

        # processed for batch
        with tf.device("/cpu:0"):
            self.processed_x = tf.transpose(
                tf.nn.embedding_lookup(self.g_embeddings, self.x),
                perm=[1, 0, 2])  # seq_length x batch_size x emb_dim

        gen_o = tensor_array_ops.TensorArray(dtype=tf.float32,
                                             size=self.sequence_length,
                                             dynamic_size=False,
                                             infer_shape=True)
        gen_x = tensor_array_ops.TensorArray(dtype=tf.int32,
                                             size=1,
                                             dynamic_size=True,
                                             infer_shape=True,
                                             clear_after_read=False)

        goal = tensor_array_ops.TensorArray(dtype=tf.float32,
                                            size=self.sequence_length,
                                            dynamic_size=False,
                                            infer_shape=True,
                                            clear_after_read=False)

        feature_array = tensor_array_ops.TensorArray(
            dtype=tf.float32,
            size=self.sequence_length + 1,
            dynamic_size=False,
            infer_shape=True,
            clear_after_read=False)
        real_goal_array = tensor_array_ops.TensorArray(
            dtype=tf.float32,
            size=self.sequence_length / self.step_size,
            dynamic_size=False,
            infer_shape=True,
            clear_after_read=False)

        gen_real_goal_array = tensor_array_ops.TensorArray(
            dtype=tf.float32,
            size=self.sequence_length,
            dynamic_size=False,
            infer_shape=True,
            clear_after_read=False)

        gen_o_worker_array = tensor_array_ops.TensorArray(
            dtype=tf.float32,
            size=self.sequence_length / self.step_size,
            dynamic_size=False,
            infer_shape=True,
            clear_after_read=False)

        def _g_recurrence(i, x_t, h_tm1, h_tm1_manager, gen_o, gen_x, goal,
                          last_goal, real_goal, step_size, gen_real_goal_array,
                          gen_o_worker_array):
            ## padding sentence by -1
            cur_sen = tf.cond(
                i > 0,
                lambda: tf.split(
                    tf.concat([tf.transpose(gen_x.stack(), perm=[1, 0]),
                               self.padding_array], 1),
                    [self.sequence_length, i], 1)[0],
                lambda: self.padding_array)
            with tf.variable_scope(self.scope):
                feature = self.FeatureExtractor_unit(cur_sen, self.drop_out)
            h_t_Worker = self.g_worker_recurrent_unit(
                x_t, h_tm1)  # hidden_memory_tuple
            o_t_Worker = self.g_worker_output_unit(
                h_t_Worker)  # batch x vocab , logits not prob
            o_t_Worker = tf.reshape(
                o_t_Worker, [self.batch_size, self.vocab_size, self.goal_size])

            h_t_manager = self.g_manager_recurrent_unit(feature, h_tm1_manager)
            sub_goal = self.g_manager_output_unit(h_t_manager)
            sub_goal = tf.nn.l2_normalize(sub_goal, 1)
            goal = goal.write(i, sub_goal)

            real_sub_goal = tf.add(last_goal, sub_goal)

            w_g = tf.matmul(real_goal, self.g_change)  #batch x goal_size
            w_g = tf.nn.l2_normalize(w_g, 1)
            gen_real_goal_array = gen_real_goal_array.write(i, real_goal)

            w_g = tf.expand_dims(w_g, 2)  #batch x goal_size x 1

            gen_o_worker_array = gen_o_worker_array.write(i, o_t_Worker)

            x_logits = tf.matmul(o_t_Worker, w_g)
            x_logits = tf.squeeze(x_logits)

            log_prob = tf.log(
                tf.nn.softmax(
                    tf.cond(i > 1,
                            lambda: tf.cond(self.train > 0,
                                            lambda: self.tem,
                                            lambda: 1.5),
                            lambda: 1.5) * x_logits))
            next_token = tf.cast(
                tf.reshape(tf.multinomial(log_prob, 1), [self.batch_size]),
                tf.int32)
            x_tp1 = tf.nn.embedding_lookup(self.g_embeddings,
                                           next_token)  # batch x emb_dim
            with tf.control_dependencies([cur_sen]):
                gen_x = gen_x.write(i, next_token)  # indices, batch_size
            gen_o = gen_o.write(i,
                                tf.reduce_sum(
                                    tf.multiply(
                                        tf.one_hot(next_token, self.vocab_size,
                                                   1.0, 0.0),
                                        tf.nn.softmax(x_logits)),
                                    1))  # [batch_size] , prob
            return (i + 1, x_tp1, h_t_Worker, h_t_manager, gen_o, gen_x, goal,
                    tf.cond(((i + 1) % step_size) > 0,
                            lambda: real_sub_goal,
                            lambda: tf.constant(
                                0.0,
                                shape=[self.batch_size, self.goal_out_size])),
                    tf.cond(((i + 1) % step_size) > 0,
                            lambda: real_goal,
                            lambda: real_sub_goal),
                    step_size, gen_real_goal_array, gen_o_worker_array)

        _, _, _, _, self.gen_o, self.gen_x, _, _, _, _, self.gen_real_goal_array, self.gen_o_worker_array = control_flow_ops.while_loop(
            cond=lambda i, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11: i <
            self.sequence_length,
            body=_g_recurrence,
            loop_vars=(tf.constant(0, dtype=tf.int32),
                       tf.nn.embedding_lookup(self.g_embeddings,
                                              self.start_token),
                       self.h0_worker, self.h0_manager, gen_o, gen_x, goal,
                       tf.zeros([self.batch_size,
                                 self.goal_out_size]), self.goal_init,
                       step_size, gen_real_goal_array, gen_o_worker_array),
            parallel_iterations=1)

        self.gen_x = self.gen_x.stack()  # seq_length x batch_size

        self.gen_x = tf.transpose(self.gen_x,
                                  perm=[1, 0])  # batch_size x seq_length

        self.gen_real_goal_array = self.gen_real_goal_array.stack(
        )  # seq_length x batch_size x goal

        self.gen_real_goal_array = tf.transpose(
            self.gen_real_goal_array,
            perm=[1, 0, 2])  # batch_size x seq_length x goal

        self.gen_o_worker_array = self.gen_o_worker_array.stack(
        )  # seq_length x batch_size* vocab*goal

        self.gen_o_worker_array = tf.transpose(
            self.gen_o_worker_array,
            perm=[1, 0, 2, 3])  # batch_size x seq_length * vocab*goal

        sub_feature = tensor_array_ops.TensorArray(dtype=tf.float32,
                                                   size=self.sequence_length /
                                                   self.step_size,
                                                   dynamic_size=False,
                                                   infer_shape=True,
                                                   clear_after_read=False)

        all_sub_features = tensor_array_ops.TensorArray(
            dtype=tf.float32,
            size=self.sequence_length,
            dynamic_size=False,
            infer_shape=True,
            clear_after_read=False)
        all_sub_goals = tensor_array_ops.TensorArray(dtype=tf.float32,
                                                     size=self.sequence_length,
                                                     dynamic_size=False,
                                                     infer_shape=True,
                                                     clear_after_read=False)

        # supervised pretraining for generator
        g_predictions = tensor_array_ops.TensorArray(dtype=tf.float32,
                                                     size=self.sequence_length,
                                                     dynamic_size=False,
                                                     infer_shape=True)
        ta_emb_x = tensor_array_ops.TensorArray(dtype=tf.float32,
                                                size=self.sequence_length)
        ta_emb_x = ta_emb_x.unstack(self.processed_x)

        def preTrain(i, x_t, g_predictions, h_tm1, input_x, h_tm1_manager,
                     last_goal, real_goal, feature_array, real_goal_array,
                     sub_feature, all_sub_features, all_sub_goals):
            ## padding sentence by -1
            cur_sen = tf.split(
                tf.concat([
                    tf.split(input_x, [i, self.sequence_length - i], 1)[0],
                    self.padding_array
                ], 1), [self.sequence_length, i], 1)[0]  #padding sentence
            with tf.variable_scope(self.scope):
                feature = self.FeatureExtractor_unit(cur_sen, self.drop_out)
            feature_array = feature_array.write(i, feature)

            real_goal_array = tf.cond(
                i > 0, lambda: real_goal_array,
                lambda: real_goal_array.write(0, self.goal_init))
            h_t_manager = self.g_manager_recurrent_unit(feature, h_tm1_manager)
            sub_goal = self.g_manager_output_unit(h_t_manager)
            sub_goal = tf.nn.l2_normalize(sub_goal, 1)

            h_t_Worker = tf.cond(
                i > 0, lambda: self.g_worker_recurrent_unit(x_t, h_tm1),
                lambda: h_tm1)  # hidden_memory_tuple
            o_t_Worker = self.g_worker_output_unit(
                h_t_Worker)  # batch x vocab , logits not prob
            o_t_Worker = tf.reshape(
                o_t_Worker, [self.batch_size, self.vocab_size, self.goal_size])

            real_sub_goal = tf.cond(i > 0, lambda: tf.add(last_goal, sub_goal),
                                    lambda: real_goal)
            all_sub_goals = tf.cond(
                i > 0, lambda: all_sub_goals.write(i - 1, real_goal),
                lambda: all_sub_goals)

            w_g = tf.matmul(real_goal, self.g_change)  # batch x goal_size
            w_g = tf.nn.l2_normalize(w_g, 1)
            w_g = tf.expand_dims(w_g, 2)  # batch x goal_size x 1

            x_logits = tf.matmul(o_t_Worker, w_g)
            x_logits = tf.squeeze(x_logits)

            g_predictions = tf.cond(
                i > 0,
                lambda: g_predictions.write(i - 1, tf.nn.softmax(x_logits)),
                lambda: g_predictions)

            sub_feature = tf.cond(
                (i % step_size) > 0,
                lambda: sub_feature,
                lambda: tf.cond(
                    i > 0,
                    lambda: sub_feature.write(
                        i / step_size - 1,
                        tf.subtract(feature,
                                    feature_array.read(i - step_size))),
                    lambda: sub_feature))

            all_sub_features = tf.cond(
                i > 0,
                lambda: tf.cond(
                    (i % step_size) > 0,
                    lambda: all_sub_features.write(
                        i - 1,
                        tf.subtract(feature,
                                    feature_array.read(i - i % step_size))),
                    lambda: all_sub_features.write(
                        i - 1,
                        tf.subtract(feature,
                                    feature_array.read(i - step_size)))),
                lambda: all_sub_features)

            real_goal_array = tf.cond(
                ((i) % step_size) > 0, lambda: real_goal_array,
                lambda: tf.cond(
                    (i) / step_size < self.sequence_length / step_size, lambda:
                    tf.cond(
                        i > 0, lambda: real_goal_array.write(
                            (i) / step_size, real_sub_goal), lambda:
                        real_goal_array), lambda: real_goal_array))
            x_tp1 = tf.cond(i > 0, lambda: ta_emb_x.read(i - 1), lambda: x_t)

            return (i + 1, x_tp1, g_predictions, h_t_Worker, input_x,
                    h_t_manager,
                    tf.cond((i % step_size) > 0,
                            lambda: real_sub_goal,
                            lambda: tf.constant(
                                0.0,
                                shape=[self.batch_size, self.goal_out_size])),
                    tf.cond((i % step_size) > 0,
                            lambda: real_goal,
                            lambda: real_sub_goal),
                    feature_array, real_goal_array, sub_feature,
                    all_sub_features, all_sub_goals)

        _, _, self.g_predictions, _, _, _, _, _, self.feature_array, self.real_goal_array, self.sub_feature, self.all_sub_features, self.all_sub_goals = control_flow_ops.while_loop(
            cond=lambda i, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12: i
            < self.sequence_length + 1,
            body=preTrain,
            loop_vars=(tf.constant(0, dtype=tf.int32),
                       tf.nn.embedding_lookup(self.g_embeddings,
                                              self.start_token), g_predictions,
                       self.h0_worker, self.x, self.h0_manager,
                       tf.zeros([self.batch_size, self.goal_out_size]),
                       self.goal_init, feature_array, real_goal_array,
                       sub_feature, all_sub_features, all_sub_goals),
            parallel_iterations=1)

        self.sub_feature = self.sub_feature.stack(
        )  # seq_length x batch_size x num_filter
        self.sub_feature = tf.transpose(self.sub_feature, perm=[1, 0, 2])

        self.real_goal_array = self.real_goal_array.stack()
        self.real_goal_array = tf.transpose(self.real_goal_array,
                                            perm=[1, 0, 2])
        print(self.real_goal_array.shape)
        print(self.sub_feature.shape)
        self.pretrain_goal_loss = -tf.reduce_sum(1 - tf.losses.cosine_distance(
            tf.nn.l2_normalize(self.sub_feature, 2),
            tf.nn.l2_normalize(self.real_goal_array, 2), 2)) / (
                self.sequence_length * self.batch_size / self.step_size)

        with tf.name_scope("Manager_PreTrain_update"):
            pretrain_manager_opt = tf.train.AdamOptimizer(self.learning_rate)

            self.pretrain_manager_grad, _ = tf.clip_by_global_norm(
                tf.gradients(self.pretrain_goal_loss, self.manager_params),
                self.grad_clip)
            self.pretrain_manager_updates = pretrain_manager_opt.apply_gradients(
                zip(self.pretrain_manager_grad, self.manager_params))
        # self.real_goal_array = self.real_goal_array.stack()

        self.g_predictions = tf.transpose(
            self.g_predictions.stack(),
            perm=[1, 0, 2])  # batch_size x seq_length x vocab_size
        self.cross_entropy = tf.reduce_sum(self.g_predictions * tf.log(
            tf.clip_by_value(self.g_predictions, 1e-20, 1.0))) / (
                self.batch_size * self.sequence_length * self.vocab_size)

        self.pretrain_worker_loss = -tf.reduce_sum(
            tf.one_hot(tf.to_int32(tf.reshape(
                self.x, [-1])), self.vocab_size, 1.0, 0.0) * tf.log(
                    tf.clip_by_value(
                        tf.reshape(self.g_predictions, [-1, self.vocab_size]),
                        1e-20, 1.0))) / (self.sequence_length *
                                         self.batch_size)

        with tf.name_scope("Worker_PreTrain_update"):
            # training updates
            self.worker_pre_ledger = privacy_ledger.PrivacyLedger(
                population_size=self.population_size,
                selection_probability=(self.batch_size / self.population_size))

            pretrain_worker_opt = dp_optimizer.DPAdamGaussianOptimizer(
                l2_norm_clip=self.l2_norm_clip,
                noise_multiplier=self.noise_multiplier,
                num_microbatches=self.num_microbatches,
                ledger=self.worker_pre_ledger,
                learning_rate=self.learning_rate)

            self.pretrain_worker_grad, _ = tf.clip_by_global_norm(
                tf.gradients(self.pretrain_worker_loss, self.worker_params),
                self.grad_clip)
            self.pretrain_worker_updates = pretrain_worker_opt.apply_gradients(
                zip(self.pretrain_worker_grad, self.worker_params))

        self.goal_loss = -tf.reduce_sum(
            tf.multiply(
                self.reward, 1 - tf.losses.cosine_distance(
                    tf.nn.l2_normalize(self.sub_feature, 2),
                    tf.nn.l2_normalize(self.real_goal_array, 2), 2))) / (
                        self.sequence_length * self.batch_size /
                        self.step_size)

        with tf.name_scope("Manager_update"):
            manager_opt = tf.train.AdamOptimizer(self.learning_rate)

            self.manager_grad, _ = tf.clip_by_global_norm(
                tf.gradients(self.goal_loss, self.manager_params),
                self.grad_clip)
            self.manager_updates = manager_opt.apply_gradients(
                zip(self.manager_grad, self.manager_params))

        self.all_sub_features = self.all_sub_features.stack()
        self.all_sub_features = tf.transpose(self.all_sub_features,
                                             perm=[1, 0, 2])

        self.all_sub_goals = self.all_sub_goals.stack()
        self.all_sub_goals = tf.transpose(self.all_sub_goals, perm=[1, 0, 2])
        # self.all_sub_features = tf.nn.l2_normalize(self.all_sub_features, 2)
        self.Worker_Reward = 1 - tf.losses.cosine_distance(
            tf.nn.l2_normalize(self.all_sub_features, 2),
            tf.nn.l2_normalize(self.all_sub_goals, 2), 2)
        # print self.Worker_Reward.shape
        self.worker_loss = -tf.reduce_sum(
            tf.multiply(
                self.Worker_Reward,
                tf.one_hot(tf.to_int32(tf.reshape(
                    self.x, [-1])), self.vocab_size, 1.0, 0.0) * tf.log(
                        tf.clip_by_value(
                            tf.reshape(self.g_predictions,
                                       [-1, self.vocab_size]), 1e-20,
                            1.0)))) / (self.sequence_length * self.batch_size)
        with tf.name_scope("Worker_update"):
            # training updates
            worker_opt = tf.train.AdamOptimizer(self.learning_rate)
            self.worker_grad, _ = tf.clip_by_global_norm(
                tf.gradients(self.worker_loss, self.worker_params),
                self.grad_clip)
            self.worker_updates = worker_opt.apply_gradients(
                zip(self.worker_grad, self.worker_params))
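One scope note worth recording about this example's privacy accounting:

# Only the worker pre-training step above goes through
# DPAdamGaussianOptimizer with self.worker_pre_ledger; the manager updates
# and the adversarial Worker_update use plain tf.train.AdamOptimizer, so an
# epsilon computed from this ledger covers worker pre-training only.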
Example #18
D_loss = D_loss_real + D_loss_fake

vector_G_loss = tf.nn.sigmoid_cross_entropy_with_logits(
    logits=D_fake_logits, labels=tf.ones([batch_size, 1, 1, 1]))
G_loss = tf.reduce_mean(vector_G_loss)

# trainable variables for each network
T_vars = tf.trainable_variables()
D_vars = [var for var in T_vars if var.name.startswith('discriminator')]
G_vars = [var for var in T_vars if var.name.startswith('generator')]

ledger = privacy_ledger.PrivacyLedger(population_size=55000,
                                      selection_probability=(batch_size /
                                                             55000),
                                      max_samples=1e6,
                                      max_queries=1e6)

G_optimizer = dp_optimizer.DPAdamGaussianOptimizer(
    l2_norm_clip=l2_norm_clip,
    noise_multiplier=noise_multiplier,
    num_microbatches=num_microbatches,
    learning_rate=lr,
    beta1=0.5,
    ledger=ledger)

# optimizer for each network
with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
Example #19
def generate_estimator_spec(logits, features, labels, mode):
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {
            'probabilities': tf.nn.softmax(logits),
            'logits': logits,
        }
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)
    # Calculate loss as a vector (to support microbatches in DP-SGD).
    vector_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                                 logits=logits)
    # Define mean of loss across minibatch (for reporting through tf.Estimator).
    scalar_loss = tf.reduce_mean(vector_loss)

    # Configure the training op (for TRAIN mode).
    if mode == tf.estimator.ModeKeys.TRAIN:

        if FLAGS.dp:
            ledger = privacy_ledger.PrivacyLedger(
                population_size=60000,
                selection_probability=(FLAGS.batch_size / 60000))

            # Use DP version of GradientDescentOptimizer. Other optimizers are
            # available in dp_optimizer. Most optimizers inheriting from
            # tf.train.Optimizer should be wrappable in differentially private
            # counterparts by calling dp_optimizer.optimizer_from_args().
            if FLAGS.optim == 'sgd':
                optimizer_func = dp_optimizer.DPGradientDescentGaussianOptimizer
            elif FLAGS.optim == 'adam':
                optimizer_func = dp_optimizer.DPAdamGaussianOptimizer
            elif FLAGS.optim == 'adagrad':
                optimizer_func = dp_optimizer.DPAdagradGaussianOptimizer
            else:
                raise ValueError("optimizer function not supported")

            optimizer = optimizer_func(l2_norm_clip=FLAGS.l2_norm_clip,
                                       noise_multiplier=FLAGS.noise_multiplier,
                                       num_microbatches=FLAGS.microbatches,
                                       ledger=ledger,
                                       learning_rate=FLAGS.learning_rate)
            training_hooks = [EpsilonPrintingTrainingHook(ledger)]
            opt_loss = vector_loss
        else:
            if FLAGS.optim == 'sgd':
                optimizer_func = GradientDescentOptimizer
            elif FLAGS.optim == 'adam':
                optimizer_func = AdamOptimizer
            elif FLAGS.optim == 'adagrad':
                optimizer_func = AdagradOptimizer
            else:
                raise ValueError("optimizer function not supported")
            optimizer = optimizer_func(learning_rate=FLAGS.learning_rate)
            training_hooks = []
            opt_loss = scalar_loss
        global_step = tf.train.get_global_step()
        train_op = optimizer.minimize(loss=opt_loss, global_step=global_step)
        # In the following, we pass the mean of the loss (scalar_loss) rather than
        # the vector_loss because tf.estimator requires a scalar loss. This is only
        # used for evaluation and debugging by tf.estimator. The actual loss being
        # minimized is opt_loss defined above and passed to optimizer.minimize().
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=scalar_loss,
                                          train_op=train_op,
                                          training_hooks=training_hooks)

    # Add evaluation metrics (for EVAL mode).
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metric_ops = {
            'accuracy':
            tf.metrics.accuracy(labels=labels,
                                predictions=tf.argmax(input=logits, axis=1)),
            'crossentropy':
            tf.metrics.mean(scalar_loss)
        }

        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=scalar_loss,
                                          eval_metric_ops=eval_metric_ops)
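A minimal wrapper showing how this spec builder plugs into an Estimator model_fn; build_logits is a hypothetical stand-in for whatever network produces the logits:

def model_fn(features, labels, mode):
    logits = build_logits(features)  # hypothetical network builder
    return generate_estimator_spec(logits, features, labels, mode)

estimator = tf.estimator.Estimator(model_fn=model_fn)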