def perturb_on_batch(self, x, **config_kwargs):
        """Perturbs the given input to generates adversarial examples.

    Args:
      x: Input examples to be perturbed, in a dictionary of NumPy arrays,
        `Tensor`, `SparseTensor`, or `RaggedTensor` objects. The first
        dimension of all tensors or arrays should be the same (i.e. batch size).
      **config_kwargs: (optional) hyperparameters for generating adversarial
        perturbation. Any keyword argument here will override the corresponding
        field in `nsl.configs.AdvNeighborConfig` specified in `__init__`.
        Acceptable keys: `feature_mask`, `adv_step_size`, `adv_grad_norm`,
        `clip_value_min`, `clip_value_max`, `pgd_iterations`, and `pgd_epsilon`.

    Returns:
      A dictionary of NumPy arrays, `SparseTensor`, or `RaggedTensor` objects of
      the generated adversarial examples.
    """
        inputs = tf.nest.map_structure(tf.convert_to_tensor,
                                       x,
                                       expand_composites=True)
        labels, sample_weights = self._extract_labels_and_weights(inputs)
        _, labeled_loss, _, tape = self._forward_pass(inputs, labels,
                                                      sample_weights,
                                                      {'training': False})

        config_kwargs = {
            k: v
            for k, v in config_kwargs.items() if v is not None
        }
        config = attr.evolve(self.adv_config.adv_neighbor_config,
                             **config_kwargs)
        adv_inputs, _ = nsl_lib.gen_adv_neighbor(
            inputs,
            labeled_loss,
            config=config,
            gradient_tape=tape,
            pgd_model_fn=self._call_base_model,
            pgd_loss_fn=self._compute_total_loss,
            pgd_labels=labels)

        if tf.executing_eagerly():
            # Converts `Tensor` objects to NumPy arrays and keeps other objects (e.g.
            # `SparseTensor`) as-is.
            adv_inputs = tf.nest.map_structure(lambda x: x.numpy()
                                               if hasattr(x, 'numpy') else x,
                                               adv_inputs,
                                               expand_composites=False)
        else:
            adv_inputs = tf.keras.backend.function([], adv_inputs)([])

        # Inserts the labels and sample_weights back to the input dictionary, so
        # the returned input has the same structure as the original input.
        for label_key, label in zip(self.label_keys, labels):
            adv_inputs[label_key] = label
        if self.sample_weight_key is not None:
            adv_inputs[self.sample_weight_key] = sample_weights

        return adv_inputs
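A possible end-to-end call of `perturb_on_batch` is sketched below. This is a hedged example: it assumes the method above belongs to an `nsl.keras.AdversarialRegularization`-style wrapper, and the feature/label names and data are made up. The keyword argument in the last call overrides the `adv_step_size` field of the `AdvNeighborConfig` supplied at construction time.

```python
import numpy as np
import tensorflow as tf
import neural_structured_learning as nsl

# Toy base model with a named input so dictionary keys can be matched.
inputs = tf.keras.Input(shape=(2,), name='feature')
outputs = tf.keras.layers.Dense(1)(inputs)
base_model = tf.keras.Model(inputs=inputs, outputs=outputs)

# Wrap the base model with adversarial regularization (assumed wrapper API).
adv_model = nsl.keras.AdversarialRegularization(
    base_model,
    label_keys=['label'],
    adv_config=nsl.configs.make_adv_reg_config(multiplier=0.2,
                                               adv_step_size=0.05))
adv_model.compile(optimizer='sgd', loss='mean_squared_error')

batch = {'feature': np.random.rand(8, 2).astype(np.float32),
         'label': np.random.rand(8, 1).astype(np.float32)}
adv_model.fit(batch, epochs=1)  # Train briefly so the wrapper is built.

# Generate adversarial versions of the batch; adv_step_size=0.1 overrides the
# value from the config passed to __init__.
perturbed_batch = adv_model.perturb_on_batch(batch, adv_step_size=0.1)
```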
def adversarial_loss(features,
                     labels,
                     model,
                     loss_fn,
                     sample_weights=None,
                     adv_config=None,
                     predictions=None,
                     labeled_loss=None,
                     gradient_tape=None,
                     model_kwargs=None):
    """Computes the adversarial loss for `model` given `features` and `labels`.

  This utility function adds adversarial perturbations to the input `features`,
  runs the `model` on the perturbed features for predictions, and returns the
  corresponding loss `loss_fn(labels, model(perturbed_features))`. This function
  can be used in a Keras subclassed model and a custom training loop. This can
  also be used freely as a helper function in eager execution mode.

  The adversarial perturbation is based on the gradient of the labeled loss on
  the original input features, i.e. `loss_fn(labels, model(features))`.
  Therefore, this function needs to compute the model's predictions on the input
  features as `model(features)`, and the labeled loss as `loss_fn(labels,
  predictions)`. If predictions or labeled loss have already been computed, they
  can be passed in via the `predictions` and `labeled_loss` arguments in order
  to save computational resources. Note that in eager execution mode,
  `gradient_tape` needs to be set accordingly when passing in `predictions` or
  `labeled_loss`, so that the gradient can be computed correctly.

  Example:
  ```python
  # A linear regression model (for demonstrating the usage only)
  model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(2,))])
  loss_fn = tf.keras.losses.MeanSquaredError()
  optimizer = tf.keras.optimizers.SGD()

  # Custom training loop. (The actual training data is omitted for clarity.)
  for x, y in train_dataset:
    with tf.GradientTape() as tape_w:

      # A separate GradientTape is needed for watching the input.
      with tf.GradientTape() as tape_x:
        tape_x.watch(x)

        # Regular forward pass.
        labeled_loss = loss_fn(y, model(x))

      # Calculates the adversarial loss. This will reuse labeled_loss and will
      # consume tape_x.
      adv_loss = nsl.keras.adversarial_loss(
          x, y, model, loss_fn, labeled_loss=labeled_loss, gradient_tape=tape_x)

      # Combines both losses. This could also be a weighted combination.
      total_loss = labeled_loss + adv_loss

    # Regular backward pass.
    gradients = tape_w.gradient(total_loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
  ```

  Arguments:
    features: Input features, should be a `Tensor` or a collection of `Tensor`
      objects. If it is a collection, the first dimension of all `Tensor`
      objects inside should be the same (i.e. batch size).
    labels: Target labels.
    model: A callable that takes `features` as inputs and computes `predictions`
      as outputs. An example would be a `tf.keras.Model` object.
    loss_fn: A callable that calculates the labeled loss from `labels`,
      `predictions`, and `sample_weights`. An example would be a
      `tf.keras.losses.Loss` object.
    sample_weights: (optional) A 1-D `Tensor` of weights for the examples, with
      the same length as the first dimension of `features`.
    adv_config: (optional) An `nsl.configs.AdvRegConfig` object for adversarial
      regularization hyperparameters. Use `nsl.configs.make_adv_reg_config` to
      construct one.
    predictions: (optional) Precomputed value of `model(features)`. If set, the
      value will be reused when calculating adversarial regularization. In eager
      mode, the `gradient_tape` has to be set as well.
    labeled_loss: (optional) Precomputed value of `loss_fn(labels,
      model(features))`. If set, the value will be reused when calculating
      adversarial regularization. In eager mode, the `gradient_tape` has to be
      set as well.
    gradient_tape: (optional) A `tf.GradientTape` object watching `features`.
    model_kwargs: (optional) A dictionary of additional keyword arguments to be
      passed to the `model`.

  Returns:
    A `Tensor` for adversarial regularization loss, i.e. labeled loss on
    adversarially perturbed features.
  """

    if adv_config is None:
        adv_config = nsl_configs.AdvRegConfig()

    if model_kwargs is not None:
        model = functools.partial(model, **model_kwargs)

    # Calculates labeled_loss if not provided.
    if labeled_loss is None:
        # Reuses the tape if provided; otherwise creates a new tape.
        gradient_tape = gradient_tape or tf.GradientTape()
        with gradient_tape:
            gradient_tape.watch(tf.nest.flatten(features))
            # Calculates prediction if not provided.
            predictions = predictions if predictions is not None else model(
                features)
            labeled_loss = loss_fn(labels, predictions, sample_weights)

    adv_input, adv_sample_weights = nsl_lib.gen_adv_neighbor(
        features,
        labeled_loss,
        config=adv_config.adv_neighbor_config,
        gradient_tape=gradient_tape)
    adv_output = model(adv_input)
    if sample_weights is not None:
        adv_sample_weights = tf.math.multiply(sample_weights,
                                              adv_sample_weights)
    adv_loss = loss_fn(labels, adv_output, adv_sample_weights)
    return adv_loss
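For contrast with the custom-training-loop example in the docstring, a minimal eager-mode call that passes neither `predictions`, `labeled_loss`, nor `gradient_tape` (so the function runs its own forward pass under a fresh tape) might look like this sketch with toy data:

```python
import tensorflow as tf
import neural_structured_learning as nsl

model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(2,))])
loss_fn = tf.keras.losses.MeanSquaredError()

x = tf.random.normal([8, 2])
y = tf.random.normal([8, 1])

# The function computes model(x) and loss_fn(y, model(x)) internally, then
# perturbs x along the loss gradient and re-evaluates the loss.
adv_loss = nsl.keras.adversarial_loss(
    x, y, model, loss_fn,
    adv_config=nsl.configs.make_adv_reg_config(adv_step_size=0.05))
```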
  def adv_model_fn(features, labels, mode, params=None, config=None):
    """The adversarial-regularized model_fn.

    Args:
      features: This is the first item returned from the `input_fn` passed to
        `train`, `evaluate`, and `predict`. This should be a single `tf.Tensor`
        or `dict` of same.
      labels: This is the second item returned from the `input_fn` passed to
        `train`, `evaluate`, and `predict`. This should be a single `tf.Tensor`
        or dict of same (for multi-head models). If mode is
        `tf.estimator.ModeKeys.PREDICT`, `labels=None` will be passed. If the
        `model_fn`'s signature does not accept `mode`, the `model_fn` must still
        be able to handle `labels=None`.
      mode: Optional. Specifies if this is training, evaluation, or prediction.
        See `tf.estimator.ModeKeys`.
      params: Optional `dict` of hyperparameters. Will receive what is passed to
        Estimator in the `params` parameter. This allows users to configure
        Estimators from hyper parameter tuning.
      config: Optional `estimator.RunConfig` object. Will receive what is passed
        to Estimator as its `config` parameter, or a default value. Allows
        setting up things in the model_fn based on configuration such as
        `num_ps_replicas`, or `model_dir`. Unused currently.

    Returns:
      A `tf.estimator.EstimatorSpec` with adversarial regularization.
    """

    # Uses the same variable scope for calculating the original objective and
    # adversarial regularization.
    with tf.compat.v1.variable_scope(tf.compat.v1.get_variable_scope(),
                                     reuse=tf.compat.v1.AUTO_REUSE,
                                     auxiliary_name_scope=False):
      # If no 'params' is passed, then it is possible for base_model_fn not to
      # accept a 'params' argument. See documentation for tf.estimator.Estimator
      # for additional context.
      if params:
        original_spec = base_model_fn(features, labels, mode, params, config)
      else:
        original_spec = base_model_fn(features, labels, mode, config)

      # Adversarial regularization only happens in training.
      if mode != tf.estimator.ModeKeys.TRAIN:
        return original_spec

      adv_neighbor, _ = nsl_lib.gen_adv_neighbor(features, original_spec.loss,
                                                 adv_config.adv_neighbor_config)

      # Runs the base model again to compute loss on adv_neighbor.
      adv_spec = base_model_fn(adv_neighbor, labels, mode, config)

      final_loss = original_spec.loss + adv_config.multiplier * adv_spec.loss

      if not optimizer_fn:
        # Default to the Adagrad optimizer, the same as canned DNNEstimator.
        optimizer = tf.train.AdagradOptimizer(learning_rate=0.05)
      else:
        optimizer = optimizer_fn()

      final_train_op = optimizer.minimize(
          loss=final_loss, global_step=tf.compat.v1.train.get_global_step())

    return original_spec._replace(loss=final_loss, train_op=final_train_op)
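This `model_fn` is intended to be produced by a wrapper around an existing Estimator's `model_fn`; in NSL that wrapper is `nsl.estimator.add_adversarial_regularization`. Assuming that wrapper, end-to-end usage could look roughly like the following sketch with toy data:

```python
import tensorflow as tf
import neural_structured_learning as nsl

feature_columns = [tf.feature_column.numeric_column('x', shape=(2,))]
base_estimator = tf.estimator.DNNRegressor(hidden_units=[4],
                                           feature_columns=feature_columns)

# Wrap the estimator; its model_fn plays the role of base_model_fn above.
adv_estimator = nsl.estimator.add_adversarial_regularization(
    base_estimator,
    adv_config=nsl.configs.make_adv_reg_config(multiplier=0.2,
                                               adv_step_size=0.05))

def input_fn():
  features = {'x': tf.random.normal([8, 2])}
  labels = tf.random.normal([8, 1])
  return tf.data.Dataset.from_tensors((features, labels)).repeat(10)

adv_estimator.train(input_fn=input_fn)
```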
    def adv_model_fn(features, labels, mode, params=None, config=None):
        """The adversarial-regularized model_fn.

    Args:
      features: This is the first item returned from the `input_fn` passed to
        `train`, `evaluate`, and `predict`. This should be a single `tf.Tensor`
        or `dict` of same.
      labels: This is the second item returned from the `input_fn` passed to
        `train`, `evaluate`, and `predict`. This should be a single `tf.Tensor`
        or dict of same (for multi-head models). If mode is
        `tf.estimator.ModeKeys.PREDICT`, `labels=None` will be passed. If the
        `model_fn`'s signature does not accept `mode`, the `model_fn` must still
        be able to handle `labels=None`.
      mode: Optional. Specifies if this is training, evaluation, or prediction.
        See `tf.estimator.ModeKeys`.
      params: Optional `dict` of hyperparameters. Will receive what is passed to
        Estimator in the `params` parameter. This allows users to configure
        Estimators from hyper parameter tuning.
      config: Optional `estimator.RunConfig` object. Will receive what is passed
        to Estimator as its `config` parameter, or a default value. Allows
        setting up things in the model_fn based on configuration such as
        `num_ps_replicas`, or `model_dir`. Unused currently.

    Returns:
      A `tf.estimator.EstimatorSpec` with adversarial regularization.
    """
        # Parameters 'params' and 'config' are optional. If they are not passed,
        # then it is possible for base_model_fn not to accept these arguments.
        # See documentation for tf.estimator.Estimator for additional context.
        kwargs = {'mode': mode}
        if 'params' in base_model_fn_args:
            kwargs['params'] = params
        if 'config' in base_model_fn_args:
            kwargs['config'] = config
        base_fn = functools.partial(base_model_fn, **kwargs)

        # Uses the same variable scope for calculating the original objective and
        # adversarial regularization.
        with tf.compat.v1.variable_scope(tf.compat.v1.get_variable_scope(),
                                         reuse=tf.compat.v1.AUTO_REUSE,
                                         auxiliary_name_scope=False):
            original_spec = base_fn(features, labels)

            # Adversarial regularization only happens in training.
            if mode != tf.estimator.ModeKeys.TRAIN:
                return original_spec

            adv_neighbor, _ = nsl_lib.gen_adv_neighbor(
                features,
                original_spec.loss,
                adv_config.adv_neighbor_config,
                # The pgd_model_fn is a dummy identity function since loss is
                # directly available from base_fn.
                pgd_model_fn=lambda features: features,
                pgd_loss_fn=(
                    lambda labels, features: base_fn(features, labels).loss),
                pgd_labels=labels,
                use_while_loop=False)

            # Runs the base model again to compute loss on adv_neighbor.
            adv_spec = base_fn(adv_neighbor, labels)
            scaled_adversarial_loss = adv_config.multiplier * adv_spec.loss
            tf.compat.v1.summary.scalar('loss/scaled_adversarial_loss',
                                        scaled_adversarial_loss)

            supervised_loss = original_spec.loss
            tf.compat.v1.summary.scalar('loss/supervised_loss',
                                        supervised_loss)

            final_loss = supervised_loss + scaled_adversarial_loss

            if not optimizer_fn:
                # Default to the Adagrad optimizer, the same as canned DNNEstimator.
                optimizer = tf.compat.v1.train.AdagradOptimizer(
                    learning_rate=0.05)
            else:
                optimizer = optimizer_fn()

            train_op = optimizer.minimize(
                loss=final_loss,
                global_step=tf.compat.v1.train.get_global_step())

            update_ops = tf.compat.v1.get_collection(
                tf.compat.v1.GraphKeys.UPDATE_OPS)
            if update_ops:
                train_op = tf.group(train_op, *update_ops)

        return original_spec._replace(loss=final_loss, train_op=train_op)
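The `pgd_model_fn`/`pgd_loss_fn`/`pgd_labels` hooks above only take effect when the neighbor config requests more than one PGD iteration. A hedged sketch of such a config, built directly from the `AdvNeighborConfig` fields referenced in this file (`adv_step_size`, `adv_grad_norm`, `pgd_iterations`, `pgd_epsilon`):

```python
import neural_structured_learning as nsl

# 10 PGD steps of size 0.01 each, measured in the L-infinity norm, with the
# accumulated perturbation projected back so it stays within pgd_epsilon.
pgd_adv_config = nsl.configs.AdvRegConfig(
    multiplier=0.2,
    adv_neighbor_config=nsl.configs.AdvNeighborConfig(
        adv_step_size=0.01,
        adv_grad_norm='infinity',
        pgd_iterations=10,
        pgd_epsilon=0.05))
```

Such a config would then be passed as the `adv_config` argument when constructing the adversarially regularized wrapper, whose `adv_config.adv_neighbor_config` is what `gen_adv_neighbor` consumes above.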