def testMaximizeWithinUnitNormL1(self):
    with self.cached_session() as sess:
        weights = tf.constant([[3.0, -4.0, -5.0], [1.0, 1.0, 0.0]])
        actual = sess.run(utils.maximize_within_unit_norm(weights, 'l1'))
        expected = [[0.0, 0.0, -1.0], [0.5, 0.5, 0.0]]
        self.assertAllEqual(actual, expected)
def testMaximizeWithinUnitNormInf(self):
    with self.cached_session() as sess:
        weights = tf.constant([[1.0, 2.0, -4.0], [-1.0, 5.0, -3.0]])
        actual = sess.run(utils.maximize_within_unit_norm(weights, 'infinity'))
        expected = [[1.0, 1.0, -1.0], [-1.0, 1.0, -1.0]]
        self.assertAllEqual(actual, expected)
def testMaximizeWithinUnitNormL2(self):
    with self.cached_session() as sess:
        weights = tf.constant([[3.0, -4.0], [-7.0, 24.0]])
        actual = sess.run(utils.maximize_within_unit_norm(weights, 'l2'))
        # Each row is normalized by its L2 norm: 5 and 25, respectively.
        expected = [[0.6, -0.8], [-0.28, 0.96]]
        self.assertAllClose(actual, expected)
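Each norm has a closed-form maximizer, which the expected values in these tests reflect: sign(g) for the infinity norm, g / ||g||_2 for the L2 norm, and all mass on the maximum-magnitude coordinate(s) for the L1 norm. A minimal NumPy sketch of that math (an illustration of the formulas, not the library implementation):

import numpy as np

def unit_norm_maximizer(g, norm):
    """Per-row argmax of <d, g> over {d : ||d||_norm <= 1}."""
    if norm == 'infinity':
        return np.sign(g)
    if norm == 'l2':
        norms = np.linalg.norm(g, axis=-1, keepdims=True)
        return np.where(norms > 0, g / np.maximum(norms, 1e-12), 0.0)
    if norm == 'l1':
        # Split the unit of L1 mass evenly among max-magnitude coordinates.
        is_max = np.abs(g) == np.max(np.abs(g), axis=-1, keepdims=True)
        return np.sign(g) * is_max / np.sum(is_max, axis=-1, keepdims=True)
    raise ValueError('Unsupported norm: ' + norm)

print(unit_norm_maximizer(np.array([[3., -4.], [-7., 24.]]), 'l2'))
# [[ 0.6  -0.8 ]
#  [-0.28  0.96]]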
Example #4
 def testMaximizeWithinUnitNormWithMultipleInputs(self):
     weights = {
         'w1': tf.constant([[1., 2.], [-4., 4.]]),
         'w2': tf.constant([[-2.], [-7.]]),
     }
     actual = self.evaluate(utils.maximize_within_unit_norm(weights, 'l2'))
     expected = {
         'w1': np.array([[1. / 3., 2. / 3.], [-4. / 9., 4. / 9.]]),
         'w2': np.array([[-2. / 3.], [-7. / 9.]]),
     }
     self.assertAllClose(actual, expected)
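With multiple inputs and the 'l2' norm, normalization is joint across features within each example: example 1 concatenates to [1, 2, -2] with L2 norm 3, and example 2 to [-4, 4, -7] with norm sqrt(16 + 16 + 49) = 9, which is where the denominators in `expected` come from.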
Example #5
  def _apply_gradients(self, dense_features, keyed_grads,
                       dense_original_features):
    """Applies gradients to the features to generate perturbed features.

    Args:
      dense_features: A dictionary of tensors at which gradients are computed.
      keyed_grads: A dictionary of tensors representing the gradients.
      dense_original_features: A dictionary of tensors which the epsilon
        constraint (`adv_config.pgd_epsilon`) should reference. This should have
        the same structure as `dense_features`.

    Returns:
      A dictionary of tensors with the same structure as `dense_features`
      representing the perturbed features.
    """
    masked_grads = {
        key: utils.apply_feature_mask(grad, self.feature_masks.get(key, None))
        for key, grad in keyed_grads.items()
    }
    perturb_directions = utils.maximize_within_unit_norm(
        masked_grads, self._adv_config.adv_grad_norm)
    # Clip perturbations into epsilon ball here. Note that this ball is
    # centered around the original input point.
    perturbations = {}
    for key, direction in perturb_directions.items():
      perturbations[key] = (
          direction * self._adv_config.adv_step_size + dense_features[key]
          - dense_original_features[key])
    if self._adv_config.pgd_epsilon is not None:
      perturbations = utils.project_to_ball(perturbations,
                                            self._adv_config.pgd_epsilon,
                                            self._adv_config.adv_grad_norm)
    # Apply feature constraints specified in the config.
    perturbed_features = {}
    for key, feature in dense_original_features.items():
      if key not in perturbations:  # No perturbation due to no gradient
        perturbed_features[key] = feature
      else:
        perturbed_features[key] = _apply_feature_constraints(
            feature + tf.stop_gradient(perturbations[key]),
            self.feature_min.get(key, None),
            self.feature_max.get(key, None))
    return perturbed_features
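To make the projection step concrete: after the gradient step, the offset from the original point may leave the epsilon ball, so it is pulled back. A minimal sketch of what that projection means for the infinity norm, assuming element-wise clipping (utils.project_to_ball also handles the other supported norms):

import tensorflow as tf

def project_to_linf_ball_sketch(offset, epsilon):
    # Infinity-norm case: clip each coordinate of the offset into
    # [-epsilon, epsilon]. For the L2 norm one would instead rescale any
    # example whose offset norm exceeds epsilon.
    return tf.clip_by_value(offset, -epsilon, epsilon)

print(project_to_linf_ball_sketch(tf.constant([[0.3, -0.7]]), 0.5))
# [[ 0.3 -0.5]]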
Example #6
    def _normalize_gradient(self, keyed_grads, feature_masks):
        """Masks the gradients and normalizes to the size specified in adv_config.

    Arguments:
      keyed_grads: A dictionary of (feature_name, Tensor) representing gradients
        on each feature.
      feature_masks: A dictionary of (feature_name, Tensor-compatible value)
        representing masks on each feature. A feature is not masked if its name
        is missing in this dictionary.

    Returns:
      perturbation: A dictionary of (feature_name, Tensor) representing the
        adversarial perturbation on that feature.

    Raises:
      ValueError: if 'raise_invalid_gradient' is set and gradients cannot be
        computed on some input features.
    """
        masked_grads = {}
        for (key, grad) in keyed_grads.items():
            if grad is None:
                # Two cases that grad can be None:
                # (1) The feature is not differentiable, like strings or integers.
                # (2) The feature is not involved in loss computation.
                # In either case, no gradient will be calculated for this feature.
                if self._raise_invalid_gradient:
                    raise ValueError('Cannot perturb feature ' + key)
                logging.warn('Cannot perturb feature %s', key)
                continue

            # Guards against numerical errors. If the gradient is malformed (inf,
            # -inf, or NaN) on a dimension, replace it with 0, which has the effect of
            # not perturbing the original sample along that perticular dimension.
            grad = tf.where(tf.math.is_finite(grad), grad, tf.zeros_like(grad))
            masked_grads[key] = utils.apply_feature_mask(
                grad, feature_masks.get(key, None))

        if not masked_grads:
            return {}
        adv_perturbation = utils.maximize_within_unit_norm(
            masked_grads, self._adv_config.adv_grad_norm)
        return tf.nest.map_structure(
            lambda t: t * self._adv_config.adv_step_size, adv_perturbation)
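The mask lookup above relies on utils.apply_feature_mask zeroing out gradient dimensions that should not be perturbed. A sketch of that behavior, under the assumption that the mask is simply multiplied in (illustrative, not the library source):

def apply_feature_mask_sketch(grad, mask=None):
    # A mask of None leaves the gradient untouched; otherwise zero entries
    # in the mask suppress the perturbation along those dimensions.
    if mask is None:
        return grad
    return grad * tf.cast(mask, grad.dtype)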
  def _compute_perturbations(self, dense_features, keyed_grads,
                             dense_original_features):
    """Generates perturbations based on gradients and features.

    This computes the steepest descent direction from the gradient, multiplies
    by the step size, and projects into the epsilon ball.

    Args:
      dense_features: A dictionary of tensors at which gradients are computed.
      keyed_grads: A dictionary of tensors representing the gradients.
      dense_original_features: A dictionary of tensors which the epsilon
        constraint (`adv_config.pgd_epsilon`) should reference. This should have
        the same structure as `dense_features`.

    Returns:
      A dictionary of tensors with the same structure as `dense_features`
      representing the perturbations to be applied on `dense_original_features`.
    """
    masked_grads = {
        key: utils.apply_feature_mask(grad, self.feature_masks.get(key, None))
        for key, grad in keyed_grads.items()
    }
    perturb_directions = utils.maximize_within_unit_norm(
        masked_grads, self._adv_config.adv_grad_norm)
    # Clip perturbations into epsilon ball here. Note that this ball is
    # centered around the original input point.
    perturbations = {}
    for key, direction in perturb_directions.items():
      perturbations[key] = (
          direction * self._adv_config.adv_step_size + dense_features[key]
          - dense_original_features[key])
    if self._adv_config.pgd_epsilon is not None:
      perturbations = utils.project_to_ball(perturbations,
                                            self._adv_config.pgd_epsilon,
                                            self._adv_config.adv_grad_norm)
    return perturbations
  def gen_neighbor(self, input_features, pgd_labels=None):
    """Generates adversarial neighbors and the corresponding weights.

    This function perturbs only *dense* tensors to generate adversarial
    neighbors. No perturbation will be applied on sparse tensors (e.g., string
    or int). Therefore, in the generated adversarial neighbors, the values of
    these sparse tensors will be kept the same as the input_features. In other
    words, if input_features is a dictionary mapping feature names to tensors,
    the dense features will be perturbed and the values of sparse features will
    remain the same.

    Arguments:
      input_features: a dense (float32) tensor, a list of dense tensors, or a
        dictionary of feature names and dense tensors. The shape of the
        tensor(s) should be either:
        (a) pointwise samples: [batch_size, feat_len], or
        (b) sequence samples: [batch_size, seq_len, feat_len]
      pgd_labels: the labels corresponding to each input. This should have shape
        `[batch_size, 1]`. This is required for PGD-generated adversaries, and
        unused otherwise.

    Returns:
      adv_neighbor: the perturbed example, with the same shape and structure as
        input_features
      adv_weight: a dense (float32) tensor with shape of [batch_size, 1],
        representing the weight for each neighbor

    Raises:
      ValueError: if some of the `input_features` cannot be perturbed due to
        (a) it is a `tf.SparseTensor`,
        (b) it has a non-differentiable type like string or integer, or
        (c) it is not involved in loss computation.
        This error is suppressed if `raise_invalid_gradient` is set to False
        (which is the default).
    """
    loss = self._labeled_loss
    gradient_tape = self._gradient_tape

    # Composes both features and feature_masks to dictionaries, so that the
    # feature_masks can be looked up by key.
    features = self._compose_as_dict(input_features)
    dense_original_features, sparse_original_features = self._split_dict(
        features, lambda feature: isinstance(feature, tf.Tensor))
    feature_masks = self._compose_as_dict(self._adv_config.feature_mask)
    feature_min = self._compose_as_dict(self._adv_config.clip_value_min)
    feature_max = self._compose_as_dict(self._adv_config.clip_value_max)
    if sparse_original_features:
      sparse_keys = str(sparse_original_features.keys())
      if self._raise_invalid_gradient:
        raise ValueError('Cannot perturb non-Tensor input: ' + sparse_keys)
      logging.log_first_n(logging.WARNING,
                          'Cannot perturb non-Tensor input: %s', 1, sparse_keys)
    dense_features = dense_original_features
    for t in range(self._adv_config.iterations):
      keyed_grads = self._compute_gradient(loss, dense_features, gradient_tape)
      masked_grads = {
          key: utils.apply_feature_mask(grad, feature_masks.get(key, None))
          for key, grad in keyed_grads.items()
      }

      unit_perturbations = utils.maximize_within_unit_norm(
          masked_grads, self._adv_config.adv_grad_norm)
      perturbations = tf.nest.map_structure(
          lambda g: g * self._adv_config.adv_step_size, unit_perturbations)
      # Clip perturbations into epsilon ball here. Note that this ball is
      # centered around the original input point.
      diff = {}
      for key, perturb in perturbations.items():
        # Only include features for which perturbation occurred. There is
        # nothing to project for features without perturbations.
        diff[key] = dense_features[key] + perturb - dense_original_features[key]
      if self._adv_config.epsilon is not None:
        bounded_diff = utils.project_to_ball(diff, self._adv_config.epsilon,
                                             self._adv_config.adv_grad_norm)
      else:
        bounded_diff = diff
      # Backfill the rest of the dense features.
      for key, feature in dense_features.items():
        if key not in bounded_diff:
          bounded_diff[key] = feature - dense_original_features[key]
      adv_neighbor = dict(sparse_original_features)
      for key, feature in dense_original_features.items():
        perturbed = (
            feature + bounded_diff[key] if key in perturbations else feature)
        adv_neighbor[key] = tf.stop_gradient(
            _apply_feature_constraints(perturbed, feature_min.get(key, None),
                                       feature_max.get(key, None)))

      # Update for the next iteration.
      if t < self._adv_config.iterations - 1:
        inputs_t = self._decompose_as(input_features, adv_neighbor)
        # Compute the new loss to calculate gradients with.
        features = self._compose_as_dict(inputs_t)
        dense_features, _ = self._split_dict(
            features, lambda feature: isinstance(feature, tf.Tensor))
        if gradient_tape is not None:
          with gradient_tape:
            # Gradient calculated against dense features only.
            gradient_tape.watch(dense_features)
            loss = self._pgd_loss_fn(pgd_labels, self._pgd_model_fn(inputs_t))
        else:
          loss = self._pgd_loss_fn(pgd_labels, self._pgd_model_fn(inputs_t))

    # Converts the perturbed examples back to their original structure.
    adv_neighbor = self._decompose_as(input_features, adv_neighbor)

    batch_size = tf.shape(list(features.values())[0])[0]
    adv_weight = tf.ones([batch_size, 1])

    return adv_neighbor, adv_weight
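The loop above is the standard PGD recipe: take a steepest-ascent step under the configured norm, project the accumulated offset back into the epsilon ball around the original input, and repeat. A stripped-down sketch of the same recipe for the infinity norm, using only public TF ops and hypothetical names (independent of this class):

import tensorflow as tf

def pgd_neighbor_sketch(model_fn, loss_fn, x, labels, step_size, epsilon,
                        steps):
    """Hypothetical PGD loop: ascend along sign(grad), clip the offset."""
    x_adv = tf.identity(x)
    for _ in range(steps):
        with tf.GradientTape() as tape:
            tape.watch(x_adv)
            loss = loss_fn(labels, model_fn(x_adv))
        grad = tape.gradient(loss, x_adv)
        # sign(grad) is the infinity-norm steepest-ascent direction, i.e.
        # maximize_within_unit_norm(grad, 'infinity').
        x_adv = x_adv + step_size * tf.sign(grad)
        # Project the total offset back into the epsilon ball around x.
        x_adv = x + tf.clip_by_value(x_adv - x, -epsilon, epsilon)
    return tf.stop_gradient(x_adv)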
Example #9
 def testMaximizeWithinUnitNormWithNestedStructure(self):
     weights = {'w': tf.constant([[3., -4.], [-4., 4.]])}
     actual = self.evaluate(utils.maximize_within_unit_norm(weights, 'l1'))
     expected = {'w': np.array([[0., -1.], [-0.5, 0.5]])}
     self.assertAllClose(actual, expected)
Example #10
 def testMaximizeWithinUnitNormL1(self):
     weights = tf.constant([[3.0, -4.0, -5.0], [1.0, 1.0, 0.0]])
     actual = self.evaluate(utils.maximize_within_unit_norm(weights, 'l1'))
     expected = tf.constant([[0.0, 0.0, -1.0], [0.5, 0.5, 0.0]])
     self.assertAllEqual(actual, expected)
Example #11
 def testMaximizeWithinUnitNormInf(self):
     weights = tf.constant([[1.0, 2.0, -4.0], [-1.0, 5.0, -3.0]])
     actual = self.evaluate(
         utils.maximize_within_unit_norm(weights, 'infinity'))
     expected = tf.constant([[1.0, 1.0, -1.0], [-1.0, 1.0, -1.0]])
     self.assertAllEqual(actual, expected)
def testMaximizeWithinUnitNormL2WithZeroInputShouldReturnZero(self, norm):
    # `norm` is presumably supplied by a parameterized decorator omitted from
    # this snippet; zero input should map to zero output under any norm.
    weights = tf.constant([[0.0, 0.0]])
    actual = self.evaluate(utils.maximize_within_unit_norm(weights, norm))
    self.assertAllEqual(actual, weights)
Example #13
  def _normalize_gradient(self, keyed_grads, feature_masks):
    """Masks the gradients and normalizes to the size specified in adv_config.

    Arguments:
      keyed_grads: A dictionary of (feature_name, Tensor) representing gradients
        on each feature.
      feature_masks: A dictionary of (feature_name, Tensor-compatible value)
        representing masks on each feature. A feature is not masked if its name
        is missing in this dictionary.

    Returns:
      perturbation: A dictionary of (feature_name, Tensor) representing the
        adversarial perturbation on that feature.

    Raises:
      ValueError: if 'raise_invalid_gradient' is set and gradients cannot be
        computed on some input features.
    """
    grads_to_concat = []
    dim_index_and_sizes = {}
    total_dims = 0
    for (key, grad) in keyed_grads.items():
      if grad is None:
        # There are two cases where grad can be None:
        # (1) The feature is not differentiable, like strings or integer
        #     indices.
        # (2) The feature is not involved in the loss computation.
        # In either case, no gradient is calculated for this feature.
        if self._raise_invalid_gradient:
          raise ValueError('Cannot perturb feature ' + key)
        tf.compat.v1.logging.warn('Cannot perturb feature %s', key)
        continue

      # Guards against numerical errors. If the gradient is malformed (inf,
      # -inf, or NaN) on a dimension, replace it with 0, which has the effect
      # of not perturbing the original sample along that particular dimension.
      grad = tf.where(tf.math.is_finite(grad), grad, tf.zeros_like(grad))
      # Applies feature masks if available.
      if key in feature_masks:
        grad *= tf.cast(feature_masks[key], grad.dtype)

      # The gradients are reshaped to 2-D (batch_size x total_feature_len;
      # sequence data will be processed in the same way) so they can be
      # concatenated and normalized across features. They will be reshaped back
      # to the original shape after normalization.
      feature_dim = tf.reduce_prod(input_tensor=grad.get_shape()[1:])
      grad = tf.reshape(grad, [-1, feature_dim])
      grads_to_concat.append(grad)
      dim_index_and_sizes[key] = (total_dims, total_dims + feature_dim)
      total_dims += feature_dim

    if not grads_to_concat:
      return {}  # no perturbation

    # Concatenates all the gradients so they can be normalized together.
    concat_grads = tf.concat(grads_to_concat, axis=-1)
    adv_perturbation = utils.maximize_within_unit_norm(
        concat_grads, self._adv_config.adv_grad_norm)
    adv_perturbation = self._adv_config.adv_step_size * adv_perturbation

    perturbation = {}
    for (key, grad) in keyed_grads.items():
      if key not in dim_index_and_sizes:
        continue
      dim_idx_begin, dim_idx_end = dim_index_and_sizes[key]
      sub_grad = adv_perturbation[:, dim_idx_begin:dim_idx_end]
      if grad.get_shape().rank > 2:
        sub_grad = tf.reshape(sub_grad, [-1] + grad.get_shape().as_list()[1:])
      perturbation[key] = sub_grad
    return perturbation
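The concatenate-then-split bookkeeping above exists because normalization is joint across all features within each example, not per feature. A small standalone sketch of the same idea (shapes assumed static):

import tensorflow as tf

# Gradients of two features for the same batch of 2 examples.
g1 = tf.constant([[1., 2.], [-4., 4.]])  # occupies columns 0:2
g2 = tf.constant([[-2.], [-7.]])         # occupies column  2:3
concat = tf.concat([g1, g2], axis=-1)    # shape [2, 3]

# Joint L2 normalization per example; the norms here are 3 and 9.
normalized = concat / tf.norm(concat, axis=-1, keepdims=True)

# Split back using the recorded (begin, end) column ranges.
n1, n2 = normalized[:, 0:2], normalized[:, 2:3]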
    def gen_neighbor(self, input_features):
        """Generates adversarial neighbors and the corresponding weights.

    This function perturbs only *dense* tensors to generate adversarial
    neighbors. No pertubation will be applied on sparse tensors  (e.g., string
    or int). Therefore, in the generated adversarial neighbors, the values of
    these sparse tensors will be kept the same as the input_features. In other
    words, if input_features is a dictionary mapping feature names to tensors,
    the dense features will be perturbed and the values of sparse features will
    remain the same.

    Arguments:
      input_features: a dense (float32) tensor, a list of dense tensors, or a
        dictionary of feature names and dense tensors. The shape of the
        tensor(s) should be either:
        (a) pointwise samples: [batch_size, feat_len], or
        (b) sequence samples: [batch_size, seq_len, feat_len]

    Returns:
      adv_neighbor: the perturbed example, with the same shape and structure as
        input_features
      adv_weight: a dense (float32) tensor with shape of [batch_size, 1],
        representing the weight for each neighbor

    Raises:
      ValueError: if some of the `input_features` cannot be perturbed due to
        (a) it is a `tf.SparseTensor`,
        (b) it has a non-differentiable type like string or integer, or
        (c) it is not involved in loss computation.
        This error is suppressed if `raise_invalid_gradient` is set to False
        (which is the default).
    """

        # Composes both features and feature_masks to dictionaries, so that the
        # feature_masks can be looked up by key.
        features = self._compose_as_dict(input_features)
        feature_masks = self._compose_as_dict(self._adv_config.feature_mask)

        dense_features, sparse_features = self._split_dict(
            features, lambda feature: isinstance(feature, tf.Tensor))
        if sparse_features:
            sparse_keys = str(sparse_features.keys())
            if self._raise_invalid_gradient:
                raise ValueError('Cannot perturb non-Tensor input: ' +
                                 sparse_keys)
            logging.warning('Cannot perturb non-Tensor input: %s', sparse_keys)

        keyed_grads = self._compute_gradient(dense_features)
        masked_grads = {
            key: utils.apply_feature_mask(grad, feature_masks.get(key, None))
            for key, grad in keyed_grads.items()
        }

        unit_perturbations = utils.maximize_within_unit_norm(
            masked_grads, self._adv_config.adv_grad_norm)
        perturbations = tf.nest.map_structure(
            lambda t: t * self._adv_config.adv_step_size, unit_perturbations)

        # Sparse features are copied directly without perturbation.
        adv_neighbor = dict(sparse_features)
        for (key, feature) in dense_features.items():
            perturbed = (feature + perturbations[key]
                         if key in perturbations else feature)
            adv_neighbor[key] = tf.stop_gradient(perturbed)
        # Converts the perturbed examples back to their original structure.
        adv_neighbor = self._decompose_as(input_features, adv_neighbor)

        batch_size = tf.shape(list(features.values())[0])[0]
        adv_weight = tf.ones([batch_size, 1])

        return adv_neighbor, adv_weight
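For contrast with the multi-step PGD version earlier, this generator takes a single gradient step with no epsilon-ball projection, i.e. the FGSM recipe. A condensed sketch of the whole flow for one dense tensor in TF1 style (a hypothetical helper, not this class):

import tensorflow as tf

def fgsm_neighbor_sketch(loss, x, step_size):
    """Single-step adversarial neighbor for one dense tensor."""
    grad = tf.gradients(loss, x)[0]
    # Replace non-finite entries with 0 so malformed gradients perturb
    # nothing along those dimensions.
    grad = tf.where(tf.math.is_finite(grad), grad, tf.zeros_like(grad))
    # Infinity-norm steepest-ascent direction; 'l1' and 'l2' use the closed
    # forms sketched near the top of this section.
    return tf.stop_gradient(x + step_size * tf.sign(grad))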