Example #1
  def test_random_lp_vector_linf(self):
    """
    test_random_lp_vector_linf: Test that `random_lp_vector` returns
    random samples in the l-inf ball.
    """

    eps = 0.5
    d = 10

    r = self.sess.run(utils_tf.random_lp_vector((1000, d), np.inf, eps))

    # test that some values are close to the boundaries
    self.assertLessEqual(np.max(r), eps)
    self.assertGreaterEqual(np.max(r), 0.95*eps)
    self.assertGreaterEqual(np.min(r), -eps)
    self.assertLessEqual(np.min(r), -0.95*eps)

    # test that the mean value of each feature is close to zero
    means = np.mean(r, axis=0)
    self.assertClose(means, np.zeros(d), atol=0.05)
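
For context, a minimal NumPy sketch of the l-inf sampling this test expects: each coordinate is drawn independently and uniformly from [-eps, eps]. The helper name `sample_linf_ball` is hypothetical and not part of cleverhans.

import numpy as np

def sample_linf_ball(shape, eps, rng=None):
    """Hypothetical helper: uniform samples from the l-inf ball of radius eps."""
    rng = np.random.default_rng() if rng is None else rng
    # Every coordinate is independent and uniform in [-eps, eps].
    return rng.uniform(low=-eps, high=eps, size=shape)

r = sample_linf_ball((1000, 10), eps=0.5)
assert np.max(np.abs(r)) <= 0.5
assert abs(np.mean(r)) < 0.05  # per-feature means are close to zero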
Example #2
  def test_random_lp_vector_l1_l2(self):
    """
    test_random_lp_vector_l1_l2: Test that `random_lp_vector` returns
    random samples in an l1 or l2 ball.
    """

    eps = 0.5
    d = 10

    for ord in [1, 2]:
      r = self.sess.run(utils_tf.random_lp_vector((1000, d), ord, eps))

      norms = np.linalg.norm(r, axis=-1, ord=ord)

      # test that some values are close to the boundaries
      self.assertLessEqual(np.max(norms), eps)
      self.assertGreaterEqual(np.max(norms), 0.95 * eps)

      # The expected norm is eps * E[U^(1/d)], where U is a standard uniform
      # random variable and d is the dimension. The second factor equals the
      # expectation of a Beta(d, 1) variable, which is d / (d + 1).
      expected_mean_norm = eps * (d / (d + 1.))
      self.assertClose(np.mean(norms), expected_mean_norm, atol=0.02)
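
As a sanity check on the comment's derivation, here is a small NumPy Monte-Carlo sketch. It assumes the common construction for uniform sampling in an l2 ball (Gaussian direction scaled by eps * U^(1/d)); the observed mean norm should land near eps * d / (d + 1), i.e. about 0.4545 for eps = 0.5 and d = 10.

import numpy as np

eps, d, n = 0.5, 10, 100000
rng = np.random.default_rng(0)

# Uniform samples in the l2 ball: random direction times radius eps * U^(1/d).
direction = rng.normal(size=(n, d))
direction /= np.linalg.norm(direction, axis=1, keepdims=True)
radius = eps * rng.uniform(size=(n, 1)) ** (1.0 / d)
samples = direction * radius

print(np.mean(np.linalg.norm(samples, axis=1)))  # approx eps * d / (d + 1) = 0.4545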
Example #3
    def generate(self, x, **kwargs):
        """
        Generate symbolic graph for adversarial examples and return.

        :param x: The model's symbolic inputs.
        :param kwargs: See `parse_params`
        """
        # Parse and save attack-specific parameters
        assert self.parse_params(**kwargs)

        asserts = []

        # If a data range was specified, check that the input was in that range
        if self.clip_min is not None:
            asserts.append(
                utils_tf.assert_greater_equal(x,
                                              tf.cast(self.clip_min, x.dtype)))

        if self.clip_max is not None:
            asserts.append(
                utils_tf.assert_less_equal(x, tf.cast(self.clip_max, x.dtype)))

        # Initialize loop variables
        if self.rand_init:
            eta = random_lp_vector(tf.shape(x),
                                   ord=1,
                                   eps=tf.cast(self.eps, x.dtype),
                                   dtype=x.dtype)
        else:
            eta = tf.zeros(tf.shape(x))

        # Clip eta
        eta = clip_eta(eta, ord=1, eps=self.eps)
        adv_x = x + eta
        if self.clip_min is not None or self.clip_max is not None:
            adv_x = utils_tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

        if self.y_target is not None:
            y = self.y_target
            targeted = True
        elif self.y is not None:
            y = self.y
            targeted = False
        else:
            model_preds = self.model.get_probs(x)
            preds_max = tf.reduce_max(model_preds, 1, keepdims=True)
            y = tf.to_float(tf.equal(model_preds, preds_max))
            y = tf.stop_gradient(y)
            targeted = False
            del model_preds

        y_kwarg = 'y_target' if targeted else 'y'

        def cond(i, _):
            """Iterate until requested number of iterations is completed"""
            return tf.less(i, self.nb_iter)

        def body(i, adv_x):
            """Do a projected gradient step"""

            labels, _ = self.get_or_guess_labels(adv_x, {y_kwarg: y})
            logits = self.model.get_logits(adv_x)

            adv_x = sparse_l1_descent(adv_x,
                                      logits,
                                      y=labels,
                                      eps=self.eps_iter,
                                      q=self.grad_sparsity,
                                      clip_min=self.clip_min,
                                      clip_max=self.clip_max,
                                      clip_grad=self.clip_grad,
                                      targeted=(self.y_target is not None),
                                      sanity_checks=self.sanity_checks)

            # Clipping perturbation eta to the l1-ball
            eta = adv_x - x
            eta = clip_eta(eta, ord=1, eps=self.eps)
            adv_x = x + eta

            # Redo the clipping.
            # Subtracting and re-adding eta can add some small numerical error.
            if self.clip_min is not None or self.clip_max is not None:
                adv_x = utils_tf.clip_by_value(adv_x, self.clip_min,
                                               self.clip_max)

            return i + 1, adv_x

        _, adv_x = tf.while_loop(cond,
                                 body, (tf.zeros([]), adv_x),
                                 back_prop=True,
                                 maximum_iterations=self.nb_iter)

        # Asserts run only on CPU.
        # When multi-GPU eval code tries to force all PGD ops onto GPU, this
        # can cause an error.
        common_dtype = tf.float32
        asserts.append(
            utils_tf.assert_less_equal(
                tf.cast(self.eps_iter, dtype=common_dtype),
                tf.cast(self.eps, dtype=common_dtype)))

        if self.sanity_checks:
            with tf.control_dependencies(asserts):
                adv_x = tf.identity(adv_x)

        return adv_x
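
The loop body above relies on `clip_eta(eta, ord=1, eps=self.eps)` to pull the perturbation back into the l1 ball. Below is a minimal NumPy sketch of that step, assuming it simply rescales eta so its per-example l1 norm does not exceed eps (a scaling heuristic rather than an exact Euclidean projection onto the l1 ball).

import numpy as np

def clip_eta_l1(eta, eps):
    """Scale each example's perturbation so that its l1 norm is at most eps."""
    # l1 norm computed over all non-batch dimensions.
    norms = np.sum(np.abs(eta).reshape(eta.shape[0], -1), axis=1)
    factor = np.minimum(1.0, eps / np.maximum(norms, 1e-12))
    return eta * factor.reshape((-1,) + (1,) * (eta.ndim - 1))

eta = np.random.randn(4, 3, 3)
clipped = clip_eta_l1(eta, eps=0.5)
assert np.all(np.sum(np.abs(clipped).reshape(4, -1), axis=1) <= 0.5 + 1e-6)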
Example #4
    def generate(self, x, **kwargs):
        """
        Generate symbolic graph for adversarial examples and return.

        :param x: The model's symbolic inputs.
        :param kwargs: See `parse_params`
        """
        # Parse and save attack-specific parameters
        assert self.parse_params(**kwargs)

        asserts = []

        # If a data range was specified, check that the input was in that range
        if self.clip_min is not None:
            asserts.append(
                utils_tf.assert_greater_equal(x,
                                              tf.cast(self.clip_min, x.dtype)))

        if self.clip_max is not None:
            asserts.append(
                utils_tf.assert_less_equal(x, tf.cast(self.clip_max, x.dtype)))

        # Initialize loop variables
        if self.rand_init:
            eta = random_lp_vector(tf.shape(input=x),
                                   self.ord,
                                   tf.cast(self.rand_init_eps, x.dtype),
                                   dtype=x.dtype)
        else:
            eta = tf.zeros(tf.shape(input=x))

        # Clip eta
        eta = clip_eta(eta, self.ord, self.eps)
        adv_x = x + eta
        if self.clip_min is not None or self.clip_max is not None:
            adv_x = utils_tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

        if self.y_target is not None:
            y = self.y_target
            targeted = True
        elif self.y is not None:
            y = self.y
            targeted = False
        else:
            model_preds = self.model.get_probs(x)
            preds_max = tf.reduce_max(input_tensor=model_preds,
                                      axis=1,
                                      keepdims=True)
            y = tf.cast(tf.equal(model_preds, preds_max), dtype=tf.float32)
            y = tf.stop_gradient(y)
            targeted = False
            del model_preds

        y_kwarg = 'y_target' if targeted else 'y'

        fgm_params = {
            'eps': self.eps_iter,
            y_kwarg: y,
            'ord': self.ord,
            'loss_fn': self.loss_fn,
            'clip_min': self.clip_min,
            'clip_max': self.clip_max,
            'clip_grad': self.clip_grad
        }
        if self.ord == 1:
            raise NotImplementedError(
                "FGM is not a good inner loop step for PGD "
                "when ord=1, because ord=1 FGM changes only "
                "one pixel at a time. Use the SparseL1Descent "
                "attack instead, which allows fine-grained "
                "control over the sparsity of the gradient "
                "updates.")

        # Use getattr() to avoid errors in eager execution attacks
        FGM = self.FGM_CLASS(self.model,
                             sess=getattr(self, 'sess', None),
                             dtypestr=self.dtypestr)

        def cond(i, _):
            """Iterate until requested number of iterations is completed"""
            return tf.less(i, self.nb_iter)

        def body(i, adv_x):
            """Do a projected gradient step"""
            adv_x = FGM.generate(adv_x, **fgm_params)

            # Clipping perturbation eta to self.ord norm ball
            eta = adv_x - x
            eta = clip_eta(eta, self.ord, self.eps)
            adv_x = x + eta

            # Redo the clipping.
            # FGM already did it, but subtracting and re-adding eta can add some
            # small numerical error.
            if self.clip_min is not None or self.clip_max is not None:
                adv_x = utils_tf.clip_by_value(adv_x, self.clip_min,
                                               self.clip_max)

            return i + 1, adv_x

        _, adv_x = tf.while_loop(cond=cond,
                                 body=body,
                                 loop_vars=(tf.zeros([]), adv_x),
                                 back_prop=True,
                                 maximum_iterations=self.nb_iter)

        # Asserts run only on CPU.
        # When multi-GPU eval code tries to force all PGD ops onto GPU, this
        # can cause an error.
        common_dtype = tf.float32
        asserts.append(
            utils_tf.assert_less_equal(
                tf.cast(self.eps_iter, dtype=common_dtype),
                tf.cast(self.eps, dtype=common_dtype)))
        if self.ord == np.inf and self.clip_min is not None:
            # The 1e-6 is needed to compensate for numerical error.
            # Without the 1e-6 this fails when e.g. eps=.2, clip_min=.5,
            # clip_max=.7
            asserts.append(
                utils_tf.assert_less_equal(
                    tf.cast(self.eps,
                            x.dtype), 1e-6 + tf.cast(self.clip_max, x.dtype) -
                    tf.cast(self.clip_min, x.dtype)))

        if self.sanity_checks:
            with tf.control_dependencies(asserts):
                adv_x = tf.identity(adv_x)

        return adv_x
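
To summarize the control flow of generate() above, here is a framework-agnostic NumPy sketch of the same PGD structure for ord = inf: random initialization, nb_iter signed-gradient steps of size eps_iter, re-projection of the accumulated perturbation into the l-inf ball, and clipping to the valid data range. `grad_fn` is a hypothetical callable returning the gradient of the attack loss with respect to the input.

import numpy as np

def pgd_linf(x, grad_fn, eps, eps_iter, nb_iter, clip_min=0.0, clip_max=1.0, rng=None):
    rng = np.random.default_rng() if rng is None else rng
    # rand_init: start from a random point in the l-inf ball around x.
    adv_x = np.clip(x + rng.uniform(-eps, eps, size=x.shape), clip_min, clip_max)
    for _ in range(nb_iter):
        # One FGM step (untargeted: ascend the loss).
        adv_x = adv_x + eps_iter * np.sign(grad_fn(adv_x))
        # clip_eta: project the total perturbation back into the l-inf ball.
        eta = np.clip(adv_x - x, -eps, eps)
        # Redo the value clipping, as in the loop body above.
        adv_x = np.clip(x + eta, clip_min, clip_max)
    return adv_x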