def attack(self, x, y):
        """
        Build the symbolic graph of the iterative attack for input `x`.

        The perturbation starts from random noise projected with `clip_eta`
        (using `self.ord` and `self.eps`) when `self.rand_init` is set, and
        from zeros otherwise. It is then refined by `self.nb_iter` calls to
        `self.attack_single_step`.

        :param x: A tensor with the input image.
        :param y: Labels tensor, forwarded unchanged to
                  `self.attack_single_step`.
        :return: `x` plus the final perturbation, clipped to
                 `[self.clip_min, self.clip_max]` when both bounds are set.
        """
        from cleverhans.utils_tf import clip_eta

        if not self.rand_init:
            perturbation = tf.zeros_like(x)
        else:
            noise_shape = tf.shape(x)
            if self.ord == np.inf:
                # Uniform noise in [-eps, eps] for the max-norm case.
                perturbation = tf.random_uniform(noise_shape,
                                                 -self.eps, self.eps)
            else:
                # Standard normal noise for every other norm.
                perturbation = tf.random_normal(noise_shape, 0.0, 1.0)
            perturbation = clip_eta(perturbation, self.ord, self.eps)

        for _ in range(self.nb_iter):
            perturbation = self.attack_single_step(x, perturbation, y)

        adv_x = x + perturbation
        both_bounds_set = not (self.clip_min is None or self.clip_max is None)
        if both_bounds_set:
            adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

        return adv_x
        def body(i, ax, m):
            """Do one momentum step of the iterative attack.

            `x`, `y` and `targeted` are taken from the enclosing scope.

            :param i: Loop counter.
            :param ax: Current adversarial example.
            :param m: Accumulated (momentum) gradient.
            :return: Tuple `(i + 1, updated ax, updated m)`.
            :raises NotImplementedError: if `self.ord == 1`.
            """
            # Fail fast: ord=1 is always rejected, so do not build any ops
            # for it first.
            if self.ord == 1:
                raise NotImplementedError(
                    "This attack hasn't been tested for ord=1. "
                    "It's not clear that FGM makes a good inner "
                    "loop step for iterative optimization since "
                    "it updates just one coordinate at a time.")

            logits = self.model.get_logits(ax)
            loss = self.loss_func(labels=y, logits=logits)
            if targeted:
                loss = -loss

            # Define gradient of loss wrt input
            grad, = tf.gradients(loss, ax)

            # Normalize current gradient and add it to the accumulated gradient
            # (`range` instead of `xrange` so this also runs on Python 3)
            red_ind = list(range(1, len(grad.get_shape())))
            avoid_zero_div = tf.cast(1e-12, grad.dtype)
            grad = grad / tf.maximum(
                avoid_zero_div,
                reduce_mean(tf.abs(grad), red_ind, keepdims=True))
            m = self.decay_factor * m + grad

            optimal_perturbation = optimize_linear(m, self.eps_iter, self.ord)

            # Update and clip adversarial example in current iteration
            ax = ax + optimal_perturbation
            ax = x + utils_tf.clip_eta(ax - x, self.ord, self.eps)

            if self.clip_min is not None and self.clip_max is not None:
                ax = utils_tf.clip_by_value(ax, self.clip_min, self.clip_max)

            # The next iteration must not back-propagate through this step
            ax = tf.stop_gradient(ax)

            return i + 1, ax, m
    def attack_single_step(self, x, eta, y):
        """
        Given the original image and the perturbation computed so far, computes
        a new perturbation.

        The step direction depends on `self.pgd_update`: 'sign' uses the sign
        of the gradient, 'plain' uses the gradient divided by its per-example
        L2 norm.

        :param x: A tensor with the original input.
        :param eta: A tensor the same shape as x that holds the perturbation.
        :param y: A tensor with the target labels or ground-truth labels.
        :return: The new perturbation, projected with `clip_eta` using
                 `self.ord` and `self.eps`.
        :raises ValueError: if `self.pgd_update` is neither 'sign' nor
                            'plain'.
        """
        from cleverhans.utils_tf import model_loss, clip_eta

        adv_x = x + eta
        preds = self.model.get_probs(adv_x)
        loss = model_loss(y, preds)
        if self.targeted:
            loss = -loss
        grad, = tf.gradients(loss, adv_x)
        if self.pgd_update == 'sign':
            adv_x = adv_x + self.eps_iter * tf.sign(grad)
        elif self.pgd_update == 'plain':
            # Reduce over every non-batch axis; fall back to the previous
            # hard-coded image axes when the static rank is unknown.
            ndims = grad.get_shape().ndims
            reduce_axes = [1, 2, 3] if ndims is None else list(range(1, ndims))
            grad_norm = tf.reduce_sum(
                grad**2, axis=reduce_axes, keep_dims=True)**0.5
            # An all-zero gradient would otherwise produce 0 / 0 = NaN.
            avoid_zero_div = tf.cast(1e-12, grad.dtype)
            adv_x = adv_x + self.eps_iter * grad / tf.maximum(
                avoid_zero_div, grad_norm)
        else:
            raise ValueError('Wrong pgd_update.')
        if self.clip_min is not None and self.clip_max is not None:
            adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)
        eta = adv_x - x
        eta = clip_eta(eta, self.ord, self.eps)
        return eta
        def body(i, adv_x):
            """Run one sparse L1 descent step, then project onto the L1 ball.

            `x`, `y` and `y_kwarg` are taken from the enclosing scope.

            :param i: Loop counter.
            :param adv_x: Current adversarial example.
            :return: Tuple `(i + 1, updated adv_x)`.
            """

            step_labels, _ = self.get_or_guess_labels(adv_x, {y_kwarg: y})
            step_logits = self.model.get_logits(adv_x)
            is_targeted = self.y_target is not None

            stepped = sparse_l1_descent(
                adv_x,
                step_logits,
                y=step_labels,
                eps=self.eps_iter,
                q=self.grad_sparsity,
                clip_min=self.clip_min,
                clip_max=self.clip_max,
                clip_grad=self.clip_grad,
                targeted=is_targeted,
                sanity_checks=self.sanity_checks)

            # Project the total perturbation onto the l1-ball of radius eps
            projected = x + clip_eta(stepped - x, ord=1, eps=self.eps)

            # Clip once more: subtracting and re-adding the perturbation can
            # introduce a small numerical error.
            if not (self.clip_min is None and self.clip_max is None):
                projected = utils_tf.clip_by_value(
                    projected, self.clip_min, self.clip_max)

            return i + 1, projected
Пример #5
0
    def attack(self, x):
        """
        Build the attack graph for `x` by unrolling `self.nb_iter` single
        steps.

        :param x: A tensor with the input image.
        :return: `x` plus the final perturbation, clipped to
                 `[self.clip_min, self.clip_max]` when both bounds are set.
        """

        if not self.rand_init:
            eta = tf.zeros_like(x)
        else:
            noise = tf.random_uniform(tf.shape(x), -self.eps, self.eps,
                                      dtype=self.tf_dtype)
            eta = clip_eta(noise, self.norm, self.eps)

        for step in range(self.nb_iter):
            # The third argument is True only on the very first step.
            loss, eta = self.attack_single_step(x, eta, step == 0)
            print('iter: ', step, loss)

        adv_x = x + eta
        if not (self.clip_min is None or self.clip_max is None):
            adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

        return adv_x
Пример #6
0
    def attack(self, x, y):
        """
        Build the symbolic graph of the iterative attack using a
        `tf.while_loop`.

        The perturbation starts from uniform noise in `[-eps, eps]` projected
        with `clip_eta` when `self.rand_init` is set, and from zeros
        otherwise. The loop then applies `self.attack_single_step` while the
        counter is below `self.nb_iter`.

        :param x: A tensor with the input image.
        :param y: Labels tensor, forwarded unchanged to
                  `self.attack_single_step`.
        :return: `x` plus the final perturbation, clipped to
                 `[self.clip_min, self.clip_max]` when both bounds are set.
        """
        from cleverhans.utils_tf import clip_eta

        if not self.rand_init:
            eta = tf.zeros_like(x)
        else:
            noise = tf.random_uniform(tf.shape(x), -self.eps, self.eps,
                                      dtype=self.tf_dtype)
            eta = clip_eta(noise, self.ord, self.eps)

        def keep_going(step, _):
            return tf.less(step, self.nb_iter)

        def one_step(step, current_eta):
            return step + 1, self.attack_single_step(x, current_eta, y)

        loop_vars = [tf.zeros([]), eta]
        _, eta = tf.while_loop(keep_going, one_step, loop_vars,
                               back_prop=True)

        adv_x = x + eta
        if not (self.clip_min is None or self.clip_max is None):
            adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

        return adv_x
Пример #7
0
 def test_clip_eta_goldilocks(self):
   """test_clip_eta_goldilocks: check `clip_eta` on perturbations that are
   below, exactly at, and above the allowed size"""
   eps = 3.
   eta = tf.constant([[2.], [3.], [4.]])
   self.assertTrue(eta.dtype == tf.float32, eta.dtype)
   expected_value = np.array([[2.], [3.], [3.]])
   # The middle 1. is debatable: at the boundary the one-sided derivatives
   # differ, so the derivative is formally undefined. The test only pins
   # that the oddity is handled the same way for every argument combination.
   expected_grad = np.array([[1.], [1.], [0.]])
   for ord_arg in [np.inf, 1, 2]:
     for sign in [-1., 1.]:
       try:
         clipped = utils_tf.clip_eta(eta * sign, ord_arg, eps)
       except NotImplementedError:
         # `continue` instead of SkipTest, which would abandon the
         # remaining loop iterations
         continue
       self.assertClose(self.sess.run(clipped), sign * expected_value)
       grad, = tf.gradients(clipped, eta)
       self.assertClose(self.sess.run(grad), sign * expected_grad)
  def generate(self, x, g, **kwargs):
    """
    Generate symbolic graph for adversarial examples and return.

    :param x: The model's symbolic inputs.
    :param g: The target value of the symbolic representation
    :param kwargs: See `parse_params`
    :return: `x` plus the final perturbation, clipped to
             `[self.clip_min, self.clip_max]` when both bounds are set.
    """

    # Parse and save attack-specific parameters.
    # The call is kept out of the `assert` so that it still runs when
    # assertions are stripped (`python -O`).
    params_ok = self.parse_params(**kwargs)
    assert params_ok

    g_feat = self.model.fprop(g)[self.layer]

    # Initialize loop variables
    eta = tf.random_uniform(tf.shape(x), -self.eps, self.eps, dtype=self.tf_dtype)
    eta = clip_eta(eta, self.ord, self.eps)

    def cond(i, _):
      return tf.less(i, self.nb_iter)

    def body(i, e):
      new_eta = self.attack_single_step(x, e, g_feat)
      return i + 1, new_eta

    _, eta = tf.while_loop(cond, body, (tf.zeros([]), eta), back_prop=True)

    # Define adversarial example (and clip if necessary)
    adv_x = x + eta
    if self.clip_min is not None and self.clip_max is not None:
      adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

    return adv_x
Пример #9
0
    def attack_single_step(self, x, eta, y):
        """
        Take one signed-gradient step from `x + eta` and return the updated
        perturbation together with the loss tensors.

        :param x: A tensor with the original input.
        :param eta: A tensor the same shape as x that holds the perturbation.
        :param y: A tensor with the target labels or ground-truth labels.
        :return: Tuple `(eta, loss, loss_vector)`. `loss` is negated when
                 `self.targeted` is set; `loss_vector` is computed with
                 `mean=False` and is never negated.
        """
        import tensorflow as tf
        from cleverhans.utils_tf import model_loss, clip_eta

        current = x + eta
        preds = self.model.get_probs(current)
        loss = model_loss(y, preds)
        loss_vector = model_loss(y, preds, mean=False)
        if self.targeted:
            loss = -loss
        grad, = tf.gradients(loss, current)
        stepped = current + self.eps_iter * tf.sign(grad)
        if not (self.clip_min is None or self.clip_max is None):
            stepped = tf.clip_by_value(stepped, self.clip_min, self.clip_max)
        new_eta = clip_eta(stepped - x, self.ord, self.eps)
        return new_eta, loss, loss_vector
Пример #10
0
    def update_and_clip(ax, perturbation):
      """Add `perturbation` to `ax`, re-project around `x`, and cut the
      gradient. `x` is taken from the enclosing scope."""
      moved = ax + perturbation
      bounded = x + utils_tf.clip_eta(moved - x, self.ord, self.eps)

      if not (self.clip_min is None or self.clip_max is None):
        bounded = utils_tf.clip_by_value(bounded, self.clip_min,
                                         self.clip_max)

      return tf.stop_gradient(bounded)
        def body(i, ax, m):
            """Do a momentum step

            `x`, `y`, `targeted` and `loss_type` are taken from the enclosing
            scope.

            :param i: Loop counter.
            :param ax: Current adversarial example.
            :param m: Accumulated (momentum) gradient.
            :return: Tuple `(i + 1, updated ax, updated m)`.
            :raises NotImplementedError: if `self.ord == 1`.
            :raises ValueError: if `loss_type` is neither 'softmax' nor 'ctc'.
            """
            # Fail fast: ord=1 is always rejected, so do not build any ops
            # for it first.
            if self.ord == 1:
                raise NotImplementedError(
                    "This attack hasn't been tested for ord=1. "
                    "It's not clear that FGM makes a good inner loop step "
                    "for iterative optimization since it updates just one "
                    "coordinate at a time.")

            if loss_type == 'softmax':
                logits = self.model.get_logits(ax)
                # Early stopping is switched off; the branch below is kept
                # for reference and is never executed while this stays False.
                early_stop = False
                if early_stop:
                    # i = tf.cond(tf.less(loss, early_stop_loss_threshold), lambda: self.nb_iter, lambda: i)
                    max_y = tf.argmax(y, axis=-1, name='max_y')
                    max_logits = tf.argmax(logits, axis=-1, name='max_logits')
                    eq = tf.equal(max_y, max_logits)
                    eq = tf.cast(eq, dtype=tf.float32)
                    cnt_eq = tf.reduce_sum(1 - eq)
                    # len_txt = max_y.get_shape().as_list()[1]
                    tot_eq = tf.equal(cnt_eq, 0)
                    i = tf.cond(tot_eq, lambda: self.nb_iter, lambda: i)
                loss = softmax_cross_entropy_with_logits(labels=y, logits=logits)
                loss = tf.reduce_mean(loss, name='softmax_loss')
            elif loss_type == "ctc":
                time_major_logits, output_seq_len = self.model.get_logits(ax)
                ctc_loss = tf.nn.ctc_loss(labels=y,
                                          inputs=time_major_logits,
                                          sequence_length=output_seq_len,
                                          time_major=True,
                                          ctc_merge_repeated=True,
                                          ignore_longer_outputs_than_inputs=True)
                loss = tf.reduce_mean(ctc_loss, name='ctc_loss')
            else:
                # Previously this fell through and failed later with an
                # unbound `loss`.
                raise ValueError("Unsupported loss_type: %r" % (loss_type,))

            if targeted:
                loss = -loss

            # Define gradient of loss wrt input
            grad, = tf.gradients(loss, ax)

            # Normalize current gradient and add it to the accumulated gradient
            red_ind = list(range(1, len(grad.get_shape())))
            avoid_zero_div = tf.cast(1e-12, grad.dtype)
            grad = grad / tf.maximum(avoid_zero_div, tf.reduce_mean(tf.abs(grad), red_ind, keepdims=True))
            m = self.decay_factor * m + grad

            # optimal_perturbation = optimize_linear(m, self.eps_iter, self.ord)
            optimal_perturbation = optimize_linear_pos(m, self.eps_iter, self.ord, self.pert_type)
            # Apply self.mask element-wise to the step
            optimal_perturbation = tf.multiply(optimal_perturbation, self.mask, name="op_multiply")

            # Update and clip adversarial example in current iteration
            ax = ax + optimal_perturbation
            ax = x + utils_tf.clip_eta(ax - x, self.ord, self.eps)

            if self.clip_min is not None and self.clip_max is not None:
                ax = utils_tf.clip_by_value(ax, self.clip_min, self.clip_max)

            ax = tf.stop_gradient(ax)
            return i + 1, ax, m
Пример #12
0
        def body(i, e):
            """Take one FGM step from `x + e` and return the re-projected
            perturbation. `x`, `FGM` and `fgm_params` come from the enclosing
            scope."""
            stepped = FGM.generate(x + e, **fgm_params)

            # Keep the example inside [clip_min, clip_max] when both are set
            if not (self.clip_min is None or self.clip_max is None):
                stepped = tf.clip_by_value(stepped, self.clip_min,
                                           self.clip_max)

            # Project the perturbation back onto the self.ord norm ball
            return i + 1, clip_eta(stepped - x, self.ord, self.eps)
Пример #13
0
 def test_clip_eta_norm_0(self):
     """test_clip_eta_norm_0: `clip_eta` must return finite values when the
     norm of `eta` is zero. This used to cause a divide by zero for ord 1
     and ord 2."""
     eps = .25
     zero_eta = tf.zeros((5, 3))
     assert zero_eta.dtype == tf.float32, zero_eta.dtype
     for ord_arg in [np.inf, 1, 2]:
         result = self.sess.run(clip_eta(zero_eta, ord_arg, eps))
         assert not np.isinf(result).any()
         assert not np.isnan(result).any(), (ord_arg, result)
Пример #14
0
    def attack(self, x, y):
        """
        Build the symbolic graph of the iterative attack with restarts.

        The attack is unrolled `self.nb_restarts + 1` times. Each restart
        begins from uniform noise in `[-eps, eps]` projected with `clip_eta`
        when `self.rand_init` is set (zeros otherwise) and runs
        `self.nb_iter` calls to `self.attack_single_step`. Across restarts,
        the perturbation with the larger per-example loss is kept.

        :param x: A tensor with the input image.
        :param y: Labels tensor, forwarded unchanged to
                  `self.attack_single_step`.
        :return: `x` plus the best perturbation, clipped to
                 `[self.clip_min, self.clip_max]` when both bounds are set.
        """
        import tensorflow as tf
        from cleverhans.utils_tf import clip_eta

        best_loss = None
        best_eta = None

        print("Number of steps running", self.nb_restarts + 1)

        for _ in range(self.nb_restarts + 1):
            if self.rand_init:
                eta = tf.random_uniform(tf.shape(x), -self.eps, self.eps)
                eta = clip_eta(eta, self.ord, self.eps)
            else:
                eta = tf.zeros_like(x)

            # NOTE(review): assumes self.nb_iter >= 1; otherwise `loss_vec`
            # is never assigned.
            for _ in range(self.nb_iter):
                eta, loss, loss_vec = self.attack_single_step(x, eta, y)

            # Identity check: `best_loss` holds a tensor after the first
            # restart, and `==` on a tensor is not a reliable None test.
            if best_loss is None:
                best_loss = loss_vec
                best_eta = eta
            else:
                # Per example, switch to this restart where its loss is
                # larger. The condition is computed once, before either
                # value is updated.
                switch_cond = tf.less(best_loss, loss_vec)
                best_loss = tf.where(switch_cond, loss_vec, best_loss)
                best_eta = tf.where(switch_cond, eta, best_eta)

        adv_x = x + best_eta
        if self.clip_min is not None and self.clip_max is not None:
            adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

        return adv_x
Пример #15
0
    def body(i, adv_x):
      """Advance `adv_x` by one FGM step, then re-project around `x`.
      `x`, `FGM` and `fgm_params` come from the enclosing scope."""
      stepped = FGM.generate(adv_x, **fgm_params)

      # Project the perturbation onto the self.ord norm ball around x
      projected = x + clip_eta(stepped - x, self.ord, self.eps)

      # Clip again. FGM already clipped, but subtracting and re-adding the
      # perturbation can introduce a small numerical error.
      if not (self.clip_min is None and self.clip_max is None):
        projected = utils_tf.clip_by_value(projected, self.clip_min,
                                           self.clip_max)

      return i + 1, projected
Пример #16
0
 def test_clip_eta_norm_0(self):
   """test_clip_eta_norm_0: `clip_eta` must return finite values when the
   norm of `eta` is zero. This used to cause a divide by zero for ord
   1 and ord 2."""
   eps = .25
   zero_eta = tf.zeros((5, 3))
   self.assertTrue(zero_eta.dtype == tf.float32, zero_eta.dtype)
   for ord_arg in [np.inf, 1, 2]:
     try:
       clipped_op = utils_tf.clip_eta(zero_eta, ord_arg, eps)
     except NotImplementedError:
       # `continue` instead of SkipTest, which would abandon the
       # remaining loop iterations
       continue
     result = self.sess.run(clipped_op)
     self.assertTrue(not np.isinf(result).any())
     self.assertTrue(not np.isnan(result).any(), (ord_arg, result))
  def attack_single_step(self, x, eta, g_feat):
    """
    One signed-gradient step that moves the internal representation of
    `x + eta` at `self.layer` towards the guide features.

    :param x: the input placeholder
    :param eta: A tensor the same shape as x that holds the perturbation.
    :param g_feat: model's internal tensor for guide
    :return: the updated perturbation, projected with `clip_eta` using
             `self.ord` and `self.eps`
    """

    perturbed = x + eta
    a_feat = self.model.fprop(perturbed)[self.layer]

    # Sum over every non-batch axis: features are (batch, c) or
    # (batch, w, h, c)
    feat_axes = list(range(1, len(a_feat.shape)))

    # Targeted attack: maximising the negative squared distance pulls the
    # features towards the guide
    loss = -reduce_sum(tf.square(a_feat - g_feat), feat_axes)

    grad, = tf.gradients(loss, perturbed)

    # Step of size eps_iter along the sign of the gradient
    stepped = perturbed + self.eps_iter * tf.sign(grad)

    # Reset values outside [clip_min, clip_max] when both bounds are set
    if not (self.clip_min is None or self.clip_max is None):
      stepped = tf.clip_by_value(stepped, self.clip_min, self.clip_max)

    stepped = tf.stop_gradient(stepped)

    return clip_eta(stepped - x, self.ord, self.eps)
Пример #18
0
 def test_clip_eta_goldilocks(self):
     # Check `clip_eta` on perturbations that are below, exactly at, and
     # above the allowed size
     eps = 3.
     eta = tf.constant([[2.], [3.], [4.]])
     assert eta.dtype == tf.float32, eta.dtype
     expected_value = np.array([[2.], [3.], [3.]])
     # The middle 1. is debatable: at the boundary the one-sided derivatives
     # differ, so the derivative is formally undefined. The test only pins
     # that the oddity is handled the same way for every argument
     # combination.
     expected_grad = np.array([[1.], [1.], [0.]])
     for ord_arg in [np.inf, 1, 2]:
         for sign in [-1., 1.]:
             clipped = clip_eta(eta * sign, ord_arg, eps)
             self.assertClose(self.sess.run(clipped), sign * expected_value)
             grad, = tf.gradients(clipped, eta)
             assert np.allclose(self.sess.run(grad), sign * expected_grad)
Пример #19
0
        def body(i, ax, m):
            """Do one momentum step with a smoothed, normalized gradient.

            `x`, `y` and `targeted` are taken from the enclosing scope.

            :param i: Loop counter.
            :param ax: Current adversarial example.
            :param m: Accumulated (momentum) gradient.
            :return: Tuple `(i + 1, updated ax, updated m)`.
            :raises NotImplementedError: if `self.ord == 1`.
            """
            # Fail fast: ord=1 is always rejected, so do not build any ops
            # for it first.
            if self.ord == 1:
                raise NotImplementedError(
                    "This attack hasn't been tested for ord=1. "
                    "It's not clear that FGM makes a good inner "
                    "loop step for iterative optimization since "
                    "it updates just one coordinate at a time.")

            logits = self.model.get_logits(ax)
            loss = softmax_cross_entropy_with_logits(labels=y, logits=logits)
            if targeted:
                loss = -loss

            # Define gradient of loss wrt input
            grad, = tf.gradients(loss, ax)

            grad = self.grad_smooth(grad)

            # Normalize current gradient and add it to the accumulated gradient
            grad = self.grad_norm(grad)

            # Momentum update, followed by a second normalization
            m = self.decay_factor * m + grad
            m = self.grad_norm(m)

            optimal_perturbation = optimize_linear(m, self.eps_iter, self.ord)

            # Update and clip adversarial example in current iteration
            ax = ax + optimal_perturbation
            ax = x + utils_tf.clip_eta(ax - x, self.ord, self.eps)

            if self.clip_min is not None and self.clip_max is not None:
                ax = utils_tf.clip_by_value(ax, self.clip_min, self.clip_max)

            # The next iteration must not back-propagate through this step
            ax = tf.stop_gradient(ax)

            return i + 1, ax, m
Пример #20
0
def overwrite_fastfeature(attack, x, g, eta, **kwargs):
    """
    Variant of the fast-feature attack's generate function that takes the
    initial perturbation `eta` as an input.

    :param attack: Attack object; supplies `parse_params`, `model`, `layer`,
                   `ord`, `eps`, `nb_iter`, `clip_min`, `clip_max` and
                   `attack_single_step`.
    :param x: The model's symbolic inputs.
    :param g: Guide input whose representation at `attack.layer` is the
              target.
    :param eta: Initial perturbation; converted to a float32 tensor.
    :param kwargs: Forwarded to `attack.parse_params`.
    :return: `x` plus the final perturbation, clipped to
             `[attack.clip_min, attack.clip_max]` when both bounds are set.
    """
    from cleverhans.utils_tf import clip_eta

    # Parse and save attack-specific parameters.
    # The call is kept out of the `assert` so that it still runs when
    # assertions are stripped (`python -O`).
    params_ok = attack.parse_params(**kwargs)
    assert params_ok

    g_feat = attack.model.get_layer(g, attack.layer)

    # Initialize loop variables
    eta = tf.Variable(tf.convert_to_tensor(eta, np.float32))
    eta = clip_eta(eta, attack.ord, attack.eps)

    for _ in range(attack.nb_iter):
        eta = attack.attack_single_step(x, eta, g_feat)

    # Define adversarial example (and clip if necessary)
    adv_x = x + eta
    if attack.clip_min is not None and attack.clip_max is not None:
        adv_x = tf.clip_by_value(adv_x, attack.clip_min, attack.clip_max)

    return adv_x
Пример #21
0
    def attack_single_step(self, x, eta, y):
        """
        Take one signed-gradient step on `self.loss()` from `x + eta` and
        return the updated perturbation.

        :param x: A tensor with the original input.
        :param eta: A tensor the same shape as x that holds the perturbation.
        :param y: A tensor with the target labels or ground-truth labels
                  (not read in this method).
        :return: The new perturbation, projected with `clip_eta` using
                 `self.ord` and `self.eps`.
        """
        from cleverhans.utils_tf import clip_eta

        perturbed = x + eta
        clean_and_adv = tf.concat([x, perturbed], 0)
        # The returned logits are not used directly here.
        # NOTE(review): presumably self.loss() relies on state this forward
        # pass sets up - confirm.
        self.model.get_logits(clean_and_adv)

        loss = self.loss()
        grad, = tf.gradients(loss, perturbed)
        stepped = perturbed + self.eps_iter * tf.sign(grad)
        if not (self.clip_min is None or self.clip_max is None):
            stepped = tf.clip_by_value(stepped, self.clip_min, self.clip_max)
        return clip_eta(stepped - x, self.ord, self.eps)
Пример #22
0
    def generate(self, x, **kwargs):
        """
        Generate symbolic graph for adversarial examples and return.

        :param x: The model's symbolic inputs.
        :param eps: (optional float) maximum distortion of adversarial example
                    compared to original input
        :param eps_iter: (optional float) step size for each attack iteration
        :param nb_iter: (optional int) Number of attack iterations.
        :param rand_init: (optional) Whether to use random initialization
        :param y: (optional) A tensor with the true class labels
            NOTE: do not use smoothed labels here
        :param y_target: (optional) A tensor with the labels to target. Leave
                            y_target=None if y is also set. Labels should be
                            one-hot-encoded.
            NOTE: do not use smoothed labels here
        :param ord: (optional) Order of the norm (mimics Numpy).
                    Possible values: np.inf, 1 or 2.
        :param clip_min: (optional float) Minimum input component value
        :param clip_max: (optional float) Maximum input component value
        :return: `x` plus the final perturbation, clipped to
                 `[clip_min, clip_max]` when clipping bounds are set.
        """
        # Parse and save attack-specific parameters.
        # The call is kept out of the `assert` so that it still runs when
        # assertions are stripped (`python -O`).
        params_ok = self.parse_params(**kwargs)
        assert params_ok

        # Initialize loop variables
        if self.rand_init:
            eta = tf.random_uniform(tf.shape(x),
                                    -self.rand_minmax,
                                    self.rand_minmax,
                                    dtype=self.tf_dtype)
        else:
            # Use the same dtype as the random branch so `x + eta` also
            # works when self.tf_dtype is not float32.
            eta = tf.zeros(tf.shape(x), dtype=self.tf_dtype)
        eta = clip_eta(eta, self.ord, self.eps)

        # Fix labels to the first model predictions for loss computation
        model_preds = self.model.get_output(x)
        preds_max = reduce_max(model_preds, 1, keepdims=True)
        if self.y_target is not None:
            y = self.y_target
            targeted = True
        elif self.y is not None:
            y = self.y
            targeted = False
        else:
            y = tf.to_float(tf.equal(model_preds, preds_max))
            y = tf.stop_gradient(y)
            targeted = False

        y_kwarg = 'y_target' if targeted else 'y'
        fgm_params = {
            'eps': self.eps_iter,
            y_kwarg: y,
            'ord': self.ord,
            'clip_min': self.clip_min,
            'clip_max': self.clip_max
        }

        # Use getattr() to avoid errors in eager execution attacks
        FGM = self.FGM_CLASS(self.model,
                             sess=getattr(self, 'sess', None),
                             dtypestr=self.dtypestr)

        def cond(i, _):
            return tf.less(i, self.nb_iter)

        def body(i, e):
            adv_x = FGM.generate(x + e, **fgm_params)

            # Clipping perturbation according to clip_min and clip_max
            if self.clip_min is not None and self.clip_max is not None:
                adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

            # Clipping perturbation eta to self.ord norm ball
            eta = adv_x - x
            eta = clip_eta(eta, self.ord, self.eps)
            return i + 1, eta

        _, eta = tf.while_loop(cond, body, [tf.zeros([]), eta], back_prop=True)

        # Define adversarial example (and clip if necessary)
        adv_x = x + eta
        if self.clip_min is not None or self.clip_max is not None:
            # Clipping needs both bounds; setting only one is rejected.
            assert self.clip_min is not None and self.clip_max is not None
            adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

        asserts = []

        # Asserts run only on CPU.
        # When multi-GPU eval code tries to force all PGD ops onto GPU, this
        # can cause an error.
        with tf.device("/CPU:0"):
            asserts.append(tf.assert_less_equal(self.eps_iter, self.eps))
            if self.ord == np.inf and self.clip_min is not None:
                # The 1e-6 is needed to compensate for numerical error.
                # Without the 1e-6 this fails when e.g. eps=.2, clip_min=.5, clip_max=.7
                asserts.append(
                    tf.assert_less_equal(self.eps,
                                         1e-6 + self.clip_max - self.clip_min))

        # The assert ops only take effect when sanity checks are enabled.
        if self.sanity_checks:
            with tf.control_dependencies(asserts):
                adv_x = tf.identity(adv_x)

        return adv_x
Пример #23
0
    def attack_single_step(self, x, eta, y):
        """
        Given the original image and the perturbation computed so far, computes
        a new perturbation.

        The differentiated loss is `self.model_loss(y, logits)` (negated when
        `self.targeted`) plus `self.detection_lambda` times a
        detection-evasion term. Three such terms are built below (marginal,
        logit, KL); only the logit one is currently added to the loss.

        :param x: A tensor with the original input.
        :param eta: A tensor the same shape as x that holds the perturbation.
        :param y: A tensor with the target labels or ground-truth labels.
        :return: The new perturbation after a signed gradient step of size
                 `self.eps_iter`, optional clipping of the example to
                 `[self.clip_min, self.clip_max]`, and `clip_eta` with
                 `self.ord` and `self.eps`.
        """
        import tensorflow as tf
        from cleverhans.utils_tf import clip_eta

        adv_x = x + eta
        preds = self.model.get_logits(adv_x)  # shape (K, N, dimY)
        loss = self.model_loss(y, preds)  # see Carlini's recipe
        if self.targeted:
            loss = -loss

        # now forms the predicted output
        # 2-D outputs are used directly; anything else is reduced with
        # self.combine
        if len(preds.get_shape().as_list()) == 2:
            logits = preds
        else:
            logits = self.combine(preds)

        # loss to evade marginal detection
        def logsumexp(x):
            # Log-sum-exp over axis 1, shifted by the row maximum.
            # Note: the parameter `x` shadows the outer `x` inside this
            # helper only.
            x_max = tf.expand_dims(tf.reduce_max(x, 1), 1)
            res = tf.log(
                tf.clip_by_value(tf.reduce_sum(tf.exp(x - x_max), 1), 1e-10,
                                 np.inf))
            return res + x_max[:, 0]

        logpx = logsumexp(logits)
        # Built but not added to the loss (see the loss_detect selection
        # below)
        loss_detect_marginal = -tf.reduce_mean(
            tf.nn.relu(-logpx - self.delta_marginal))

        # loss to evade logit detection
        y_pred = tf.argmax(logits, 1)
        loss_detect_logit = tf.nn.relu(-logits - self.delta_logit)
        # NOTE(review): gathering along axis 1 with the whole `y_pred` vector
        # selects a set of columns for every row rather than one entry per
        # example - confirm this is intended.
        loss_detect_logit = -tf.reduce_mean(
            tf.gather(loss_detect_logit, y_pred, axis=1))

        # loss to evade kl detection
        # NOTE(review): N is not used anywhere below.
        N = logits.get_shape().as_list()[0]
        logits_normalised = logits - tf.expand_dims(logsumexp(logits), 1)
        kl = tf.reduce_sum(
            self.kl_prob_vec *
            (tf.log(self.kl_prob_vec) - tf.expand_dims(logits_normalised, 1)),
            2)
        loss_detect_kl = tf.nn.relu(kl - self.delta_kl)
        # Built but not added to the loss (see the loss_detect selection
        # below)
        loss_detect_kl = -tf.reduce_mean(
            tf.gather(loss_detect_kl, y_pred, axis=1))

        # Select which detection term is used; only the logit term is active.
        #loss_detect = loss_detect_marginal
        loss_detect = loss_detect_logit
        #loss_detect = loss_detect_kl

        # combine
        # This print runs each time this method is called, not per session
        # run.
        print('using lambda_detect = %.2f' % self.detection_lambda)
        loss += self.detection_lambda * loss_detect

        grad, = tf.gradients(loss, adv_x)
        scaled_signed_grad = self.eps_iter * tf.sign(grad)
        adv_x = adv_x + scaled_signed_grad
        if self.clip_min is not None and self.clip_max is not None:
            adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)
        eta = adv_x - x
        eta = clip_eta(eta, self.ord, self.eps)
        return eta
Пример #24
0
    def attack(self, x, y_p, **kwargs):
        """
        This method creates a symbolic graph of the MadryEtAl attack on
        multiple GPUs. The graph is created on the first n GPUs.

        Stop gradient is needed to get the speed-up. This prevents us from
        being able to back-prop through the attack.

        :param x: A tensor with the input image.
        :param y_p: Ground truth label or predicted label.
        :param kwargs: Accepted but not read in this method.
        :return: Two lists containing the input and output tensors of each GPU.
                 Each list holds one OrderedDict per attack step; the last
                 step's outputs carry 'adv_x' instead of 'eta'.
        """
        inputs = []
        outputs = []

        # Create the initial random perturbation
        device_name = '/gpu:0'
        self.model.set_device(device_name)
        with tf.device(device_name):
            with tf.variable_scope('init_rand'):
                if self.rand_init:
                    eta = tf.random_uniform(tf.shape(x), -self.eps, self.eps)
                    eta = clip_eta(eta, self.ord, self.eps)
                    eta = tf.stop_gradient(eta)
                else:
                    eta = tf.zeros_like(x)

        # TODO: Break the graph only nGPU times instead of nb_iter times.
        # The current implementation by the time an adversarial example is
        # used for training, the weights of the model have changed nb_iter
        # times. This can cause slower convergence compared to the single GPU
        # adversarial training.
        for i in range(self.nb_iter):
            # Create the graph for i'th step of attack
            inputs += [OrderedDict()]
            outputs += [OrderedDict()]
            # Each step is placed on the device of the current `x`
            device_name = x.device
            self.model.set_device(device_name)
            with tf.device(device_name):
                with tf.variable_scope('step%d' % i):
                    if i > 0:
                        # Clone the variables to separate the graph of 2 GPUs
                        x = clone_variable('x', x)
                        y_p = clone_variable('y_p', y_p)
                        eta = clone_variable('eta', eta)

                    # x and y_p are recorded as both inputs and outputs of
                    # the step; eta is recorded as an input here and as an
                    # output after the step below
                    inputs[i]['x'] = x
                    inputs[i]['y_p'] = y_p
                    outputs[i]['x'] = x
                    outputs[i]['y_p'] = y_p
                    inputs[i]['eta'] = eta

                    eta = self.attack_single_step(x, eta, y_p)

                    if i < self.nb_iter-1:
                        outputs[i]['eta'] = eta
                    else:
                        # adv_x, not eta is the output of the last step
                        adv_x = x + eta
                        if (self.clip_min is not None
                                and self.clip_max is not None):
                            adv_x = tf.clip_by_value(adv_x, self.clip_min,
                                                     self.clip_max)
                        adv_x = tf.stop_gradient(adv_x, name='adv_x')
                        outputs[i]['adv_x'] = adv_x

        return inputs, outputs
Пример #25
0
  def generate(self, x, **kwargs):
    """
    Build the attack graph step by step and save every intermediate batch.

    Each iteration divides the loss gradient by its mean absolute value,
    folds it into exponential moving averages of the gradient and of its
    square (weights `self.betha1` / `self.betha2`), takes an
    `optimize_linear` step of size `self.eps_iter` along their ratio and
    projects the result back into the `self.eps` ball around `x`. After each
    step has been added to the graph, the session is run `self.nb_batches`
    times and the resulting images are written to `self.adv_dir`.

    :param x: The model's symbolic inputs.
    :param kwargs: Forwarded to `parse_params` and `get_or_guess_labels`.
    :return: The symbolic adversarial example after the last iteration.
    :raises NotImplementedError: if `self.ord` is 1.
    """
    # Keep the call outside the assert so it still runs under `python -O`.
    params_ok = self.parse_params(**kwargs)
    assert params_ok

    # Reject the unsupported norm before any op is added to the graph.
    if self.ord == 1:
      raise NotImplementedError("This attack hasn't been tested for ord=1."
                                "It's not clear that FGM makes a good inner "
                                "loop step for iterative optimization since "
                                "it updates just one coordinate at a time.")

    asserts = []

    # If a data range was specified, the input must lie inside it.
    if self.clip_min is not None:
      asserts.append(utils_tf.assert_greater_equal(
          x, tf.cast(self.clip_min, x.dtype)))

    if self.clip_max is not None:
      asserts.append(utils_tf.assert_less_equal(
          x, tf.cast(self.clip_max, x.dtype)))

    # Running averages of the gradient and of the squared gradient.
    m_cache = tf.zeros_like(x)
    v_cache = tf.zeros_like(x)
    adv_x = x

    y, _nb_classes = self.get_or_guess_labels(x, kwargs)
    # Rescale every label row so that it sums to one.
    y = y / reduce_sum(y, 1, keepdims=True)
    targeted = (self.y_target is not None)

    def save_batch(directory, images, labels, iteration, batch_idx):
      """Save each image of a batch under a name encoding its origin."""
      for idx, (image, label) in enumerate(zip(images, labels)):
        filename = "id{}_b{}_it{}_l{}.png".format(idx, batch_idx,
                                                  iteration, np.argmax(label))
        save_image_np(join(directory, filename), image)

    for i in range(self.nb_iter):
      self.logger.debug("Starting #{} iteration".format(i + 1))

      logits = self.model.get_logits(adv_x)
      loss = softmax_cross_entropy_with_logits(labels=y, logits=logits)
      if targeted:
        # A targeted attack minimises the loss towards the target labels.
        loss = -loss

      grad, = tf.gradients(loss, adv_x)

      # Normalise the gradient by its mean absolute value per example.
      red_ind = list(range(1, len(grad.get_shape())))
      avoid_zero_div = tf.cast(1e-8, grad.dtype)
      grad = grad / tf.maximum(
          avoid_zero_div,
          reduce_mean(tf.abs(grad), red_ind, keepdims=True))

      m_cache = self.betha1 * m_cache + (1 - self.betha1) * grad
      v_cache = self.betha2 * v_cache + (1 - self.betha2) * tf.square(grad)
      update = tf.divide(m_cache, tf.sqrt(v_cache + avoid_zero_div))

      optimal_perturbation = optimize_linear(update, self.eps_iter, self.ord)

      # Take the step, then project back into the eps ball around x.
      adv_x = adv_x + optimal_perturbation
      adv_x = x + utils_tf.clip_eta(adv_x - x, self.ord, self.eps)

      if self.clip_min is not None and self.clip_max is not None:
        adv_x = utils_tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

      adv_x = tf.stop_gradient(adv_x)

      if self.sanity_checks:
        with tf.control_dependencies(asserts):
          adv_x = tf.identity(adv_x)

      # Evaluate the graph built so far and dump the attacked batches.
      with self.sess.as_default():
        self.sess.run(self.init_op)
        for batch in range(self.nb_batches):
          adv_x_np, y_np = self.sess.run([adv_x, y])
          self.logger.debug("Saving attacked batch #{}".format(batch + 1))
          save_batch(self.adv_dir, adv_x_np, y_np, i, batch)

    return adv_x
    def generate(self, x, **kwargs):
        """
        Generate symbolic graph for adversarial examples and return.

        Runs `self.nb_iter` iterations of `sparse_l1_descent` inside a
        `tf.while_loop`. After every iteration the perturbation is clipped
        with `clip_eta(..., ord=1, eps=self.eps)` and the result is clipped
        to the data range again.

        :param x: The model's symbolic inputs.
        :param kwargs: See `parse_params`
        :return: The symbolic adversarial examples.
        """
        # Parse and save attack-specific parameters.
        # Keep the call outside the assert so it still runs under
        # `python -O`.
        params_ok = self.parse_params(**kwargs)
        assert params_ok

        asserts = []

        # If a data range was specified, check that the input was in that range
        if self.clip_min is not None:
            asserts.append(
                utils_tf.assert_greater_equal(x,
                                              tf.cast(self.clip_min, x.dtype)))

        if self.clip_max is not None:
            asserts.append(
                utils_tf.assert_less_equal(x, tf.cast(self.clip_max, x.dtype)))

        # Initialize loop variables
        if self.rand_init:
            eta = random_lp_vector(tf.shape(x),
                                   ord=1,
                                   eps=tf.cast(self.eps, x.dtype),
                                   dtype=x.dtype)
        else:
            # Match x's dtype: tf.zeros defaults to float32, which would
            # make `x + eta` fail for any other input dtype.
            eta = tf.zeros(tf.shape(x), dtype=x.dtype)

        # Clip eta
        eta = clip_eta(eta, ord=1, eps=self.eps)
        adv_x = x + eta
        if self.clip_min is not None or self.clip_max is not None:
            adv_x = utils_tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

        if self.y_target is not None:
            y = self.y_target
            targeted = True
        elif self.y is not None:
            y = self.y
            targeted = False
        else:
            # No labels given: use the model's own prediction on x.
            model_preds = self.model.get_probs(x)
            preds_max = tf.reduce_max(model_preds, 1, keepdims=True)
            y = tf.to_float(tf.equal(model_preds, preds_max))
            y = tf.stop_gradient(y)
            targeted = False
            del model_preds

        y_kwarg = 'y_target' if targeted else 'y'

        def cond(i, _):
            """Iterate until requested number of iterations is completed"""
            return tf.less(i, self.nb_iter)

        def body(i, adv_x):
            """Do a projected gradient step"""

            labels, _ = self.get_or_guess_labels(adv_x, {y_kwarg: y})
            logits = self.model.get_logits(adv_x)

            adv_x = sparse_l1_descent(adv_x,
                                      logits,
                                      y=labels,
                                      eps=self.eps_iter,
                                      q=self.grad_sparsity,
                                      clip_min=self.clip_min,
                                      clip_max=self.clip_max,
                                      clip_grad=self.clip_grad,
                                      targeted=targeted,
                                      sanity_checks=self.sanity_checks)

            # Clipping perturbation eta to the l1-ball
            eta = adv_x - x
            eta = clip_eta(eta, ord=1, eps=self.eps)
            adv_x = x + eta

            # Redo the clipping.
            # Subtracting and re-adding eta can add some small numerical error.
            if self.clip_min is not None or self.clip_max is not None:
                adv_x = utils_tf.clip_by_value(adv_x, self.clip_min,
                                               self.clip_max)

            return i + 1, adv_x

        _, adv_x = tf.while_loop(cond,
                                 body, (tf.zeros([]), adv_x),
                                 back_prop=True,
                                 maximum_iterations=self.nb_iter)

        # A single step must not be larger than the total budget. Both
        # values are cast to one dtype so that they can be compared.
        common_dtype = tf.float32
        asserts.append(
            utils_tf.assert_less_equal(
                tf.cast(self.eps_iter, dtype=common_dtype),
                tf.cast(self.eps, dtype=common_dtype)))

        if self.sanity_checks:
            with tf.control_dependencies(asserts):
                adv_x = tf.identity(adv_x)

        return adv_x
Пример #27
0
  def attack(self, x, y_p, **kwargs):
    """
    Build the symbolic graph of the MadryEtAl attack for multiple GPUs.

    The initial perturbation is created on '/gpu:0'. Every attack iteration
    is then built inside its own 'step<i>' variable scope, on the device
    reported by the current `x` tensor.

    Stop gradient is needed to get the speed-up. This prevents us from
    being able to back-prop through the attack.

    :param x: A tensor with the input image.
    :param y_p: Ground truth label or predicted label.
    :return: Two lists (inputs, outputs), each holding one OrderedDict of
             tensors per attack iteration.
    """
    # Starting perturbation: bounded uniform noise, or all zeros.
    gpu0 = '/gpu:0'
    self.model.set_device(gpu0)
    with tf.device(gpu0), tf.variable_scope('init_rand'):
      if self.rand_init:
        eta = tf.stop_gradient(
            clip_eta(tf.random_uniform(tf.shape(x), -self.eps, self.eps),
                     self.ord, self.eps))
      else:
        eta = tf.zeros_like(x)

    # TODO: Break the graph only nGPU times instead of nb_iter times.
    # The current implementation by the time an adversarial example is
    # used for training, the weights of the model have changed nb_iter
    # times. This can cause slower convergence compared to the single GPU
    # adversarial training.
    inputs, outputs = [], []
    for step in range(self.nb_iter):
      # One pair of tensor dictionaries per attack step.
      fed = OrderedDict()
      produced = OrderedDict()
      inputs.append(fed)
      outputs.append(produced)

      current_device = x.device
      self.model.set_device(current_device)
      with tf.device(current_device), tf.variable_scope('step%d' % step):
        if step > 0:
          # Clone the variables to separate the graph of 2 GPUs
          x = clone_variable('x', x)
          y_p = clone_variable('y_p', y_p)
          eta = clone_variable('eta', eta)

        fed['x'] = x
        fed['y_p'] = y_p
        fed['eta'] = eta
        produced['x'] = x
        produced['y_p'] = y_p

        eta = self.attack_single_step(x, eta, y_p)

        if step < self.nb_iter - 1:
          produced['eta'] = eta
          continue

        # The last step exposes adv_x rather than eta.
        final_x = x + eta
        if not (self.clip_min is None or self.clip_max is None):
          final_x = tf.clip_by_value(final_x, self.clip_min, self.clip_max)
        produced['adv_x'] = tf.stop_gradient(final_x, name='adv_x')

    return inputs, outputs
    def generate(self, x, **kwargs):
        """
        Generate symbolic graph for adversarial examples and return.

        :param x: The model's symbolic inputs.
        :param eps: (optional float) maximum distortion of adversarial example
                    compared to original input
        :param eps_iter: (optional float) step size for each attack iteration
        :param nb_iter: (optional int) Number of attack iterations.
        :param rand_init: (optional) Whether to use random initialization
        :param y: (optional) A tensor with the true class labels
          NOTE: do not use smoothed labels here
        :param y_target: (optional) A tensor with the labels to target. Leave
                         y_target=None if y is also set. Labels should be
                         one-hot-encoded.
          NOTE: do not use smoothed labels here
        :param ord: (optional) Order of the norm (mimics Numpy).
                    Possible values: np.inf, 1 or 2.
        :param clip_min: (optional float) Minimum input component value
        :param clip_max: (optional float) Maximum input component value
        :return: The symbolic adversarial examples.
        :raises NotImplementedError: if `self.ord` is 1.
        """
        # Parse and save attack-specific parameters.
        # Keep the call outside the assert so it still runs under
        # `python -O`.
        params_ok = self.parse_params(**kwargs)
        assert params_ok

        # Reject the unsupported norm before any op is added to the graph.
        if self.ord == 1:
            raise NotImplementedError(
                "It's not clear that FGM is a good inner loop"
                " step for PGD when ord=1, because ord=1 FGM "
                " changes only one pixel at a time. We need "
                " to rigorously test a strong ord=1 PGD "
                "before enabling this feature.")

        # Initialize loop variables
        if self.rand_init:
            eta = tf.random_uniform(tf.shape(x),
                                    tf.cast(-self.rand_minmax, x.dtype),
                                    tf.cast(self.rand_minmax, x.dtype),
                                    dtype=x.dtype)
        else:
            # Match x's dtype: tf.zeros defaults to float32, which would
            # make `x + eta` fail for any other input dtype.
            eta = tf.zeros(tf.shape(x), dtype=x.dtype)

        # Clip eta
        eta = clip_eta(eta, self.ord, self.eps)
        adv_x = x + eta
        if self.clip_min is not None or self.clip_max is not None:
            adv_x = utils_tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

        if self.y_target is not None:
            y = self.y_target
            targeted = True
        elif self.y is not None:
            y = self.y
            targeted = False
        else:
            # No labels given: use the model's own prediction on x.
            model_preds = self.model.get_probs(x)
            preds_max = reduce_max(model_preds, 1, keepdims=True)
            y = tf.to_float(tf.equal(model_preds, preds_max))
            y = tf.stop_gradient(y)
            targeted = False
            del model_preds

        y_kwarg = 'y_target' if targeted else 'y'
        fgm_params = {
            'eps': self.eps_iter,
            y_kwarg: y,
            'ord': self.ord,
            'clip_min': self.clip_min,
            'clip_max': self.clip_max,
            'loss_func': self.loss_func
        }

        # Use getattr() to avoid errors in eager execution attacks
        fgm = self.FGM_CLASS(self.model,
                             sess=getattr(self, 'sess', None),
                             dtypestr=self.dtypestr)

        def cond(i, _):
            """Iterate until requested number of iterations is completed"""
            return tf.less(i, self.nb_iter)

        def body(i, adv_x):
            """Do one FGM step followed by a projection onto the eps ball"""
            adv_x = fgm.generate(adv_x, **fgm_params)

            # Clipping perturbation eta to self.ord norm ball
            eta = adv_x - x
            eta = clip_eta(eta, self.ord, self.eps)
            adv_x = x + eta

            # Redo the clipping.
            # FGM already did it, but subtracting and re-adding eta can add some
            # small numerical error.
            if self.clip_min is not None or self.clip_max is not None:
                adv_x = utils_tf.clip_by_value(adv_x, self.clip_min,
                                               self.clip_max)

            return i + 1, adv_x

        _, adv_x = tf.while_loop(cond,
                                 body, [tf.zeros([]), adv_x],
                                 back_prop=True)

        asserts = []

        # Asserts run only on CPU.
        # When multi-GPU eval code tries to force all PGD ops onto GPU, this
        # can cause an error.
        with tf.device("/CPU:0"):
            asserts.append(tf.assert_less_equal(self.eps_iter, self.eps))
            # The range check needs both bounds; with only one of them set
            # the subtraction below would fail on None.
            if (self.ord == np.inf and self.clip_min is not None
                    and self.clip_max is not None):
                # The 1e-6 is needed to compensate for numerical error.
                # Without the 1e-6 this fails when e.g. eps=.2, clip_min=.5,
                # clip_max=.7
                asserts.append(
                    tf.assert_less_equal(self.eps,
                                         1e-6 + self.clip_max - self.clip_min))

        if self.sanity_checks:
            with tf.control_dependencies(asserts):
                adv_x = tf.identity(adv_x)

        return adv_x
Пример #29
0
def pgd_perturb(x,
                y,
                loss_fn,
                y_target=None,
                clip_min=None,
                clip_max=None,
                rand_init=False,
                ord=np.inf,
                eps=0.3,
                eps_iter=0.1,
                rand_minmax=0.3,
                nb_iter=20):
    """
    Build the symbolic graph of a projected gradient descent attack.

    Starting from `x` (optionally plus uniform noise), `nb_iter` calls to
    `fgm_perturb` are chained inside a `tf.while_loop`. After each call the
    perturbation is clipped with `clip_eta(eta, ord, eps)` and the result is
    clipped to the data range again.

    The defaults nb_iter=20 and eps_iter=0.1 were chosen for higher eps
    attacks.

    :param x: The symbolic input to attack.
    :param y: Labels for an untargeted attack; ignored when `y_target` is
              given.
    :param loss_fn: Forwarded to `fgm_perturb` as `loss_fn`.
    :param y_target: (optional) Labels to target.
    :param clip_min: (optional) Minimum input component value.
    :param clip_max: (optional) Maximum input component value.
    :param rand_init: Whether to start from a random perturbation.
    :param ord: Order of the norm passed to `clip_eta` and `fgm_perturb`.
    :param eps: Bound passed to `clip_eta` for the total perturbation.
    :param eps_iter: Step size passed to `fgm_perturb` as `eps`.
    :param rand_minmax: Half-width of the uniform random initialisation.
    :param nb_iter: Number of attack iterations.
    :return: The symbolic adversarial examples.
    :raises ValueError: if both `y` and `y_target` are None.
    :raises NotImplementedError: if `ord` is 1.
    """
    # Validate first so that bad arguments add nothing to the graph.
    if y_target is not None:
        labels, y_kwarg = y_target, 'y_target'
    elif y is not None:
        labels, y_kwarg = y, 'y'
    else:
        raise ValueError(
            "pgd_perturb needs labels: pass either `y` or `y_target`.")

    if ord == 1:
        raise NotImplementedError(
            "It's not clear that FGM is a good inner loop"
            " step for PGD when ord=1, because ord=1 FGM "
            " changes only one pixel at a time. We need "
            " to rigorously test a strong ord=1 PGD "
            "before enabling this feature.")

    # Initialize loop variables
    if rand_init:
        eta = tf.random_uniform(tf.shape(x),
                                tf.cast(-rand_minmax, x.dtype),
                                tf.cast(rand_minmax, x.dtype),
                                dtype=x.dtype)
    else:
        # Match x's dtype: tf.zeros defaults to float32, which would make
        # `x + eta` fail for any other input dtype.
        eta = tf.zeros(tf.shape(x), dtype=x.dtype)

    # Clip eta
    eta = clip_eta(eta, ord, eps)
    adv_x = x + eta
    if clip_min is not None or clip_max is not None:
        adv_x = utils_tf.clip_by_value(adv_x, clip_min, clip_max)

    fgm_params = {
        'loss_fn': loss_fn,
        'eps': eps_iter,
        y_kwarg: labels,
        'ord': ord,
        'clip_min': clip_min,
        'clip_max': clip_max
    }

    def cond(i, _):
        """Iterate until requested number of iterations is completed"""
        return tf.less(i, nb_iter)

    def body(i, adv_x):
        """Do one FGM step followed by a projection onto the eps ball"""
        adv_x = fgm_perturb(adv_x, **fgm_params)

        # Clipping perturbation eta to the ord norm ball
        eta = adv_x - x
        eta = clip_eta(eta, ord, eps)
        adv_x = x + eta

        # Redo the clipping.
        # FGM already did it, but subtracting and re-adding eta can add some
        # small numerical error.
        if clip_min is not None or clip_max is not None:
            adv_x = utils_tf.clip_by_value(adv_x, clip_min, clip_max)

        return i + 1, adv_x

    _, adv_x = tf.while_loop(cond,
                             body, (tf.zeros([]), adv_x),
                             back_prop=True,
                             maximum_iterations=nb_iter)

    return adv_x
Пример #30
0
    def set_parameters(self, params, target_imgs, src_imgs, margin, model,
                       base_imgs, **kwargs):
        """
        Store the attack configuration and build the one-step update graph.

        All images are passed through `tanh` and rescaled into the box
        [boxmin, boxmax] before they are given to `model.predict`. The loss
        is assembled from squared distances between the model outputs, and
        `self.adv_x_out` / `self.eta` hold the result of one signed-gradient
        step of size `self.eps_iter` on `self.adv_x`.

        :param params: dict with the keys 'model_type', 'loss_type',
                       'targeted_flag', 'tv_flag', 'hinge_flag' and
                       'mean_loss'.
        :param target_imgs: Images whose model outputs form `outputTarg`.
        :param src_imgs: Images whose model outputs form `outputSelf`.
        :param margin: Constant added inside the hinge losses.
        :param model: Object exposing `predict`.
        :param base_imgs: Starting point of the attack, stored as
                          `self.old_x`.
        :param kwargs: Forwarded to `parse_params`.
        :raises ValueError: if the 'model_type' / 'loss_type' combination
                            has no known value range.
        :return: None
        """
        self.model_type = params['model_type']
        self.loss_type = params['loss_type']
        self.TARGET_FLAG = params['targeted_flag']
        self.TV_FLAG = params['tv_flag']
        self.HINGE_FLAG = params['hinge_flag']
        self.target_imgs = target_imgs
        self.src_imgs = src_imgs
        self.margin = margin
        self.model = model
        self.LOSS_IMPL = params['mean_loss']

        # Pick the value range of the model inputs. Fail with a clear
        # message instead of hitting an unbound local further down.
        if self.model_type == 'small' and self.loss_type == 'center':
            boxmin, boxmax = -1, 1
        elif self.model_type == 'large' or (self.model_type == 'small'
                                            and self.loss_type == 'triplet'):
            boxmin, boxmax = 0, 1
        else:
            raise ValueError(
                "Unsupported model_type/loss_type combination: %r / %r" %
                (self.model_type, self.loss_type))

        # Half-width and midpoint of the box: tanh(v) * boxmul + boxplus
        # always lies between boxmin and boxmax.
        boxmul = (boxmax - boxmin) / 2.
        boxplus = (boxmin + boxmax) / 2.

        target_imgs_tanh = tf.tanh(self.target_imgs) * boxmul + boxplus
        src_imgs_tanh = tf.tanh(self.src_imgs) * boxmul + boxplus

        self.outputTarg = self.model.predict(target_imgs_tanh)
        self.outputSelf = self.model.predict(src_imgs_tanh)

        # NOTE(review): the attributes read below (tf_dtype, rand_init, eps,
        # norm, eps_iter, clip_min, clip_max) are presumably set by
        # parse_params -- confirm.
        self.parse_params(**kwargs)
        self.old_x = base_imgs
        self.assign_adv_x = tf.placeholder(self.tf_dtype, self.old_x.shape)
        if self.rand_init:
            self.init_eta = tf.random_uniform(tf.shape(self.old_x),
                                              -self.eps,
                                              self.eps,
                                              dtype=self.tf_dtype)
            self.init_eta = clip_eta(self.init_eta, self.norm, self.eps)
        else:
            self.init_eta = tf.zeros_like(self.old_x)
        self.adv_x = tf.Variable(self.old_x + self.init_eta,
                                 dtype=self.tf_dtype)

        # Making unbounded adv_x bounded
        self.newimg = tf.tanh(self.adv_x) * boxmul + boxplus
        self.oldimg = tf.tanh(self.old_x) * boxmul + boxplus

        # Model outputs for the perturbed and the original images.
        # NOTE(review): outputOld does not depend on adv_x and could be
        # pre-computed.
        self.outputNew = self.model.predict(self.newimg)
        self.outputOld = self.model.predict(self.oldimg)
        self.outputTargMean = tf.reduce_mean(self.outputTarg, axis=0)
        self.outputSelfMean = tf.reduce_mean(self.outputSelf, axis=0)

        if self.LOSS_IMPL == 'embeddingmean':
            # Euclidean distance to the mean target / source output.
            self.target_loss = tf.sqrt(
                tf.reduce_sum(tf.square(self.outputNew - self.outputTargMean),
                              [1]))
            self.src_loss = tf.sqrt(
                tf.reduce_sum(tf.square(self.outputNew - self.outputSelfMean),
                              [1]))
            self.orig_loss = tf.sqrt(
                tf.reduce_sum(tf.square(self.outputOld - self.outputSelfMean),
                              [1]))
        else:
            # Mean squared distance to the individual outputs.
            self.target_loss = tf.reduce_mean(tf.reduce_sum(
                tf.square(self.outputNew - self.outputTarg), 1),
                                              axis=0)
            self.src_loss = tf.reduce_mean(tf.reduce_sum(
                tf.square(self.outputNew - self.outputSelf), 1),
                                           axis=0)
            self.orig_loss = tf.reduce_mean(tf.reduce_sum(
                tf.square(self.outputOld - self.outputSelf), 1),
                                            axis=0)

        # float32 zero used as the floor of the hinge losses.
        zero = np.asarray(0., dtype=np.dtype('float32'))

        if self.TARGET_FLAG:
            if self.HINGE_FLAG:
                self.hinge_loss = self.target_loss - self.src_loss + self.margin
                self.hinge_loss = tf.maximum(self.hinge_loss, zero)
                self.loss = self.hinge_loss
            else:
                self.loss = self.target_loss
        else:
            self.loss = self.orig_loss - self.src_loss + self.margin
            self.loss = tf.maximum(self.loss, zero)

        if self.TV_FLAG:
            if self.model_type == 'large':
                transpose_newimg = tf.transpose(self.newimg, (0, 3, 1, 2))
            else:
                transpose_newimg = self.newimg
            self.loss = self.loss + get_tv_loss(transpose_newimg)
        # The step below follows +sign(grad), so the loss is negated in
        # order to decrease it.
        self.loss = -self.loss

        self.grad, = tf.gradients(self.loss, self.adv_x)
        self.scaled_signed_grad = self.eps_iter * tf.sign(self.grad)
        self.adv_x_out = self.adv_x + self.scaled_signed_grad
        if self.clip_min is not None and self.clip_max is not None:
            self.adv_x_out = tf.clip_by_value(self.adv_x_out, self.clip_min,
                                              self.clip_max)
        self.eta = self.adv_x_out - self.old_x
        self.eta = clip_eta(self.eta, self.norm, self.eps)

        # Op that loads a value fed through assign_adv_x into adv_x.
        self.setup = []
        self.setup.append(self.adv_x.assign(self.assign_adv_x))
Пример #31
0
  def generate(self, x, **kwargs):
    """
    Generate symbolic graph for adversarial examples and return.

    Starting from `x` (optionally plus uniform noise), `self.nb_iter` FGM
    steps are chained inside a `tf.while_loop`. After each step the
    perturbation is clipped with `clip_eta(eta, self.ord, self.eps)` and the
    result is clipped to the data range again.

    :param x: The model's symbolic inputs.
    :param kwargs: See `parse_params`
    :return: The symbolic adversarial examples.
    :raises NotImplementedError: if `self.ord` is 1.
    """
    # Parse and save attack-specific parameters.
    # Keep the call outside the assert so it still runs under `python -O`.
    params_ok = self.parse_params(**kwargs)
    assert params_ok

    # Reject the unsupported norm before any op is added to the graph.
    if self.ord == 1:
      raise NotImplementedError("It's not clear that FGM is a good inner loop"
                                " step for PGD when ord=1, because ord=1 FGM "
                                " changes only one pixel at a time. We need "
                                " to rigorously test a strong ord=1 PGD "
                                "before enabling this feature.")

    asserts = []

    # If a data range was specified, check that the input was in that range
    if self.clip_min is not None:
      asserts.append(utils_tf.assert_greater_equal(x,
                                                   tf.cast(self.clip_min,
                                                           x.dtype)))

    if self.clip_max is not None:
      asserts.append(utils_tf.assert_less_equal(x,
                                                tf.cast(self.clip_max,
                                                        x.dtype)))

    # Initialize loop variables
    if self.rand_init:
      eta = tf.random_uniform(tf.shape(x),
                              tf.cast(-self.rand_minmax, x.dtype),
                              tf.cast(self.rand_minmax, x.dtype),
                              dtype=x.dtype)
    else:
      # Match x's dtype: tf.zeros defaults to float32, which would make
      # `x + eta` fail for any other input dtype.
      eta = tf.zeros(tf.shape(x), dtype=x.dtype)

    # Clip eta
    eta = clip_eta(eta, self.ord, self.eps)
    adv_x = x + eta
    if self.clip_min is not None or self.clip_max is not None:
      adv_x = utils_tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

    if self.y_target is not None:
      y = self.y_target
      targeted = True
    elif self.y is not None:
      y = self.y
      targeted = False
    else:
      # No labels given: use the model's own prediction on x.
      model_preds = self.model.get_probs(x)
      preds_max = reduce_max(model_preds, 1, keepdims=True)
      y = tf.to_float(tf.equal(model_preds, preds_max))
      y = tf.stop_gradient(y)
      targeted = False
      del model_preds

    y_kwarg = 'y_target' if targeted else 'y'
    fgm_params = {
        'eps': self.eps_iter,
        y_kwarg: y,
        'ord': self.ord,
        'clip_min': self.clip_min,
        'clip_max': self.clip_max
    }

    # Use getattr() to avoid errors in eager execution attacks
    fgm = self.FGM_CLASS(
        self.model,
        sess=getattr(self, 'sess', None),
        dtypestr=self.dtypestr)

    def cond(i, _):
      """Iterate until requested number of iterations is completed"""
      return tf.less(i, self.nb_iter)

    def body(i, adv_x):
      """Do one FGM step followed by a projection onto the eps ball"""
      adv_x = fgm.generate(adv_x, **fgm_params)

      # Clipping perturbation eta to self.ord norm ball
      eta = adv_x - x
      eta = clip_eta(eta, self.ord, self.eps)
      adv_x = x + eta

      # Redo the clipping.
      # FGM already did it, but subtracting and re-adding eta can add some
      # small numerical error.
      if self.clip_min is not None or self.clip_max is not None:
        adv_x = utils_tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

      return i + 1, adv_x

    _, adv_x = tf.while_loop(cond, body, (tf.zeros([]), adv_x), back_prop=True,
                             maximum_iterations=self.nb_iter)

    # A single step must not be larger than the total budget. Both values
    # are cast to one dtype so that they can be compared.
    common_dtype = tf.float64
    asserts.append(utils_tf.assert_less_equal(tf.cast(self.eps_iter,
                                                      dtype=common_dtype),
                                              tf.cast(self.eps, dtype=common_dtype)))
    # The range check needs both bounds; with only clip_min set,
    # tf.cast(None, ...) below would fail.
    if (self.ord == np.inf and self.clip_min is not None
        and self.clip_max is not None):
      # The 1e-6 is needed to compensate for numerical error.
      # Without the 1e-6 this fails when e.g. eps=.2, clip_min=.5,
      # clip_max=.7
      asserts.append(utils_tf.assert_less_equal(tf.cast(self.eps, x.dtype),
                                                1e-6 + tf.cast(self.clip_max,
                                                               x.dtype)
                                                - tf.cast(self.clip_min,
                                                          x.dtype)))

    if self.sanity_checks:
      with tf.control_dependencies(asserts):
        adv_x = tf.identity(adv_x)

    return adv_x