def body(i, ax, m):
    """Do one momentum step."""
    logits = self.model.get_logits(ax)
    loss = softmax_cross_entropy_with_logits(labels=y, logits=logits)
    if targeted:
        loss = -loss

    # Define gradient of loss wrt input
    grad, = tf.gradients(ys=loss, xs=ax)

    # Normalize current gradient and add it to the accumulated gradient
    red_ind = list(range(1, len(grad.get_shape())))
    avoid_zero_div = tf.cast(1e-12, grad.dtype)
    grad = grad / tf.maximum(
        avoid_zero_div,
        reduce_mean(tf.abs(grad), red_ind, keepdims=True))
    m = self.decay_factor * m + grad

    # Reject ord=1 before building the perturbation op
    if self.ord == 1:
        raise NotImplementedError("This attack hasn't been tested for ord=1. "
                                  "It's not clear that FGM makes a good inner "
                                  "loop step for iterative optimization since "
                                  "it updates just one coordinate at a time.")
    optimal_perturbation = optimize_linear(m, self.eps_iter, self.ord)

    # Update and clip adversarial example in current iteration
    ax = ax + optimal_perturbation
    ax = x + utils_tf.clip_eta(ax - x, self.ord, self.eps)

    if self.clip_min is not None and self.clip_max is not None:
        ax = utils_tf.clip_by_value(ax, self.clip_min, self.clip_max)

    ax = tf.stop_gradient(ax)
    return i + 1, ax, m
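# --- Illustrative sketch (not part of the graph code above): the momentum
# accumulation that `body` performs, rewritten as a self-contained NumPy
# function so the update rule can be inspected and tested in isolation. The
# name `momentum_step` and the NumPy stand-ins are assumptions made for
# illustration only.
import numpy as np

def momentum_step(m, grad, decay_factor, avoid_zero_div=1e-12):
    """One MIM accumulator update: L1-normalize grad per example, then decay-add."""
    red_axes = tuple(range(1, grad.ndim))  # reduce over all but the batch axis
    norm = np.maximum(avoid_zero_div,
                      np.mean(np.abs(grad), axis=red_axes, keepdims=True))
    return decay_factor * m + grad / norm

# Example: starting from a zero accumulator, repeated calls fold each step's
# normalized gradient into a running direction estimate.
# m = momentum_step(np.zeros((2, 3)), np.random.randn(2, 3), decay_factor=1.0)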
def fgm_perturb(x, y, loss_fn, clip_min=None, clip_max=None,
                ord=np.inf, eps=0.3):
    loss = loss_fn(x)
    grad, = tf.gradients(loss, x)
    optimal_perturbation = optimize_linear(grad, eps, ord)
    adv_x = x + optimal_perturbation

    if (clip_min is not None) or (clip_max is not None):
        # We don't currently support one-sided clipping
        assert clip_min is not None and clip_max is not None
        adv_x = utils_tf.clip_by_value(adv_x, clip_min, clip_max)

    return adv_x
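# --- Illustrative sketch: what `optimize_linear` resolves to for the common
# norms, written against the documented behavior of CleverHans-style code.
# Treat `optimize_linear_np` as an assumption-labeled stand-in for reading
# purposes, not as this library's actual function.
import numpy as np

def optimize_linear_np(grad, eps, ord=np.inf):
    """Perturbation maximizing a linear approximation of the loss in an eps-ball."""
    if ord == np.inf:
        return eps * np.sign(grad)  # sign step, as in FGSM
    if ord == 2:
        red_axes = tuple(range(1, grad.ndim))
        norm = np.sqrt(np.maximum(
            1e-12, np.sum(grad ** 2, axis=red_axes, keepdims=True)))
        return eps * grad / norm    # scaled gradient direction
    raise NotImplementedError("Only ord in {inf, 2} sketched here.")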
def generate(self, x, **kwargs):
    assert self.parse_params(**kwargs)

    asserts = []
    if self.clip_min is not None:
        asserts.append(utils_tf.assert_greater_equal(
            x, tf.cast(self.clip_min, x.dtype)))
    if self.clip_max is not None:
        asserts.append(utils_tf.assert_less_equal(
            x, tf.cast(self.clip_max, x.dtype)))

    # Adam-style first and second moment accumulators
    m_cache = tf.zeros_like(x)
    v_cache = tf.zeros_like(x)
    adv_x = x

    y, _nb_classes = self.get_or_guess_labels(x, kwargs)
    y = y / reduce_sum(y, 1, keepdims=True)
    targeted = (self.y_target is not None)

    def save_batch(directory, images, labels, iteration, batch_idx):
        for idx, (image, label) in enumerate(zip(images, labels)):
            filename = "id{}_b{}_it{}_l{}.png".format(
                idx, batch_idx, iteration, np.argmax(label))
            save_image_np(join(directory, filename), image)

    for i in range(self.nb_iter):
        self.logger.debug("Starting #{} iteration".format(i + 1))

        logits = self.model.get_logits(adv_x)
        loss = softmax_cross_entropy_with_logits(labels=y, logits=logits)
        if targeted:
            loss = -loss

        grad, = tf.gradients(loss, adv_x)

        red_ind = list(range(1, len(grad.get_shape())))
        avoid_zero_div = tf.cast(1e-8, grad.dtype)
        grad = grad / tf.maximum(
            avoid_zero_div,
            reduce_mean(tf.abs(grad), red_ind, keepdims=True))

        # Adam-style moment updates (betha1/betha2 play the role of Adam's
        # beta_1/beta_2); note that no bias correction is applied.
        m_cache = self.betha1 * m_cache + (1 - self.betha1) * grad
        v_cache = self.betha2 * v_cache + (1 - self.betha2) * tf.square(grad)
        update = tf.divide(m_cache, tf.sqrt(v_cache + avoid_zero_div))

        # Reject ord=1 before building the perturbation op
        if self.ord == 1:
            raise NotImplementedError("This attack hasn't been tested for ord=1. "
                                      "It's not clear that FGM makes a good inner "
                                      "loop step for iterative optimization since "
                                      "it updates just one coordinate at a time.")
        optimal_perturbation = optimize_linear(update, self.eps_iter, self.ord)

        # Take the step, then project back into the eps-ball around x
        adv_x = adv_x + optimal_perturbation
        adv_x = x + utils_tf.clip_eta(adv_x - x, self.ord, self.eps)

        if self.clip_min is not None and self.clip_max is not None:
            adv_x = utils_tf.clip_by_value(adv_x, self.clip_min, self.clip_max)

        adv_x = tf.stop_gradient(adv_x)

    if self.sanity_checks:
        with tf.control_dependencies(asserts):
            adv_x = tf.identity(adv_x)

    with self.sess.as_default():
        self.sess.run(self.init_op)
        for batch in range(self.nb_batches):
            adv_x_np, y_np = self.sess.run([adv_x, y])
            self.logger.debug("Saving attacked batch #{}".format(batch + 1))
            # `i` here is the final loop index, i.e. self.nb_iter - 1
            save_batch(self.adv_dir, adv_x_np, y_np, i, batch)
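# --- Illustrative sketch: the Adam-style moment bookkeeping used inside
# generate(), as a self-contained NumPy helper. This is an assumption-labeled
# stand-in for the TF ops above, not library code; like the graph version, it
# omits Adam's usual bias-correction terms m / (1 - beta1**t) and
# v / (1 - beta2**t).
import numpy as np

def adam_moments(m, v, grad, beta1=0.9, beta2=0.999, eps=1e-8):
    """Return updated moment caches and the resulting step direction."""
    m = beta1 * m + (1 - beta1) * grad
    v = beta2 * v + (1 - beta2) * np.square(grad)
    return m, v, m / np.sqrt(v + eps)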