def clip_eta(eta, ord, eps): """ Helper function to clip the perturbation to epsilon norm ball. :param eta: A tensor with the current perturbation. :param ord: Order of the norm (mimics Numpy). Possible values: np.inf, 1 or 2. :param eps: Epilson, bound of the perturbation. """ # Clipping perturbation eta to self.ord norm ball if ord not in [np.inf, 1, 2]: raise ValueError('ord must be np.inf, 1, or 2.') reduc_ind = list(xrange(1, len(eta.get_shape()))) avoid_zero_div = 1e-12 if ord == np.inf: eta = tf.clip_by_value(eta, -eps, eps) else: if ord == 1: norm = tf.maximum(avoid_zero_div, reduce_sum(tf.abs(eta), reduc_ind, keepdims=True)) elif ord == 2: # avoid_zero_div must go inside sqrt to avoid a divide by zero # in the gradient through this operation norm = tf.sqrt(tf.maximum(avoid_zero_div, reduce_sum(tf.square(eta), reduc_ind, keepdims=True))) # We must *clip* to within the norm ball, not *normalize* onto the # surface of the ball factor = tf.minimum(1., eps / norm) eta = eta * factor return eta
def kl_with_logits(p_logits, q_logits, scope=None, loss_collection=tf.GraphKeys.REGULARIZATION_LOSSES): """Helper function to compute kl-divergence KL(p || q) """ with tf.name_scope(scope, "kl_divergence") as name: p = tf.nn.softmax(p_logits) p_log = tf.nn.log_softmax(p_logits) q_log = tf.nn.log_softmax(q_logits) loss = reduce_mean(reduce_sum(p * (p_log - q_log), axis=1), name=name) tf.losses.add_loss(loss, loss_collection) return loss
def l2_batch_normalize(x, epsilon=1e-12, scope=None): """ Helper function to normalize a batch of vectors. :param x: the input placeholder :param epsilon: stabilizes division :return: the batch of l2 normalized vector """ with tf.name_scope(scope, "l2_batch_normalize") as scope: x_shape = tf.shape(x) x = tf.contrib.layers.flatten(x) x /= (epsilon + reduce_max(tf.abs(x), 1, keepdims=True)) square_sum = reduce_sum(tf.square(x), 1, keepdims=True) x_inv_norm = tf.rsqrt(np.sqrt(epsilon) + square_sum) x_norm = tf.multiply(x, x_inv_norm) return tf.reshape(x_norm, x_shape, scope)
def fgm(self, x, labels, targeted=False): """ TensorFlow Eager implementation of the Fast Gradient Method. :param x: the input variable :param targeted: Is the attack targeted or untargeted? Untargeted, the default, will try to make the label incorrect. Targeted will instead try to move in the direction of being more like y. :return: a tensor for the adversarial example """ # Compute loss with tf.GradientTape() as tape: # input should be watched because it may be # combination of trainable and non-trainable variables tape.watch(x) loss_obj = LossCrossEntropy(self.model, smoothing=0.) loss = loss_obj.fprop(x=x, y=labels) if targeted: loss = -loss # Define gradient of loss wrt input grad = tape.gradient(loss, x) if self.ord == np.inf: # Take sign of gradient normalized_grad = tf.sign(grad) # The following line should not change the numerical results. # It applies only because `normalized_grad` is the output of # a `sign` op, which has zero derivative anyway. # It should not be applied for the other norms, where the # perturbation has a non-zero derivative. normalized_grad = tf.stop_gradient(normalized_grad) elif self.ord == 1: red_ind = list(xrange(1, len(x.get_shape()))) avoid_zero_div = 1e-12 avoid_nan_norm = tf.maximum( avoid_zero_div, reduce_sum(tf.abs(grad), reduction_indices=red_ind, keepdims=True)) normalized_grad = grad / avoid_nan_norm elif self.ord == 2: red_ind = list(xrange(1, len(x.get_shape()))) avoid_zero_div = 1e-12 square = tf.maximum( avoid_zero_div, reduce_sum(tf.square(grad), reduction_indices=red_ind, keepdims=True)) normalized_grad = grad / tf.sqrt(square) else: raise NotImplementedError("Only L-inf, L1 and L2 norms are " "currently implemented.") # Multiply by constant epsilon scaled_grad = self.eps * normalized_grad # Add perturbation to original example to obtain adversarial example adv_x = x + scaled_grad # If clipping is needed # reset all values outside of [clip_min, clip_max] if (self.clip_min is not None) and (self.clip_max is not None): adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max) return adv_x