def fgm(self, x, labels, targeted=False):
    """
    Fast Gradient Method, implemented with TensorFlow Eager.

    :param x: the input variable
    :param labels: labels handed to the cross-entropy loss as `y`
    :param targeted: Is the attack targeted or untargeted? Untargeted, the
                     default, will try to make the label incorrect.
                     Targeted will instead try to move in the direction
                     of being more like `labels`.
    :return: a tensor for the adversarial example
    """
    with tf.GradientTape() as tape:
        # The input is watched explicitly because it may be a combination
        # of trainable and non-trainable variables.
        tape.watch(x)
        cross_entropy = LossCrossEntropy(self.model, smoothing=0.0)
        loss = cross_entropy.fprop(x=x, y=labels)
        # A targeted attack descends the loss instead of ascending it.
        loss = -loss if targeted else loss

    # Gradient of the (possibly negated) loss with respect to the input.
    input_grad = tape.gradient(loss, x)

    # Perturb the original example to obtain the adversarial example.
    adv_x = x + attacks.optimize_linear(input_grad, self.eps, self.ord)

    # Clipping only applies when both bounds are configured.
    if self.clip_min is None or self.clip_max is None:
        return adv_x
    return tf.clip_by_value(adv_x, self.clip_min, self.clip_max)
def test_xe_smoothing(self):
    """
    Evaluate the cross-entropy loss built with smoothing=0.1 twice in one
    session and check both evaluations against the reference values.
    """
    expected = [2.10587597, 1.47194624]
    loss_op = LossCrossEntropy(self.model, smoothing=0.1).fprop(self.x,
                                                                self.y)
    feed = {self.x: self.vx, self.y: self.vy}

    with tf.Session() as sess:
        # Two evaluations with the same feed, in order.
        observed = [sess.run(loss_op, feed_dict=feed) for _ in range(2)]

    for value in observed:
        self.assertClose(value, expected, atol=1e-6)
def test_xe(self):
    """
    Evaluate the cross-entropy loss built with smoothing=0 twice in one
    session and check both evaluations against the reference values.
    """
    expected = [2.210599660, 1.53666997]
    loss_op = LossCrossEntropy(self.model, smoothing=0.).fprop(self.x,
                                                               self.y)
    feed = {self.x: self.vx, self.y: self.vy}

    with tf.Session() as sess:
        # Two evaluations with the same feed, in order.
        observed = [sess.run(loss_op, feed_dict=feed) for _ in range(2)]

    for value in observed:
        self.assertClose(value, expected, atol=1e-6)
def fgm(self, x, labels, targeted=False):
    """
    TensorFlow Eager implementation of the Fast Gradient Method.

    :param x: the input variable
    :param labels: labels handed to the cross-entropy loss as `y`
    :param targeted: Is the attack targeted or untargeted? Untargeted, the
                     default, will try to make the label incorrect.
                     Targeted will instead try to move in the direction
                     of being more like `labels`.
    :return: a tensor for the adversarial example
    :raises NotImplementedError: if `self.ord` is not np.inf, 1 or 2
    """
    # Compute loss
    with tf.GradientTape() as tape:
        # A GradientTape only tracks trainable variables automatically.
        # Watch the input explicitly so the gradient below is defined even
        # when `x` is a plain tensor or a non-trainable variable.
        tape.watch(x)
        loss_obj = LossCrossEntropy(self.model, smoothing=0.)
        loss = loss_obj.fprop(x=x, y=labels)
        if targeted:
            loss = -loss

    # Define gradient of loss wrt input
    grad = tape.gradient(loss, x)

    # Lower bound for the L1 / L2 divisors: an all-zero gradient then
    # yields a zero perturbation instead of NaN.
    avoid_zero_div = 1e-12

    if self.ord == np.inf:
        # Take sign of gradient.
        # stop_gradient does not change the numerical results: it is
        # applied only because the output of a `sign` op has zero
        # derivative anyway. It must not be applied for the other norms,
        # where the perturbation has a non-zero derivative.
        normalized_grad = tf.stop_gradient(tf.sign(grad))
    elif self.ord == 1:
        # Reduce over every axis except the leading one.
        red_ind = list(range(1, len(x.get_shape())))
        abs_sum = tf.reduce_sum(tf.abs(grad),
                                reduction_indices=red_ind,
                                keep_dims=True)
        normalized_grad = grad / tf.maximum(avoid_zero_div, abs_sum)
    elif self.ord == 2:
        red_ind = list(range(1, len(x.get_shape())))
        square = tf.reduce_sum(tf.square(grad),
                               reduction_indices=red_ind,
                               keep_dims=True)
        normalized_grad = grad / tf.sqrt(tf.maximum(avoid_zero_div, square))
    else:
        raise NotImplementedError("Only L-inf, L1 and L2 norms are "
                                  "currently implemented.")

    # Multiply by constant epsilon
    scaled_grad = self.eps * normalized_grad

    # Add perturbation to original example to obtain adversarial example
    adv_x = x + scaled_grad

    # If clipping is needed
    # reset all values outside of [clip_min, clip_max]
    if (self.clip_min is not None) and (self.clip_max is not None):
        adv_x = tf.clip_by_value(adv_x, self.clip_min, self.clip_max)
    return adv_x
def train(model, X_train=None, Y_train=None, save=False,
          predictions_adv=None, evaluate=None,
          args=None, rng=None, var_list=None,
          attack=None, attack_args=None):
    """
    Train a TF Eager model

    :param model: instance of cleverhans model, takes in input batch,
                  gives out probs(softmax layer).
    :param X_train: numpy array with training inputs
    :param Y_train: numpy array with training outputs
    :param save: boolean controlling the save operation
    :param predictions_adv: accepted for interface compatibility; this
                            function does not use it (adversarial training
                            is driven by `attack` / `attack_args`).
    :param evaluate: function that is run after each training epoch
                     (typically to display the test/validation accuracy).
    :param args: dict or argparse `Namespace` object.
                 Should contain `nb_epochs`, `learning_rate`,
                 `batch_size`
                 If save is True, should also contain 'train_dir'
                 and 'filename'
    :param rng: Instance of numpy.random.RandomState
    :param var_list: accepted for interface compatibility; this function
                     does not use it and always trains the variables
                     returned by `model.get_params()`.
    :param attack: Instance of the class cleverhans.attacks.attacks_eager
    :param attack_args: Parameters required for the attack.
    :return: True if model trained
    :raises ValueError: if only one of `attack` / `attack_args` is given,
                        or if `X_train` or `Y_train` is missing.
    """
    args = _ArgsWrapper(args or {})
    if (attack is None) != (attack_args is None):
        raise ValueError("attack and attack_args must be "
                         "passed together.")
    if X_train is None or Y_train is None:
        raise ValueError("X_train argument and Y_train argument "
                         "must be supplied.")
    # Check that necessary arguments were given (see doc above)
    assert args.nb_epochs, "Number of epochs was not given in args dict"
    assert args.learning_rate, "Learning rate was not given in args dict"
    assert args.batch_size, "Batch size was not given in args dict"

    if save:
        assert args.train_dir, "Directory for save was not given in args dict"
        assert args.filename, "Filename for save was not given in args dict"

    if rng is None:
        rng = np.random.RandomState()

    # Optimizer
    tfe = tf.contrib.eager
    optimizer = tf.train.AdamOptimizer(learning_rate=args.learning_rate)

    # Batch holders, overwritten in place with every mini-batch.
    batch_x = tfe.Variable(X_train[0:args.batch_size], dtype=tf.float32)
    batch_y = tfe.Variable(Y_train[0:args.batch_size], dtype=tf.float32)

    # The number of batches does not change between epochs.
    nb_batches = int(math.ceil(float(len(X_train)) / args.batch_size))
    assert nb_batches * args.batch_size >= len(X_train)

    for epoch in range(args.nb_epochs):
        # Indices to shuffle training set
        index_shuf = list(range(len(X_train)))
        rng.shuffle(index_shuf)

        prev = time.time()
        for batch in range(nb_batches):
            # Compute batch start and end indices
            start, end = batch_indices(
                batch, len(X_train), args.batch_size)

            # Load the current mini-batch into the batch holders
            tf.assign(batch_x, X_train[index_shuf[start:end]])
            tf.assign(batch_y, Y_train[index_shuf[start:end]])

            # Compute grads
            with tf.GradientTape() as tape:
                # Define loss
                loss_clean_obj = LossCrossEntropy(model, smoothing=0.)
                loss_clean = loss_clean_obj.fprop(x=batch_x, y=batch_y)
                loss = loss_clean
                # Adversarial training: average clean and adversarial loss
                if attack is not None:
                    batch_adv_x = attack.generate(batch_x, **attack_args)
                    loss_adv_obj = LossCrossEntropy(model, smoothing=0.)
                    loss_adv = loss_adv_obj.fprop(x=batch_adv_x, y=batch_y)
                    loss = (loss_clean + loss_adv) / 2.0

            # Apply grads
            model_variables = model.get_params()
            grads = tape.gradient(loss, model_variables)
            optimizer.apply_gradients(zip(grads, model_variables))

        assert end >= len(X_train)  # Check that all examples were used
        cur = time.time()
        _logger.info("Epoch %s took %s seconds", epoch, cur - prev)
        if evaluate is not None:
            evaluate()

    if save:
        save_path = os.path.join(args.train_dir, args.filename)
        # Eager execution has no session, so the graph-mode
        # `tf.train.Saver().save(sess, save_path)` call does not apply.
        # The eager Saver takes the variables up front and only needs the
        # checkpoint path.
        saver = tfe.Saver(model.get_params())
        saver.save(save_path)
        _logger.info("Completed model training and saved at: %s", save_path)
    else:
        _logger.info("Completed model training.")

    return True