def test_two_attacks(self):
    """
    Train adversarially with two attacks and compare accuracy on adversarial test samples.

    Adversarial samples are crafted from the test set with `FastGradientMethod`. Accuracy on them is measured
    once with `self.classifier` before training and once through the `AdversarialTrainer` after it was fitted
    with both `FastGradientMethod` and `DeepFool`. The test set itself must not be altered along the way.
    """
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_test_backup = x_test.copy()

    def fraction_correct(outputs):
        # Share of test samples whose arg-max prediction matches the one-hot label.
        predicted = np.argmax(outputs, axis=1)
        return np.sum(predicted == np.argmax(y_test, axis=1)) / NB_TEST

    fgsm = FastGradientMethod(estimator=self.classifier, batch_size=16)
    deepfool = DeepFool(classifier=self.classifier, max_iter=5, batch_size=16)

    # Accuracy on adversarial samples before any adversarial training.
    x_test_adv = fgsm.generate(x_test)
    accuracy = fraction_correct(self.classifier.predict(x_test_adv))

    # Accuracy on the same adversarial samples after adversarial training.
    adv_trainer = AdversarialTrainer(self.classifier, attacks=[fgsm, deepfool])
    adv_trainer.fit(x_train, y_train, nb_epochs=2, batch_size=16)
    accuracy_new = fraction_correct(adv_trainer.predict(x_test_adv))

    self.assertEqual(accuracy_new, 0.36)
    self.assertEqual(accuracy, 0.13)

    # Neither the attack nor the classifier may have modified x_test in place.
    max_drift = float(np.max(np.abs(x_test_backup - x_test)))
    self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
class AdversarialTrainerMadryPGD(Trainer):
    """
    Adversarial training following Madry's Protocol.

    Paper link: https://arxiv.org/abs/1706.06083

    Please keep in mind the limitations of defences. While adversarial training is widely regarded as a
    promising, principled approach to making classifiers more robust (see https://arxiv.org/abs/1802.00420),
    very careful evaluations are required to assess its effectiveness case by case
    (see https://arxiv.org/abs/1902.06705).
    """

    def __init__(self, classifier, eps=0.03, eps_step=0.008, max_iter=7, ratio=1.0):
        """
        Set up a `ProjectedGradientDescent` attack and an `AdversarialTrainer` that uses it.

        :param classifier: Classifier handed to both the attack and the wrapped trainer.
        :param eps: Forwarded to `ProjectedGradientDescent` as `eps`.
        :param eps_step: Forwarded to `ProjectedGradientDescent` as `eps_step`.
        :param max_iter: Forwarded to `ProjectedGradientDescent` as `max_iter`.
        :param ratio: Forwarded to `AdversarialTrainer` as `ratio`.
        """
        pgd_settings = dict(eps=eps, eps_step=eps_step, max_iter=max_iter)
        self.attack = ProjectedGradientDescent(classifier, **pgd_settings)
        self.trainer = AdversarialTrainer(classifier, self.attack, ratio=ratio)

    def fit(self, x, y, **kwargs):
        """
        Delegate training to the wrapped `AdversarialTrainer`.

        :param x: Training data.
        :param y: Labels for the training data.
        :param kwargs: Additional arguments passed through to the wrapped trainer's `fit`.
        """
        wrapped = self.trainer
        wrapped.fit(x, y, **kwargs)

    def fit_generator(self, generator, nb_epochs, **kwargs):
        """
        Delegate generator-based training to the wrapped `AdversarialTrainer`.

        :param generator: Data generator passed to the wrapped trainer.
        :param nb_epochs: Number of epochs, passed on as `nb_epochs`.
        :param kwargs: Additional arguments passed through to the wrapped trainer's `fit_generator`.
        """
        wrapped = self.trainer
        wrapped.fit_generator(generator, nb_epochs=nb_epochs, **kwargs)

    def get_classifier(self):
        """
        Return whatever the wrapped trainer's `get_classifier` returns.
        """
        wrapped = self.trainer
        return wrapped.get_classifier()
def test_fit_predict_different_classifiers(self):
    """
    Craft adversarial samples on one classifier and adversarially train a different one.

    The `FastGradientMethod` attack is built on `self.classifier`, while the `AdversarialTrainer` fits
    `self.classifier_2`. Afterwards `fit_generator` is exercised with a small random-batch data generator,
    both with the default ratio and with `ratio=1.0`.
    """
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_test_backup = x_test.copy()

    def fraction_correct(outputs):
        # Share of test samples whose arg-max prediction matches the one-hot label.
        predicted = np.argmax(outputs, axis=1)
        return np.sum(predicted == np.argmax(y_test, axis=1)) / NB_TEST

    fgsm = FastGradientMethod(self.classifier)

    # Accuracy of the attacked classifier on the adversarial samples.
    x_test_adv = fgsm.generate(x_test)
    accuracy = fraction_correct(self.classifier.predict(x_test_adv))

    # Accuracy of the second classifier after adversarial training.
    adv_trainer = AdversarialTrainer(self.classifier_2, fgsm)
    adv_trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)
    accuracy_new = fraction_correct(adv_trainer.predict(x_test_adv))

    self.assertEqual(accuracy_new, 0.32)
    self.assertEqual(accuracy, 0.13)

    # Neither the attack nor the classifier may have modified x_test in place.
    max_drift = float(np.max(np.abs(x_test_backup - x_test)))
    self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)

    # fit_generator
    class MyDataGenerator(DataGenerator):
        """Data generator returning random batches drawn without replacement from in-memory arrays."""

        def __init__(self, x, y, size, batch_size):
            super().__init__(size=size, batch_size=batch_size)
            self.x = x
            self.y = y
            self._size = size
            self._batch_size = batch_size

        def get_batch(self):
            picked = np.random.choice(
                self.size,
                size=min(self.size, self.batch_size),
                replace=False,
            )
            return self.x[picked], self.y[picked]

    nb_train = x_train.shape[0]
    generator = MyDataGenerator(x_train, y_train, size=nb_train, batch_size=16)
    adv_trainer.fit_generator(generator, nb_epochs=5)

    adv_trainer_2 = AdversarialTrainer(self.classifier_2, fgsm, ratio=1.0)
    adv_trainer_2.fit_generator(generator, nb_epochs=5)
def pgd_adv_train(model, data, outpath, model_name):
    """
    Adversarially train `model` against a `ProjectedGradientDescent` attack and save the result.

    :param model: Classifier handed to both the attack and the `AdversarialTrainer`.
    :param data: Pair `(x_train, y_train)` used for training.
    :param outpath: Directory part of the save location.
    :param model_name: File name part of the save location.
    :return: The `classifier` attribute of the adversarial trainer after fitting.
    """
    attack_settings = dict(
        eps=0.015,
        eps_step=0.001,
        max_iter=2,
        targeted=False,
        num_random_init=0,
    )
    pgd = ProjectedGradientDescent(model, **attack_settings)
    trainer = AdversarialTrainer(model, attacks=pgd, ratio=1.0)

    print('>>> Processing adversarial training, it will take a while...')
    inputs, labels = data
    trainer.fit(inputs, labels, nb_epochs=30, batch_size=32)

    target_file = os.path.join(outpath, model_name)
    print('>>>Save the model to [{}]'.format(target_file))
    trainer.classifier.save(target_file)

    return trainer.classifier
def test_fit_predict(self):
    """
    Adversarially train a classifier with `FastGradientMethod` and compare accuracy on adversarial samples.

    Accuracy on the adversarial test samples is measured with `self.classifier` before training and through
    the `AdversarialTrainer` after fitting. The test set itself must not be altered along the way.
    """
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_test_backup = x_test.copy()

    def fraction_correct(outputs):
        # Share of test samples whose arg-max prediction matches the one-hot label.
        predicted = np.argmax(outputs, axis=1)
        return np.sum(predicted == np.argmax(y_test, axis=1)) / NB_TEST

    fgsm = FastGradientMethod(self.classifier)

    # Accuracy on adversarial samples before any adversarial training.
    x_test_adv = fgsm.generate(x_test)
    accuracy = fraction_correct(self.classifier.predict(x_test_adv))

    # Accuracy on the same adversarial samples after adversarial training.
    adv_trainer = AdversarialTrainer(self.classifier, fgsm)
    adv_trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)
    accuracy_new = fraction_correct(adv_trainer.predict(x_test_adv))

    self.assertEqual(accuracy_new, 0.12)
    self.assertEqual(accuracy, 0.13)

    # Neither the attack nor the classifier may have modified x_test in place.
    max_drift = float(np.max(np.abs(x_test_backup - x_test)))
    self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
class AdversarialTrainerMadryPGD(Trainer):
    """
    Adversarial training following Madry's Protocol.

    | Paper link: https://arxiv.org/abs/1706.06083

    | Please keep in mind the limitations of defences. While adversarial training is widely regarded as a
        promising, principled approach to making classifiers more robust (see https://arxiv.org/abs/1802.00420),
        very careful evaluations are required to assess its effectiveness case by case
        (see https://arxiv.org/abs/1902.06705).
    """

    def __init__(
        self,
        classifier: "ClassifierGradients",
        nb_epochs: int = 391,
        batch_size: int = 128,
        eps: float = 8.0,
        eps_step: float = 2.0,
        max_iter: int = 7,
        num_random_init: Union[bool, int] = True,
    ) -> None:
        """
        Build an :class:`.AdversarialTrainerMadryPGD`. The defaults target CIFAR-10 with pixel values in 0-255.

        :param classifier: Classifier to train adversarially.
        :param nb_epochs: Number of training epochs.
        :param batch_size: Size of the batch on which adversarial samples are generated.
        :param eps: Maximum perturbation that the attacker can introduce.
        :param eps_step: Attack step size (input variation) at each iteration.
        :param max_iter: The maximum number of iterations.
        :param num_random_init: Number of random initialisations within the epsilon ball. With
            num_random_init=0 the attack starts at the original input.
        """
        super().__init__(classifier=classifier)  # type: ignore

        # Training schedule, applied later by `fit`.
        self.nb_epochs = nb_epochs
        self.batch_size = batch_size

        # The adversary, and the trainer that fits with ratio=1.0 using it.
        pgd_settings = dict(
            eps=eps,
            eps_step=eps_step,
            max_iter=max_iter,
            num_random_init=num_random_init,
        )
        self.attack = ProjectedGradientDescent(classifier, **pgd_settings)
        self.trainer = AdversarialTrainer(classifier, self.attack, ratio=1.0)  # type: ignore

    def fit(self, x: np.ndarray, y: np.ndarray, validation_data: Optional[np.ndarray] = None, **kwargs) -> None:
        """
        Adversarially train the model by delegating to the wrapped trainer with the stored epochs and batch size.

        :param x: Training data.
        :param y: Labels for the training data.
        :param validation_data: Validation data.
        :param kwargs: Dictionary of framework-specific arguments.
        """
        wrapped = self.trainer
        wrapped.fit(
            x,
            y,
            validation_data=validation_data,
            nb_epochs=self.nb_epochs,
            batch_size=self.batch_size,
            **kwargs
        )

    def get_classifier(self) -> "Classifier":
        """
        Return whatever the wrapped trainer's `get_classifier` returns.
        """
        wrapped = self.trainer
        return wrapped.get_classifier()