def __init__(
        self,
        classifier: "ClassifierGradients",
        nb_epochs: int = 391,
        batch_size: int = 128,
        eps: float = 8.0,
        eps_step: float = 2.0,
        max_iter: int = 7,
        num_random_init: Union[bool, int] = True,
    ) -> None:
        """
        Create an :class:`.AdversarialTrainerMadryPGD` instance.

        Default values are for CIFAR-10 in pixel range 0-255.

        :param classifier: Classifier to train adversarially.
        :param nb_epochs: Number of training epochs.
        :param batch_size: Size of the batch on which adversarial samples are generated.
        :param eps: Maximum perturbation that the attacker can introduce.
        :param eps_step: Attack step size (input variation) at each iteration.
        :param max_iter: The maximum number of iterations.
        :param num_random_init: Number of random initialisations within the epsilon ball. For num_random_init=0
            starting at the original input.
        """
        super(AdversarialTrainerMadryPGD, self).__init__(classifier=classifier)  # type: ignore
        self.batch_size = batch_size
        self.nb_epochs = nb_epochs

        # Setting up adversary and perform adversarial training:
        self.attack = ProjectedGradientDescent(
            classifier, eps=eps, eps_step=eps_step, max_iter=max_iter, num_random_init=num_random_init,
        )

        self.trainer = AdversarialTrainer(classifier, self.attack, ratio=1.0)  # type: ignore
    def test_two_attacks(self):
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_test_original = x_test.copy()

        attack1 = FastGradientMethod(estimator=self.classifier, batch_size=16)
        attack2 = DeepFool(classifier=self.classifier,
                           max_iter=5,
                           batch_size=16)
        x_test_adv = attack1.generate(x_test)
        predictions = np.argmax(self.classifier.predict(x_test_adv), axis=1)
        accuracy = np.sum(predictions == np.argmax(y_test, axis=1)) / NB_TEST

        adv_trainer = AdversarialTrainer(self.classifier,
                                         attacks=[attack1, attack2])
        adv_trainer.fit(x_train, y_train, nb_epochs=2, batch_size=16)

        predictions_new = np.argmax(adv_trainer.predict(x_test_adv), axis=1)
        accuracy_new = np.sum(
            predictions_new == np.argmax(y_test, axis=1)) / NB_TEST

        self.assertEqual(accuracy_new, 0.36)
        self.assertEqual(accuracy, 0.13)

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))),
                               0.0,
                               delta=0.00001)
    def test_classifier_match(self):
        attack = FastGradientMethod(self.classifier)
        adv_trainer = AdversarialTrainer(self.classifier, attack)

        self.assertEqual(len(adv_trainer.attacks), 1)
        self.assertEqual(adv_trainer.attacks[0].estimator,
                         adv_trainer.get_classifier())
Exemplo n.º 4
0
class AdversarialTrainerMadryPGD(Trainer):
    """
    Class performing adversarial training following Madry's Protocol.
    Paper link: https://arxiv.org/abs/1706.06083
    Please keep in mind the limitations of defences. While adversarial training is
    widely regarded as a promising, principled approach to making classifiers more
    robust (see https://arxiv.org/abs/1802.00420), very careful evaluations are
    required to assess its effectiveness case by case (see https://arxiv.org/abs/1902.06705).
    """
    def __init__(self,
                 classifier,
                 eps=0.03,
                 eps_step=0.008,
                 max_iter=7,
                 ratio=1.0):
        self.attack = ProjectedGradientDescent(
            classifier,
            eps=eps,
            eps_step=eps_step,
            max_iter=max_iter,
        )

        self.trainer = AdversarialTrainer(classifier, self.attack, ratio=ratio)

    def fit(self, x, y, **kwargs):
        self.trainer.fit(x, y, **kwargs)

    def fit_generator(self, generator, nb_epochs, **kwargs):
        self.trainer.fit_generator(generator, nb_epochs=nb_epochs, **kwargs)

    def get_classifier(self):
        return self.trainer.get_classifier()
Exemplo n.º 5
0
    def test_excpetions(self):
        with self.assertRaises(ValueError):
            _ = AdversarialTrainer(self.classifier, "attack")

        with self.assertRaises(ValueError):
            attack = FastGradientMethod(self.classifier)
            _ = AdversarialTrainer(self.classifier, attack, ratio=1.5)
    def test_two_attacks_with_generator(self):
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train_original = x_train.copy()
        x_test_original = x_test.copy()

        class MyDataGenerator(DataGenerator):
            def __init__(self, x, y, size, batch_size):
                super().__init__(size=size, batch_size=batch_size)
                self.x = x
                self.y = y
                self._size = size
                self._batch_size = batch_size

            def get_batch(self):
                ids = np.random.choice(self.size,
                                       size=min(self.size, self.batch_size),
                                       replace=False)
                return self.x[ids], self.y[ids]

        generator = MyDataGenerator(x_train,
                                    y_train,
                                    size=x_train.shape[0],
                                    batch_size=16)

        attack1 = FastGradientMethod(estimator=self.classifier, batch_size=16)
        attack2 = DeepFool(classifier=self.classifier,
                           max_iter=5,
                           batch_size=16)
        x_test_adv = attack1.generate(x_test)
        predictions = np.argmax(self.classifier.predict(x_test_adv), axis=1)
        accuracy = np.sum(predictions == np.argmax(y_test, axis=1)) / NB_TEST

        adv_trainer = AdversarialTrainer(self.classifier,
                                         attacks=[attack1, attack2])
        adv_trainer.fit_generator(generator, nb_epochs=3)

        predictions_new = np.argmax(adv_trainer.predict(x_test_adv), axis=1)
        accuracy_new = np.sum(
            predictions_new == np.argmax(y_test, axis=1)) / NB_TEST

        self.assertAlmostEqual(accuracy_new, 0.25, delta=0.02)
        self.assertAlmostEqual(accuracy, 0.11, delta=0.0)

        # Check that x_train and x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_train_original -
                                                   x_train))),
                               0.0,
                               delta=0.00001)
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))),
                               0.0,
                               delta=0.00001)
Exemplo n.º 7
0
    def __init__(self,
                 classifier,
                 eps=0.03,
                 eps_step=0.008,
                 max_iter=7,
                 ratio=1.0):
        self.attack = ProjectedGradientDescent(
            classifier,
            eps=eps,
            eps_step=eps_step,
            max_iter=max_iter,
        )

        self.trainer = AdversarialTrainer(classifier, self.attack, ratio=ratio)
    def __init__(self,
                 classifier,
                 nb_epochs=391,
                 batch_size=128,
                 eps=8,
                 eps_step=2,
                 max_iter=7,
                 num_random_init=True,
                 **kwargs):
        """
        Create an :class:`.AdversarialTrainerMadryPGD` instance.

        Default values are for CIFAR-10 in pixel range 0-255.

        :param classifier: Classifier to train adversarially.
        :type classifier: :class:`.Classifier`
        :param nb_epochs: Number of training epochs.
        :type nb_epochs: `int`
        :param batch_size: Size of the batch on which adversarial samples are generated.
        :type batch_size: `int`
        :param eps: Maximum perturbation that the attacker can introduce.
        :type eps: `float`
        :param eps_step: Attack step size (input variation) at each iteration.
        :type eps_step: `float`
        :param max_iter: The maximum number of iterations.
        :type max_iter: `int`
        :param num_random_init: Number of random initialisations within the epsilon ball. For num_random_init=0
            starting at the original input.
        :type num_random_init: `int`
        """

        self.batch_size = batch_size
        self.nb_epochs = nb_epochs

        # Setting up adversary and perform adversarial training:
        self.attack = ProjectedGradientDescent(classifier,
                                               eps=eps,
                                               eps_step=eps_step,
                                               max_iter=max_iter,
                                               num_random_init=num_random_init)

        self.trainer = AdversarialTrainer(classifier, self.attack, ratio=1.0)
Exemplo n.º 9
0
def pgd_adv_train(model, data, outpath, model_name):
    attack = ProjectedGradientDescent(model,
                                      eps=0.015,
                                      eps_step=0.001,
                                      max_iter=2,
                                      targeted=False,
                                      num_random_init=0,
                                      )

    adv_trainer = AdversarialTrainer(model,
                                     attacks=attack,
                                     ratio=1.0)
    print('>>> Processing adversarial training, it will take a while...')
    x_train, y_train = data
    adv_trainer.fit(x_train, y_train, nb_epochs=30, batch_size=32)

    savefile = os.path.join(outpath, model_name)
    print('>>>Save the model to [{}]'.format(savefile))
    adv_trainer.classifier.save(savefile)

    return adv_trainer.classifier
    def test_targeted_attack_error(self):
        """
        Test the adversarial trainer using a targeted attack, which will currently result in a NotImplementError.

        :return: None
        """
        (x_train, y_train), (_, _) = self.mnist
        params = {"nb_epochs": 2, "batch_size": BATCH_SIZE}

        adv = FastGradientMethod(self.classifier, targeted=True)
        adv_trainer = AdversarialTrainer(self.classifier, attacks=adv)
        self.assertRaises(NotImplementedError, adv_trainer.fit, x_train,
                          y_train, **params)
    def test_fit_predict(self):
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_test_original = x_test.copy()

        attack = FastGradientMethod(self.classifier)
        x_test_adv = attack.generate(x_test)
        predictions = np.argmax(self.classifier.predict(x_test_adv), axis=1)
        accuracy = np.sum(predictions == np.argmax(y_test, axis=1)) / NB_TEST

        adv_trainer = AdversarialTrainer(self.classifier, attack)
        adv_trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)

        predictions_new = np.argmax(adv_trainer.predict(x_test_adv), axis=1)
        accuracy_new = np.sum(
            predictions_new == np.argmax(y_test, axis=1)) / NB_TEST

        self.assertEqual(accuracy_new, 0.12)
        self.assertEqual(accuracy, 0.13)

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))),
                               0.0,
                               delta=0.00001)
class AdversarialTrainerMadryPGD(Trainer):
    """
    Class performing adversarial training following Madry's Protocol.

    | Paper link: https://arxiv.org/abs/1706.06083

    | Please keep in mind the limitations of defences. While adversarial training is widely regarded as a promising,
        principled approach to making classifiers more robust (see https://arxiv.org/abs/1802.00420), very careful
        evaluations are required to assess its effectiveness case by case (see https://arxiv.org/abs/1902.06705).
    """

    def __init__(
        self,
        classifier: "ClassifierGradients",
        nb_epochs: int = 391,
        batch_size: int = 128,
        eps: float = 8.0,
        eps_step: float = 2.0,
        max_iter: int = 7,
        num_random_init: Union[bool, int] = True,
    ) -> None:
        """
        Create an :class:`.AdversarialTrainerMadryPGD` instance.

        Default values are for CIFAR-10 in pixel range 0-255.

        :param classifier: Classifier to train adversarially.
        :param nb_epochs: Number of training epochs.
        :param batch_size: Size of the batch on which adversarial samples are generated.
        :param eps: Maximum perturbation that the attacker can introduce.
        :param eps_step: Attack step size (input variation) at each iteration.
        :param max_iter: The maximum number of iterations.
        :param num_random_init: Number of random initialisations within the epsilon ball. For num_random_init=0
            starting at the original input.
        """
        super(AdversarialTrainerMadryPGD, self).__init__(classifier=classifier)  # type: ignore
        self.batch_size = batch_size
        self.nb_epochs = nb_epochs

        # Setting up adversary and perform adversarial training:
        self.attack = ProjectedGradientDescent(
            classifier, eps=eps, eps_step=eps_step, max_iter=max_iter, num_random_init=num_random_init,
        )

        self.trainer = AdversarialTrainer(classifier, self.attack, ratio=1.0)  # type: ignore

    def fit(self, x: np.ndarray, y: np.ndarray, validation_data: Optional[np.ndarray] = None, **kwargs) -> None:
        """
        Train a model adversarially. See class documentation for more information on the exact procedure.

        :param x: Training data.
        :param y: Labels for the training data.
        :param validation_data: Validation data.
        :param kwargs: Dictionary of framework-specific arguments.
        """
        self.trainer.fit(
            x, y, validation_data=validation_data, nb_epochs=self.nb_epochs, batch_size=self.batch_size, **kwargs
        )

    def get_classifier(self) -> "Classifier":
        return self.trainer.get_classifier()
Exemplo n.º 13
0
    def test_fit_predict_different_classifiers(self):
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_test_original = x_test.copy()

        attack = FastGradientMethod(self.classifier)
        x_test_adv = attack.generate(x_test)
        predictions = np.argmax(self.classifier.predict(x_test_adv), axis=1)
        accuracy = np.sum(predictions == np.argmax(y_test, axis=1)) / NB_TEST

        adv_trainer = AdversarialTrainer(self.classifier_2, attack)
        adv_trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)

        predictions_new = np.argmax(adv_trainer.predict(x_test_adv), axis=1)
        accuracy_new = np.sum(
            predictions_new == np.argmax(y_test, axis=1)) / NB_TEST

        self.assertEqual(accuracy_new, 0.32)
        self.assertEqual(accuracy, 0.13)

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))),
                               0.0,
                               delta=0.00001)

        # fit_generator
        class MyDataGenerator(DataGenerator):
            def __init__(self, x, y, size, batch_size):
                super().__init__(size=size, batch_size=batch_size)
                self.x = x
                self.y = y
                self._size = size
                self._batch_size = batch_size

            def get_batch(self):
                ids = np.random.choice(self.size,
                                       size=min(self.size, self.batch_size),
                                       replace=False)
                return self.x[ids], self.y[ids]

        generator = MyDataGenerator(x_train,
                                    y_train,
                                    size=x_train.shape[0],
                                    batch_size=16)
        adv_trainer.fit_generator(generator, nb_epochs=5)
        adv_trainer_2 = AdversarialTrainer(self.classifier_2,
                                           attack,
                                           ratio=1.0)
        adv_trainer_2.fit_generator(generator, nb_epochs=5)