class AdversarialTrainerMadryPGD(Trainer):
    """
    Adversarial training wrapper following Madry's protocol.

    Paper link: https://arxiv.org/abs/1706.06083

    On construction a `ProjectedGradientDescent` attack is created for the
    given classifier and handed, together with that classifier, to an
    `AdversarialTrainer`. All public methods delegate to this inner trainer.

    Please keep in mind the limitations of defences. While adversarial
    training is widely regarded as a promising, principled approach to making
    classifiers more robust (see https://arxiv.org/abs/1802.00420), very
    careful evaluations are required to assess its effectiveness case by case
    (see https://arxiv.org/abs/1902.06705).
    """

    def __init__(self, classifier, eps=0.03, eps_step=0.008, max_iter=7,
                 ratio=1.0):
        """
        Create the PGD attack and the adversarial trainer built on top of it.

        :param classifier: Model given to both the attack and the trainer.
        :param eps: Forwarded to `ProjectedGradientDescent` as `eps`.
        :param eps_step: Forwarded to `ProjectedGradientDescent` as `eps_step`.
        :param max_iter: Forwarded to `ProjectedGradientDescent` as `max_iter`.
        :param ratio: Forwarded to `AdversarialTrainer` as `ratio`.
        """
        # NOTE(review): the base-class initialiser is not invoked here --
        # confirm that `Trainer` does not require it.
        pgd_settings = {"eps": eps, "eps_step": eps_step, "max_iter": max_iter}
        self.attack = ProjectedGradientDescent(classifier, **pgd_settings)

        self.trainer = AdversarialTrainer(classifier,
                                          self.attack,
                                          ratio=ratio)

    def fit(self, x, y, **kwargs):
        """
        Train through the wrapped adversarial trainer.

        :param x: Training inputs, passed on unchanged.
        :param y: Training labels, passed on unchanged.
        :param kwargs: Extra keyword arguments passed on to the inner `fit`.
        """
        inner = self.trainer
        inner.fit(x, y, **kwargs)

    def fit_generator(self, generator, nb_epochs, **kwargs):
        """
        Train from a data generator through the wrapped adversarial trainer.

        :param generator: Data generator, passed on unchanged.
        :param nb_epochs: Passed on as the `nb_epochs` keyword argument.
        :param kwargs: Extra keyword arguments passed on to the inner
                       `fit_generator`.
        """
        inner = self.trainer
        inner.fit_generator(generator, nb_epochs=nb_epochs, **kwargs)

    def get_classifier(self):
        """
        Return whatever the wrapped trainer's `get_classifier` returns.
        """
        inner = self.trainer
        return inner.get_classifier()
    def test_two_attacks_with_generator(self):
        """
        Adversarially train with two attacks on batches served by a generator.

        The accuracy on FGSM adversarial test samples is measured before and
        after `fit_generator` and compared with fixed reference values.
        Afterwards the train and test arrays are checked to be unchanged.
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train_backup = x_train.copy()
        x_test_backup = x_test.copy()

        class MyDataGenerator(DataGenerator):
            """Generator serving random batches drawn without replacement."""

            def __init__(self, x, y, size, batch_size):
                super().__init__(size=size, batch_size=batch_size)
                self.x = x
                self.y = y
                self._size = size
                self._batch_size = batch_size

            def get_batch(self):
                """Return one randomly chosen `(x, y)` batch."""
                picked = np.random.choice(
                    self.size,
                    size=min(self.size, self.batch_size),
                    replace=False,
                )
                return self.x[picked], self.y[picked]

        def adversarial_accuracy(model):
            # Share of adversarial test samples `model` still labels correctly.
            predicted = np.argmax(model.predict(x_test_adv), axis=1)
            return np.sum(predicted == np.argmax(y_test, axis=1)) / NB_TEST

        def max_abs_diff(before, after):
            # Largest element-wise absolute deviation between two arrays.
            return float(np.max(np.abs(before - after)))

        generator = MyDataGenerator(
            x_train, y_train, size=x_train.shape[0], batch_size=16)

        fgsm = FastGradientMethod(estimator=self.classifier, batch_size=16)
        deepfool = DeepFool(
            classifier=self.classifier, max_iter=5, batch_size=16)

        # Adversarial samples are crafted once, before any adversarial training.
        x_test_adv = fgsm.generate(x_test)
        accuracy = adversarial_accuracy(self.classifier)

        adv_trainer = AdversarialTrainer(
            self.classifier, attacks=[fgsm, deepfool])
        adv_trainer.fit_generator(generator, nb_epochs=3)

        accuracy_new = adversarial_accuracy(adv_trainer)

        self.assertAlmostEqual(accuracy_new, 0.25, delta=0.02)
        self.assertAlmostEqual(accuracy, 0.11, delta=0.0)

        # Neither x_train nor x_test may have been modified by the attacks
        # or the classifier.
        self.assertAlmostEqual(
            max_abs_diff(x_train_backup, x_train), 0.0, delta=0.00001)
        self.assertAlmostEqual(
            max_abs_diff(x_test_backup, x_test), 0.0, delta=0.00001)
# Example #3
# 0
    def test_fit_predict_different_classifiers(self):
        """
        Train one classifier on adversarial samples crafted against another.

        FGSM samples are generated with `self.classifier`, while the
        adversarial trainer wraps `self.classifier_2`. Accuracies on the
        adversarial test samples before and after `fit` are compared with
        fixed reference values and `x_test` is checked to be unchanged.
        Finally `fit_generator` is run on the same trainer and on a second
        trainer created with `ratio=1.0`.
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_test_backup = x_test.copy()

        def adversarial_accuracy(model):
            # Share of adversarial test samples `model` still labels correctly.
            predicted = np.argmax(model.predict(x_test_adv), axis=1)
            return np.sum(predicted == np.argmax(y_test, axis=1)) / NB_TEST

        fgsm = FastGradientMethod(self.classifier)
        x_test_adv = fgsm.generate(x_test)
        accuracy = adversarial_accuracy(self.classifier)

        adv_trainer = AdversarialTrainer(self.classifier_2, fgsm)
        adv_trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)

        accuracy_new = adversarial_accuracy(adv_trainer)

        self.assertEqual(accuracy_new, 0.32)
        self.assertEqual(accuracy, 0.13)

        # x_test must not have been modified by the attack or the classifier.
        largest_deviation = float(np.max(np.abs(x_test_backup - x_test)))
        self.assertAlmostEqual(largest_deviation, 0.0, delta=0.00001)

        # fit_generator
        class MyDataGenerator(DataGenerator):
            """Generator serving random batches drawn without replacement."""

            def __init__(self, x, y, size, batch_size):
                super().__init__(size=size, batch_size=batch_size)
                self.x = x
                self.y = y
                self._size = size
                self._batch_size = batch_size

            def get_batch(self):
                """Return one randomly chosen `(x, y)` batch."""
                picked = np.random.choice(
                    self.size,
                    size=min(self.size, self.batch_size),
                    replace=False,
                )
                return self.x[picked], self.y[picked]

        generator = MyDataGenerator(
            x_train, y_train, size=x_train.shape[0], batch_size=16)
        adv_trainer.fit_generator(generator, nb_epochs=5)

        # A second trainer on the same classifier, this time with an
        # explicit ratio of 1.0.
        adv_trainer_2 = AdversarialTrainer(
            self.classifier_2, fgsm, ratio=1.0)
        adv_trainer_2.fit_generator(generator, nb_epochs=5)