Exemplo n.º 1
0
    def test_one_attack_mnist(self):
        """
        Run the static adversarial trainer with a single FGSM attack on MNIST.

        Adversarial samples are crafted on ``self.classifier_k`` and scored on ``self.classifier_tf``, once before
        and once after a single epoch of adversarial training. Both accuracies are printed; no assertion is made
        on their relative order.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist

        # The attack is crafted on the source model and evaluated on the target model
        source_model = self.classifier_k
        target_model = self.classifier_tf

        # Craft the adversarial test set once; it is reused for both evaluations
        attack = FastGradientMethod(source_model)
        x_adv = attack.generate(x_test)
        nb_samples = x_adv.shape[0]

        # Baseline accuracy of the target model on the adversarial samples
        preds = target_model.predict(x_adv)
        hits = np.argmax(preds, axis=1) == np.argmax(y_test, axis=1)
        acc = np.sum(hits) / nb_samples

        # Adversarially train the target model for one epoch
        adv_trainer = StaticAdversarialTrainer(target_model, attack)
        adv_trainer.fit(x_train, y_train, nb_epochs=1)

        # Accuracy of the retrained model on the very same adversarial samples
        preds_adv_trained = adv_trainer.classifier.predict(x_adv)
        hits_adv_trained = np.argmax(preds_adv_trained, axis=1) == np.argmax(y_test, axis=1)
        acc_adv_trained = np.sum(hits_adv_trained) / nb_samples

        print('\nAccuracy before adversarial training: %.2f%%' % (acc * 100))
        print('\nAccuracy after adversarial training: %.2f%%' % (acc_adv_trained * 100))
    def __init__(self,
                 model,
                 targeted=False,
                 step_size_iter=0.3,
                 max_perturbation=0.1,
                 norm_order=np.inf,
                 num_random_init=0,
                 minimal=False,
                 batch_size=16):
        """
        Configure the wrapper and build the underlying ``FastGradientMethod`` instance.

        :param model: Passed to the parent initializer; the attack is then built on ``self.model``.
        :param targeted: Forwarded to the attack as ``targeted``.
        :param step_size_iter: Forwarded to the attack as ``eps_step``.
        :param max_perturbation: Forwarded to the attack as ``eps``.
        :param norm_order: Forwarded to the attack as ``norm``.
        :param num_random_init: Forwarded to the attack as ``num_random_init``.
        :param minimal: Forwarded to the attack as ``minimal``.
        :param batch_size: Forwarded to the attack as ``batch_size``; not stored on the instance.
        """
        super().__init__(model=model)

        # Keep the attack configuration on the instance
        self._targeted = targeted
        self._step_size_iter = step_size_iter
        self._max_perturbation = max_perturbation
        self._norm_order = norm_order
        self._num_random_init = num_random_init
        self._minimal = minimal

        # Translate this wrapper's vocabulary into FastGradientMethod keyword arguments
        fgm_settings = {
            'classifier': self.model,
            'norm': self._norm_order,
            'eps': self._max_perturbation,
            'eps_step': self._step_size_iter,
            'targeted': self._targeted,
            'num_random_init': self._num_random_init,
            'batch_size': batch_size,
            'minimal': self._minimal,
        }
        self._method = FastGradientMethod(**fgm_settings)
Exemplo n.º 3
0
    def test_shared_model_mnist(self):
        """
        Run the static adversarial trainer when attack source and training target are the same model.

        FGSM samples are crafted on ``self.classifier_k``; that same classifier is scored on them before and after
        two epochs of adversarial training. Both accuracies are printed; no assertion is made.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Training settings and the single classifier playing both roles
        params = dict(nb_epochs=2, batch_size=BATCH_SIZE)
        classifier = self.classifier_k

        # Craft the adversarial test set and take the baseline accuracy on it
        attack = FastGradientMethod(classifier)
        x_adv = attack.generate(x_test)
        nb_samples = y_test.shape[0]
        preds = classifier.predict(x_adv)
        hits = np.argmax(preds, axis=1) == np.argmax(y_test, axis=1)
        acc = np.sum(hits) / nb_samples

        # Adversarial training
        adv_trainer = StaticAdversarialTrainer(classifier, attack)
        adv_trainer.fit(x_train, y_train, **params)

        # Re-score on the same adversarial samples
        preds_adv_trained = adv_trainer.classifier.predict(x_adv)
        hits_adv_trained = np.argmax(preds_adv_trained, axis=1) == np.argmax(y_test, axis=1)
        acc_adv_trained = np.sum(hits_adv_trained) / nb_samples

        print('\nAccuracy before adversarial training: %.2f%%' % (acc * 100))
        print('\nAccuracy after adversarial training: %.2f%%' % (acc_adv_trained * 100))
Exemplo n.º 4
0
    def test_two_attacks_with_generator(self):
        """
        Train adversarially from a data generator with FGSM and DeepFool, then check accuracy did not collapse.

        The accuracy of ``self.classifier_k`` on FGSM samples is measured before and after ``fit_generator``; the
        new accuracy must be at least ``acc * ACCURACY_DROP``. The training data must come out unmodified.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train_original = x_train.copy()

        class MyDataGenerator(DataGenerator):
            """Serve random mini-batches (sampled without replacement) from in-memory arrays."""

            def __init__(self, x, y, size, batch_size):
                self.x, self.y = x, y
                self.size = size
                self.batch_size = batch_size

            def get_batch(self):
                # Never request more samples than the data set holds
                nb_draws = min(self.size, self.batch_size)
                ids = np.random.choice(self.size, size=nb_draws, replace=False)
                return self.x[ids], self.y[ids]

        generator = MyDataGenerator(x_train, y_train, x_train.shape[0], 128)

        # Two attacks built on different classifiers; only the first one crafts the evaluation set
        attack1 = FastGradientMethod(self.classifier_k)
        attack2 = DeepFool(self.classifier_tf)
        x_test_adv = attack1.generate(x_test)

        preds = np.argmax(self.classifier_k.predict(x_test_adv), axis=1)
        hits = preds == np.argmax(y_test, axis=1)
        acc = np.sum(hits) / NB_TEST

        adv_trainer = AdversarialTrainer(self.classifier_k, attacks=[attack1, attack2])
        adv_trainer.fit_generator(generator, nb_epochs=5)

        preds_new = np.argmax(adv_trainer.predict(x_test_adv), axis=1)
        hits_new = preds_new == np.argmax(y_test, axis=1)
        acc_new = np.sum(hits_new) / NB_TEST

        # The new accuracy is not required to be higher; a slight drop is tolerated
        self.assertGreaterEqual(acc_new, acc * ACCURACY_DROP)

        logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
        logger.info('\nAccuracy after adversarial training: %.2f%%', (acc_new * 100))

        # The trainer must not have modified the original training data
        self.assertTrue((x_train == x_train_original).all())
Exemplo n.º 5
0
    def test_iris_unbounded(self):
        """
        Attack an Iris classifier without clip values through an Expectation-over-Transformations wrapper.

        With ``eps=1`` and no clipping, the adversarial samples are expected to leave the [0, 1] range on both
        sides and to change at least one prediction.

        :return: None
        """
        (_, _), (x_test, y_test) = self.iris
        base_classifier = get_iris_classifier_kr()

        def identity(x):
            return x

        def identity_stream():
            # Endless supply of the identity transformation
            while True:
                yield identity

        # Rebuild the classifier around the same model, this time without clip values
        unclipped = KerasClassifier(model=base_classifier._model, use_logits=False, channel_index=1)
        classifier = ExpectationOverTransformations(unclipped, sample_size=1, transformation=identity_stream)

        attack = FastGradientMethod(classifier, eps=1)
        x_test_adv = attack.generate(x_test)

        # The attack changed something and escaped the unit interval in both directions
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv > 1).any())
        self.assertTrue((x_test_adv < 0).any())

        true_labels = np.argmax(y_test, axis=1)
        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((true_labels == preds_adv).all())

        acc = np.sum(preds_adv == true_labels) / y_test.shape[0]
        logger.info('Accuracy on Iris with limited query info: %.2f%%', (acc * 100))
    def test_iris_unbounded(self):
        """
        Attack a randomized-smoothing wrapper around an Iris classifier that has no clip values.

        With ``eps=1`` and no clipping, the adversarial samples are expected to leave the [0, 1] range on both
        sides and to change at least one smoothed prediction. Accuracy and coverage of the smoothed classifier are
        logged for the adversarial and for the clean test data.

        :return: None
        """
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_kr()

        # Recreate a classifier without clip values
        krc = KerasClassifier(model=classifier._model,
                              use_logits=False,
                              channel_index=1)
        rs = RandomizedSmoothing(classifier=krc,
                                 sample_size=100,
                                 scale=0.01,
                                 alpha=0.001)
        attack = FastGradientMethod(rs, eps=1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv > 1).any())
        self.assertTrue((x_test_adv < 0).any())

        preds_smooth = np.argmax(rs.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_smooth).all())

        # Score the smoothed classifier on clean and on adversarial inputs
        pred_clean = rs.predict(x_test)
        pred_adv = rs.predict(x_test_adv)
        acc_clean, cov_clean = compute_accuracy(pred_clean, y_test)
        acc_adv, cov_adv = compute_accuracy(pred_adv, y_test)

        # Each message must report the metrics of the data set it names: the adversarial figures go with the
        # "on adversarial examples" messages and the clean figures with the plain ones.
        logger.info(
            'Accuracy on Iris with smoothing on adversarial examples: %.2f%%',
            (acc_adv * 100))
        logger.info(
            'Coverage on Iris with smoothing on adversarial examples: %.2f%%',
            (cov_adv * 100))
        logger.info('Accuracy on Iris with smoothing: %.2f%%', (acc_clean * 100))
        logger.info('Coverage on Iris with smoothing: %.2f%%', (cov_clean * 100))
Exemplo n.º 7
0
    def test_multi_attack_mnist(self):
        """
        Run the static adversarial trainer with two attacks, FGSM and DeepFool, on MNIST.

        Both attacks craft their samples on ``self.classifier_tf``; ``self.classifier_k`` is scored on the stacked
        adversarial set before and after two epochs of adversarial training. Both accuracies are logged; no
        assertion is made on their relative order.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Target model is trained and evaluated, source model is attacked
        target_model = self.classifier_k
        source_model = self.classifier_tf

        # Craft one adversarial copy of the test set per attack and stack them
        fgsm = FastGradientMethod(source_model)
        deepfool = DeepFool(source_model)
        fgsm_samples = fgsm.generate(x_test)
        deepfool_samples = deepfool.generate(x_test)
        x_adv = np.vstack((fgsm_samples, deepfool_samples))
        # Labels are duplicated to line up with the stacked samples
        y_adv = np.vstack((y_test, y_test))
        nb_samples = y_adv.shape[0]

        preds = target_model.predict(x_adv)
        hits = np.argmax(preds, axis=1) == np.argmax(y_adv, axis=1)
        acc = np.sum(hits) / nb_samples

        # Adversarial training
        adv_trainer = StaticAdversarialTrainer(target_model, [fgsm, deepfool])
        adv_trainer.fit(x_train, y_train, nb_epochs=2, batch_size=BATCH_SIZE)

        # Re-score on the same adversarial samples
        preds_adv_trained = adv_trainer.classifier.predict(x_adv)
        hits_adv_trained = np.argmax(preds_adv_trained, axis=1) == np.argmax(y_adv, axis=1)
        acc_adv_trained = np.sum(hits_adv_trained) / nb_samples

        logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
        logger.info('Accuracy after adversarial training: %.2f%%', (acc_adv_trained * 100))
Exemplo n.º 8
0
    def test_two_attacks(self):
        """
        Train adversarially with FGSM and DeepFool and pin the resulting accuracies to fixed values.

        The accuracy on FGSM samples is expected to be exactly 0.13 before and 0.36 after two epochs of adversarial
        training. The test data must come out unmodified.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_test_original = x_test.copy()

        fgsm = FastGradientMethod(classifier=self.classifier, batch_size=16)
        deepfool = DeepFool(classifier=self.classifier, max_iter=5, batch_size=16)

        # Only FGSM crafts the evaluation set
        x_test_adv = fgsm.generate(x_test)
        predictions = np.argmax(self.classifier.predict(x_test_adv), axis=1)
        hits = predictions == np.argmax(y_test, axis=1)
        accuracy = np.sum(hits) / NB_TEST

        adv_trainer = AdversarialTrainer(self.classifier, attacks=[fgsm, deepfool])
        adv_trainer.fit(x_train, y_train, nb_epochs=2, batch_size=16)

        predictions_new = np.argmax(adv_trainer.predict(x_test_adv), axis=1)
        hits_new = predictions_new == np.argmax(y_test, axis=1)
        accuracy_new = np.sum(hits_new) / NB_TEST

        self.assertEqual(accuracy_new, 0.36)
        self.assertEqual(accuracy, 0.13)

        # Neither the attack nor the classifier may have modified x_test
        largest_deviation = float(np.max(np.abs(x_test_original - x_test)))
        self.assertAlmostEqual(largest_deviation, 0.0, delta=0.00001)
Exemplo n.º 9
0
    def test_iris_clipped(self):
        """
        Attack the Iris classifier through an Expectation-over-Transformations wrapper and check the value range.

        With ``eps=.1`` the adversarial samples are expected to stay within [0, 1] while still changing at least
        one prediction.

        :return: None
        """
        (_, _), (x_test, y_test) = self.iris

        def identity(x):
            return x

        def identity_stream():
            # Endless supply of the identity transformation
            while True:
                yield identity

        base_classifier = get_iris_classifier_kr()
        classifier = ExpectationOverTransformations(base_classifier, sample_size=1, transformation=identity_stream)

        # Untargeted attack
        attack = FastGradientMethod(classifier, eps=.1)
        x_test_adv = attack.generate(x_test)

        # Something changed, yet every value stayed inside the unit interval
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        true_labels = np.argmax(y_test, axis=1)
        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((true_labels == preds_adv).all())

        acc = np.sum(preds_adv == true_labels) / y_test.shape[0]
        logger.info('Accuracy on Iris with limited query info: %.2f%%', (acc * 100))
def test_minimal_perturbations_images(fix_get_mnist_subset,
                                      get_image_classifier_list_for_attack):
    """
    Check FGSM in minimal-perturbation mode against fixed expected statistics for every available classifier.

    :param fix_get_mnist_subset: Passed through unchanged to ``backend_check_adverse_values``.
    :param get_image_classifier_list_for_attack: Callable returning the classifiers to test for a given attack
        class, or ``None`` when none is defined.
    :return: None
    """
    classifier_list = get_image_classifier_list_for_attack(FastGradientMethod)
    # TODO this if statement must be removed once we have a classifier for both image and tabular data
    if classifier_list is None:
        logging.warning("Couldn't perform  this test because no classifier is defined")
        return

    for classifier in classifier_list:
        attack = FastGradientMethod(classifier, eps=1.0, batch_size=11)
        # Switch to minimal mode; this also overrides the eps given to the constructor
        attack.set_params(minimal=True, eps_step=0.1, eps=5.0)

        expected_labels = np.asarray([4, 2, 4, 7, 0, 4, 7, 2, 0, 7, 0])
        expected_values = dict(
            x_test_mean=ExpectedValue(0.03896513, 0.01),
            x_test_min=ExpectedValue(-0.30000000, 0.00001),
            x_test_max=ExpectedValue(0.30000000, 0.00001),
            y_test_pred_adv_expected=ExpectedValue(expected_labels, 2),
        )
        backend_check_adverse_values(attack, fix_get_mnist_subset, expected_values)
    def test_subsetscan_detector(self):
        """
        Check the detection power reported by the subset scanning detector on MNIST.

        Scanning clean data against itself must give a detection power of 0.5; scanning against FGSM samples, or
        against a clean/adversarial mix, must give more than 0.5.

        :return: None
        """
        (x_train, y_train), (x_test, y_test), _, _ = load_dataset('mnist')
        x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

        # Keras classifier
        classifier = get_classifier_kr()

        # Adversarial copies of the training and test data
        attacker = FastGradientMethod(classifier, eps=0.5)
        x_train_adv = attacker.generate(x_train)
        x_test_adv = attacker.generate(x_test)

        # Clean and adversarial training data, stacked
        x_train_detector = np.concatenate((x_train, x_train_adv), axis=0)

        # The detector is built on the clean training data and reads layer 1
        background, clean, anomalous = x_train, x_test, x_test_adv
        detector = SubsetScanningDetector(classifier, background, layer=1)

        # Clean versus clean
        _, _, dpwr = detector.scan(clean, clean)
        self.assertAlmostEqual(dpwr, 0.5)

        # Clean versus adversarial
        _, _, dpwr = detector.scan(clean, anomalous)
        self.assertGreater(dpwr, 0.5)

        # Clean versus the stacked clean/adversarial training data
        _, _, dpwr = detector.scan(clean, x_train_detector, 85, 15)
        self.assertGreater(dpwr, 0.5)
Exemplo n.º 12
0
    def test_multi_attack_mnist_with_generator(self):
        """
        Run the static adversarial trainer with FGSM and DeepFool, feeding training data through a generator.

        Both attacks craft their samples on ``self.classifier_tf``; ``self.classifier_k`` is scored on the stacked
        adversarial set before and after two epochs of ``fit_generator``. Both accuracies are logged, and the
        original training data must come out unmodified.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train_original = x_train.copy()

        class MyDataGenerator(DataGenerator):
            """Serve random mini-batches (sampled without replacement) from in-memory arrays."""

            def __init__(self, x, y, size, batch_size):
                self.x, self.y = x, y
                self.size = size
                self.batch_size = batch_size

            def get_batch(self):
                # Never request more samples than the data set holds
                nb_draws = min(self.size, self.batch_size)
                ids = np.random.choice(self.size, size=nb_draws, replace=False)
                return (self.x[ids], self.y[ids])

        generator = MyDataGenerator(x_train, y_train, x_train.shape[0], BATCH_SIZE)

        # Target model is trained and evaluated, source model is attacked
        target_model = self.classifier_k
        source_model = self.classifier_tf

        # Craft one adversarial copy of the test set per attack and stack them
        fgsm = FastGradientMethod(source_model)
        deepfool = DeepFool(source_model)
        fgsm_samples = fgsm.generate(x_test)
        deepfool_samples = deepfool.generate(x_test)
        x_adv = np.vstack((fgsm_samples, deepfool_samples))
        # Labels are duplicated to line up with the stacked samples
        y_adv = np.vstack((y_test, y_test))
        nb_samples = y_adv.shape[0]

        preds = target_model.predict(x_adv)
        hits = np.argmax(preds, axis=1) == np.argmax(y_adv, axis=1)
        acc = np.sum(hits) / nb_samples

        # Adversarial training from the generator
        adv_trainer = StaticAdversarialTrainer(target_model, [fgsm, deepfool])
        adv_trainer.fit_generator(generator, nb_epochs=2)

        # Re-score on the same adversarial samples
        preds_adv_trained = adv_trainer.classifier.predict(x_adv)
        hits_adv_trained = np.argmax(preds_adv_trained, axis=1) == np.argmax(y_adv, axis=1)
        acc_adv_trained = np.sum(hits_adv_trained) / nb_samples

        logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
        logger.info('Accuracy after adversarial training: %.2f%%', (acc_adv_trained * 100))

        # The trainer must not have modified the original training data
        self.assertTrue((x_train == x_train_original).all())
    def __init__(self, model, loss_criterion, norm, batch_size=128):
        """
        Wrap a model for ART and prepare a ``FastGradientMethod`` attack on it.

        :param model: Model handed to ``wrapModel`` together with ``loss_criterion``.
        :param loss_criterion: Loss handed to ``wrapModel``.
        :param norm: Stored as ``self.norm``.
        :param batch_size: Stored as ``self.batch_size`` and forwarded to the attack.
        """
        wrapped = wrapModel(model, loss_criterion)
        self.wrapped_pytorch_model = wrapped
        self.norm = norm
        self.batch_size = batch_size
        # NOTE(review): ``norm`` is stored but not passed to FastGradientMethod here, so the attack is built with
        # its own default norm - confirm this is intended.
        self.attack = FastGradientMethod(wrapped, batch_size=batch_size)

        # Use GPU for computation if it is available
        device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device_name)
Exemplo n.º 14
0
def atk_FastGradient(x_train, x_test, y_train, y_test, classifier):
    """
    Craft FGSM adversarial copies of the training and test data (eps = 0.1) and evaluate the classifier on them.

    :param x_train: Training inputs to attack.
    :param x_test: Test inputs to attack.
    :param y_train: Training labels, only passed on to ``evaluate``.
    :param y_test: Test labels, only passed on to ``evaluate``.
    :param classifier: Classifier the attack is built on and that ``evaluate`` receives.
    :return: Tuple ``(x_test_adv, x_train_adv)`` - note that the test set comes first.
    """
    epsilon = 0.1
    adv_crafter = FastGradientMethod(classifier)

    # Same attack and budget for both splits; the training data is attacked first
    x_train_adv, x_test_adv = [adv_crafter.generate(split, eps=epsilon) for split in (x_train, x_test)]

    print("After FastGradient Attack  \n")
    evaluate(x_train, x_test, y_train, y_test, x_train_adv, x_test_adv, classifier)
    return x_test_adv, x_train_adv
    def test_iris_clipped(self):
        """
        Check randomized smoothing on Iris against a clipped FGSM attack, plus basic smoothing functionality.

        The attack is crafted on the base classifier ``krc`` with ``eps=.1`` and must stay within [0, 1]. The
        smoothed classifier is then scored on clean and adversarial data, and its ``predict``, gradient and
        ``certify`` outputs are checked for shape and range.

        :return: None
        """
        (_, _), (x_test, y_test) = self.iris

        krc, _ = get_iris_classifier_kr()
        rs = RandomizedSmoothing(classifier=krc,
                                 sample_size=100,
                                 scale=0.01,
                                 alpha=0.001)

        # Test untargeted attack
        attack = FastGradientMethod(krc, eps=.1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        # NOTE(review): preds_base is never read. The call is kept because the smoothed prediction presumably
        # draws random noise, so dropping it could shift later results - confirm before removing.
        preds_base = np.argmax(rs.predict(x_test), axis=1)
        preds_smooth = np.argmax(rs.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_smooth).all())

        # Score the smoothed classifier on clean and on adversarial inputs
        pred_clean = rs.predict(x_test)
        pred_adv = rs.predict(x_test_adv)
        acc_clean, cov_clean = compute_accuracy(pred_clean, y_test)
        acc_adv, cov_adv = compute_accuracy(pred_adv, y_test)

        # Each message must report the metrics of the data set it names: the adversarial figures go with the
        # "on adversarial examples" messages and the clean figures with the plain ones.
        logger.info(
            'Accuracy on Iris with smoothing on adversarial examples: %.2f%%',
            (acc_adv * 100))
        logger.info(
            'Coverage on Iris with smoothing on adversarial examples: %.2f%%',
            (cov_adv * 100))
        logger.info('Accuracy on Iris with smoothing: %.2f%%', (acc_clean * 100))
        logger.info('Coverage on Iris with smoothing: %.2f%%', (cov_clean * 100))

        # Check basic functionality of RS object
        # check predict
        y_test_smooth = rs.predict(x=x_test)
        self.assertEqual(y_test_smooth.shape, y_test.shape)
        self.assertTrue((np.sum(y_test_smooth, axis=1) <= 1).all())

        # check gradients
        grad_smooth1 = rs.loss_gradient(x=x_test, y=y_test)
        grad_smooth2 = rs.class_gradient(x=x_test, label=None)
        grad_smooth3 = rs.class_gradient(x=x_test,
                                         label=np.argmax(y_test, axis=1))
        self.assertEqual(grad_smooth1.shape, x_test_adv.shape)
        self.assertEqual(grad_smooth2.shape[0], len(x_test))
        self.assertEqual(grad_smooth3.shape[0], len(x_test))

        # check certification
        pred, radius = rs.certify(x=x_test, n=250)
        self.assertEqual(len(pred), len(x_test))
        self.assertEqual(len(radius), len(x_test))
        self.assertTrue((radius <= 1).all())
        self.assertTrue((pred < y_test.shape[1]).all())
Exemplo n.º 16
0
def run_fgsm_attacks(
    classifier,
    target_image,
    eps,
    mask_width=20,
    masked=True,
    img_show=True,
    debug=True,
    use_art=True,
    feature_extractor="blazeface",
    iter_step=1,
):
    """
    Run ``iter_step`` rounds of an ART FGSM attack on a single image and return the last adversarial image.

    Each round resumes from the previous round's adversarial batch. A round that raises is logged with its
    traceback and skipped, so the function is best-effort and may return a stale image or ``None``.

    :param classifier: Classifier the ``FastGradientMethod`` attack is built on.
    :param target_image: Image to attack; it is wrapped into a batch of one.
    :param eps: Passed to ``FastGradientMethod`` as ``eps``.
    :param mask_width: Passed to ``apply_mask_to_adv_noise`` (blazeface branch only).
    :param masked: When true, the adversarial image is post-processed by one of the masking helpers.
    :param img_show: Currently unused.
    :param debug: Currently unused.
    :param use_art: Must be true; the non-ART path is not implemented.
    :param feature_extractor: ``"blazeface"`` selects ``apply_mask_to_adv_noise``; any other value selects
        ``apply_mask_to_adv_noise_mtcnn``.
    :param iter_step: Number of attack rounds to run.
    :return: The adversarial image from the last successful round, or ``None`` if no round succeeded.
    :raises NotImplementedError: If ``use_art`` is false.
    """
    if not use_art:
        # Only the ART-backed implementation exists
        raise NotImplementedError

    adv_image = None
    attack = FastGradientMethod(classifier=classifier, eps=eps)
    x_adv = None
    for _ in range(iter_step):
        try:
            x_adv = attack.generate(x=np.array([target_image]),
                                    x_adv_init=x_adv,
                                    resume=True)

            # Apply mask
            adv_image = x_adv[0].astype(np.uint)
            target_image_copy = target_image.copy()
            if masked:
                if feature_extractor == "blazeface":
                    adv_image, _ = apply_mask_to_adv_noise(
                        target_image, adv_image, mask_width=mask_width)
                else:
                    adv_image, _ = apply_mask_to_adv_noise_mtcnn(
                        target_image, adv_image)
                # The reference copy is resized to 128x128 only on the masked path before the norm is taken
                target_image_copy = cv.resize(target_image_copy, (128, 128))

            norm = np.linalg.norm(np.reshape(adv_image - target_image_copy, [-1]),
                                  ord=np.inf)
            logging.debug('debug: norm: %s', norm)

        except Exception as e:  # deliberate best-effort: a failed round must not abort the remaining rounds
            # Same message as before, now with the traceback attached so failures can be diagnosed
            logging.exception(e)
        attack.max_iter = iter_step
    return adv_image
def fgsm(model, X, y, optimizer, epsilon=0.1):
    """
    Construct FGSM adversarial examples on the examples X.

    NOTE(review): the ``model`` argument is not referenced; the classifier is built around the module-level
    ``model_concetenate`` and ``custom_loss`` instead - confirm this is intended.

    :param model: Currently unused (see note above).
    :param X: Input tensor; converted with ``.numpy()`` before the attack.
    :param y: Label tensor; converted with ``.numpy()`` and passed to the attack as ``y``.
    :param optimizer: Optimizer handed to ``PyTorchClassifier``.
    :param epsilon: Passed to ``FastGradientMethod`` as ``eps``.
    :return: The adversarial examples as a ``torch.Tensor``.
    """
    art_classifier = PyTorchClassifier(
        model=model_concetenate,
        loss=custom_loss,
        optimizer=optimizer,
        input_shape=(1, 28, 28),
        nb_classes=10,
        device_type='gpu',
    )
    fgsm_attack = FastGradientMethod(classifier=art_classifier, eps=epsilon)

    # The attack works on NumPy arrays, so convert on the way in and back on the way out
    inputs_np = X.numpy()
    labels_np = y.numpy()
    adversarial_np = fgsm_attack.generate(inputs_np, y=labels_np)
    return torch.Tensor(adversarial_np)
Exemplo n.º 18
0
    def test_two_attacks_with_generator(self):
        """
        Train adversarially from a data generator with FGSM and DeepFool and pin the resulting accuracies.

        The accuracy on FGSM samples is expected to be 0.11 before (zero tolerance) and 0.25 +/- 0.02 after three
        epochs of ``fit_generator``. Training and test data must come out unmodified.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist
        x_train_original = x_train.copy()
        x_test_original = x_test.copy()

        class MyDataGenerator(DataGenerator):
            """Serve random mini-batches (sampled without replacement) from in-memory arrays."""

            def __init__(self, x, y, size, batch_size):
                super().__init__(size=size, batch_size=batch_size)
                self.x, self.y = x, y
                self._size = size
                self._batch_size = batch_size

            def get_batch(self):
                # Never request more samples than the data set holds
                nb_draws = min(self.size, self.batch_size)
                ids = np.random.choice(self.size, size=nb_draws, replace=False)
                return self.x[ids], self.y[ids]

        generator = MyDataGenerator(x_train, y_train, size=x_train.shape[0], batch_size=16)

        fgsm = FastGradientMethod(classifier=self.classifier, batch_size=16)
        deepfool = DeepFool(classifier=self.classifier, max_iter=5, batch_size=16)

        # Only FGSM crafts the evaluation set
        x_test_adv = fgsm.generate(x_test)
        predictions = np.argmax(self.classifier.predict(x_test_adv), axis=1)
        hits = predictions == np.argmax(y_test, axis=1)
        accuracy = np.sum(hits) / NB_TEST

        adv_trainer = AdversarialTrainer(self.classifier, attacks=[fgsm, deepfool])
        adv_trainer.fit_generator(generator, nb_epochs=3)

        predictions_new = np.argmax(adv_trainer.predict(x_test_adv), axis=1)
        hits_new = predictions_new == np.argmax(y_test, axis=1)
        accuracy_new = np.sum(hits_new) / NB_TEST

        self.assertAlmostEqual(accuracy_new, 0.25, delta=0.02)
        self.assertAlmostEqual(accuracy, 0.11, delta=0.0)

        # Neither the attack nor the classifier may have modified x_train or x_test
        train_deviation = float(np.max(np.abs(x_train_original - x_train)))
        self.assertAlmostEqual(train_deviation, 0.0, delta=0.00001)
        test_deviation = float(np.max(np.abs(x_test_original - x_test)))
        self.assertAlmostEqual(test_deviation, 0.0, delta=0.00001)
Exemplo n.º 19
0
    def evaluate_fgsm(self, data_loader):
        """
        Adversarial evaluation by FGSM on the first batch of ``data_loader``.

        :param data_loader: Iterable whose first item is an ``(examples, labels)`` pair of tensors; only that
            first batch is used.
        :return: Fraction of that batch still classified correctly after the attack.
        """
        # L-inf attack with the epsilon configured for this data set
        eps = attack_configs['FGSM'][self.dataset]['epsilon']
        adv_crafter = FastGradientMethod(self.classifier, norm=np.inf, eps=eps)

        # Pull a single batch and move it to NumPy for ART
        examples, labels = next(iter(data_loader))
        examples = examples.cpu().numpy()
        labels = labels.cpu().numpy()

        # Integer labels are turned into one-hot rows for the attack
        labels_one_hot = np.eye(self.nb_classes)[labels]
        examples_adv = adv_crafter.generate(examples, y=labels_one_hot)

        preds = np.argmax(self.classifier.predict(examples_adv), axis=1)
        nb_correct = np.sum(preds == labels)
        return nb_correct / labels.shape[0]
Exemplo n.º 20
0
    def test_binary_activation_detector(self):
        """
        Test the binary activation detector end-to-end.

        A small CNN is trained on layer-0 activations of clean and FGSM training data, then applied to clean and
        adversarial test data. At least one true positive and one true negative are required.

        :return:
        """
        # Get MNIST
        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
        x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

        # Keras classifier
        classifier = get_classifier_kr()

        # Generate adversarial samples
        attacker = FastGradientMethod(classifier, eps=0.1)
        clean_train = x_train[:NB_TRAIN]
        x_train_adv = attacker.generate(clean_train)
        # NOTE(review): the test data is cut with NB_TRAIN here although it was sliced with NB_TEST above -
        # confirm whether NB_TEST was meant.
        x_test_adv = attacker.generate(x_test[:NB_TRAIN])

        # Detector training data: clean samples labelled [1, 0], adversarial samples labelled [0, 1]
        x_train_detector = np.concatenate((clean_train, x_train_adv), axis=0)
        clean_labels = np.array([[1, 0]] * NB_TRAIN)
        adversarial_labels = np.array([[0, 1]] * NB_TRAIN)
        y_train_detector = np.concatenate((clean_labels, adversarial_labels), axis=0)

        # Create a simple CNN for the detector, shaped after the classifier's layer-0 activations
        activation_shape = classifier.get_activations(x_test[:1], 0).shape[1:]
        number_outputs = 2
        model = Sequential()
        model.add(MaxPooling2D(pool_size=(2, 2), input_shape=activation_shape))
        model.add(Flatten())
        model.add(Dense(number_outputs, activation='softmax'))
        model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adam(lr=0.01),
                      metrics=['accuracy'])

        # Create detector and train it; the detector considers activations at layer=0
        detector_classifier = KerasClassifier(model=model, clip_values=(0, 1), use_logits=False)
        detector = BinaryActivationDetector(classifier=classifier, detector=detector_classifier, layer=0)
        detector.fit(x_train_detector, y_train_detector, nb_epochs=2, batch_size=128)

        # Apply detector on clean and adversarial test data
        test_detection = np.argmax(detector.predict(x_test), axis=1)
        test_adv_detection = np.argmax(detector.predict(x_test_adv), axis=1)

        # Assert there is at least one true positive and negative
        nb_true_positives = len(np.where(test_adv_detection == 1)[0])
        nb_true_negatives = len(np.where(test_detection == 0)[0])
        logger.debug('Number of true positives detected: %i', nb_true_positives)
        logger.debug('Number of true negatives detected: %i', nb_true_negatives)
        self.assertGreater(nb_true_positives, 0)
        self.assertGreater(nb_true_negatives, 0)
def test_targeted_images(fix_get_mnist_subset,
                         get_image_classifier_list_for_attack):
    """
    Run the shared targeted-attack check with FGSM in minimal-perturbation mode for every available classifier.

    :param fix_get_mnist_subset: Passed through unchanged to ``backend_targeted_images``.
    :param get_image_classifier_list_for_attack: Callable returning the classifiers to test for a given attack
        class, or ``None`` when none is defined.
    :return: None
    """
    classifier_list = get_image_classifier_list_for_attack(FastGradientMethod)
    # TODO this if statement must be removed once we have a classifier for both image and tabular data
    if classifier_list is None:
        logging.warning("Couldn't perform  this test because no classifier is defined")
        return

    for classifier in classifier_list:
        attack = FastGradientMethod(classifier, eps=1.0, targeted=True)
        # Switch the targeted attack to minimal mode with a fine step size
        attack.set_params(minimal=True, eps_step=0.01, eps=1.0)

        backend_targeted_images(attack, fix_get_mnist_subset)
Exemplo n.º 22
0
    def test_classifier_match(self):
        """
        Check that a trainer built from a single attack registers exactly that one attack, and that the attack's
        classifier equals the trainer's classifier.

        :return: None
        """
        fgsm = FastGradientMethod(self.classifier_k)
        trainer = AdversarialTrainer(self.classifier_k, fgsm)

        registered_attacks = trainer.attacks
        self.assertEqual(len(registered_attacks), 1)
        self.assertEqual(registered_attacks[0].classifier, trainer.classifier)
class FGMAttack(AdversarialAttack):
    """Adversarial attack that delegates sample generation to ART's ``FastGradientMethod``."""

    def __init__(self,
                 model,
                 targeted=False,
                 step_size_iter=0.3,
                 max_perturbation=0.1,
                 norm_order=np.inf,
                 num_random_init=0,
                 minimal=False,
                 batch_size=16):
        """
        Configure the wrapper and build the underlying ``FastGradientMethod`` instance.

        :param model: Passed to the parent initializer; the attack is then built on ``self.model``.
        :param targeted: Forwarded to the attack as ``targeted``.
        :param step_size_iter: Forwarded to the attack as ``eps_step``.
        :param max_perturbation: Forwarded to the attack as ``eps``.
        :param norm_order: Forwarded to the attack as ``norm``.
        :param num_random_init: Forwarded to the attack as ``num_random_init``.
        :param minimal: Forwarded to the attack as ``minimal``, at construction and again on every generation.
        :param batch_size: Forwarded to the attack as ``batch_size``; not stored on the instance.
        """
        super().__init__(model=model)

        # Keep the attack configuration on the instance
        self._targeted = targeted
        self._step_size_iter = step_size_iter
        self._max_perturbation = max_perturbation
        self._norm_order = norm_order
        self._num_random_init = num_random_init
        self._minimal = minimal

        # Translate this wrapper's vocabulary into FastGradientMethod keyword arguments
        fgm_settings = {
            'classifier': self.model,
            'norm': self._norm_order,
            'eps': self._max_perturbation,
            'eps_step': self._step_size_iter,
            'targeted': self._targeted,
            'num_random_init': self._num_random_init,
            'batch_size': batch_size,
            'minimal': self._minimal,
        }
        self._method = FastGradientMethod(**fgm_settings)

    def attack_method(self, x, y=None):
        """
        Generate adversarial samples for ``x`` with the wrapped attack.

        :param x: Inputs handed to ``generate``.
        :param y: Optional labels; forwarded as ``y`` only when not ``None``.
        :return: Whatever the wrapped attack's ``generate`` returns.
        """
        generate_kwargs = {'minimal': self._minimal}
        if y is None:
            return self._method.generate(x=x, **generate_kwargs)
        generate_kwargs['y'] = y
        return self._method.generate(x=x, **generate_kwargs)
Exemplo n.º 24
0
def main(config_filepath):
    """
    Generate FGSM adversarial examples for a saved data set and store them as a tensor file.

    Paths for the inputs, labels, model, clip values and the output all come from the configuration loaded
    from ``config_filepath``. If the output file already exists, the user is asked to confirm the overwrite.

    :param config_filepath: Location of the configuration handed to ``load_config``.
    :return: None
    """
    config = load_config(config_filepath)

    # Ask before clobbering an existing result; declining aborts
    if os.path.isfile(config.x_adv_output_path):
        click.confirm(f"Overwrite {config.x_adv_output_path}?", abort=True)

    # Fixed seed for NumPy and torch
    seed = 45616451
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Load data, remembering the original shape so it can be restored at the end
    x = torch.load(config.x_filepath)
    x_shape = x.shape
    y = torch.load(config.y_filepath)

    # Flatten each sample of the test set into a single row
    x = x.reshape(x.shape[0], -1)

    model = torch.load(config.model_filepath)

    # Pixel range as a (min, max) pair read from the JSON file
    with open(config.clip_values_filepath, "r") as f:
        clip_bounds = json.load(f)
    clip_values = (
        clip_bounds.get("min_pixel_value"),
        clip_bounds.get("max_pixel_value"),
    )

    # TODO: move these parameters to config
    classifier = PyTorchClassifier(
        model=model,
        clip_values=clip_values,
        loss=model.criterion,
        optimizer=model.optimizer,
        input_shape=(1, 28, 28),
        nb_classes=10,
    )

    # Generate attacks
    # TODO: move these parameters to config
    attack = FastGradientMethod(classifier=classifier, eps=0.2)
    x_adv = attack.generate(x=x)

    # Reshape adversarial examples back to original test data shape
    x_adv = torch.from_numpy(x_adv.reshape(x_shape))
    torch.save(x_adv, config.x_adv_output_path)
Exemplo n.º 25
0
    def test_fit_predict(self):
        """
        Train adversarially with a single FGSM attack and check that accuracy on FGSM samples did not collapse.

        The accuracy of ``self.classifier_k`` on the adversarial test set is measured before and after five
        epochs of adversarial training; the new accuracy must be at least ``acc * ACCURACY_DROP``.

        :return: None
        """
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Craft the evaluation set once; it is reused after training
        fgsm = FastGradientMethod(self.classifier_k)
        x_test_adv = fgsm.generate(x_test)

        preds = np.argmax(self.classifier_k.predict(x_test_adv), axis=1)
        hits = preds == np.argmax(y_test, axis=1)
        acc = np.sum(hits) / NB_TEST

        adv_trainer = AdversarialTrainer(self.classifier_k, fgsm)
        adv_trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)

        preds_new = np.argmax(adv_trainer.predict(x_test_adv), axis=1)
        hits_new = preds_new == np.argmax(y_test, axis=1)
        acc_new = np.sum(hits_new) / NB_TEST
        self.assertGreaterEqual(acc_new, acc * ACCURACY_DROP)

        logger.info('Accuracy before adversarial training: %.2f%%', (acc * 100))
        logger.info('Accuracy after adversarial training: %.2f%%', (acc_new * 100))
def general_test_v2(model,
                    optimizer,
                    input_shape,
                    nb_classes,
                    test_loader,
                    method,
                    conf,
                    btrain=False,
                    model_file='last_model_92_sgd.pkl'):
    """
    Evaluate ``model`` on adversarial examples crafted from ``test_loader`` with an ART attack.

    :param model: PyTorch module to attack and evaluate; it is switched to eval mode here.
    :param optimizer: optimizer forwarded to the ``PyTorchClassifier`` wrapper.
    :param input_shape: input shape forwarded to the ``PyTorchClassifier`` wrapper.
    :param nb_classes: number of classes forwarded to the ``PyTorchClassifier`` wrapper.
    :param test_loader: source data handed to ``adv_generalization``.
    :param method: attack name, one of 'Deepfool', 'BIM', 'JSMA', 'CW2', 'CWI', 'FGSM'.
    :param conf: passed through unchanged to ``adv_generalization``.
    :param btrain: when False, weights are first loaded from ``model_file``.
    :param model_file: checkpoint path; its 'state_dict' entry is loaded into ``model``.
    :return: fraction of adversarial samples classified correctly, as a Python float.
    :raises ValueError: if ``method`` is not one of the supported attack names.
    :raises ZeroDivisionError: if the adversarial loader yields no full batch of 32 samples.
    """
    # Fail fast on a bad attack name, before touching the checkpoint or the model.
    # Previously an unknown name left `adv_crafter` unbound and crashed later on.
    supported_methods = ('Deepfool', 'BIM', 'JSMA', 'CW2', 'CWI', 'FGSM')
    if method not in supported_methods:
        raise ValueError('Unknown attack method %r; expected one of: %s'
                         % (method, ', '.join(supported_methods)))

    if not btrain:
        # NOTE(review): torch.load unpickles the file; only use trusted checkpoints.
        checked_state = torch.load(model_file)['state_dict']
        model.load_state_dict(checked_state)
    model.eval()

    loss = nn.CrossEntropyLoss()
    warped_model = PyTorchClassifier(model,
                                     loss,
                                     optimizer,
                                     input_shape,
                                     nb_classes,
                                     clip_values=(.0, 1.))
    if method == 'Deepfool':
        adv_crafter = DeepFool(warped_model)
    elif method == 'BIM':
        adv_crafter = BasicIterativeMethod(warped_model, batch_size=32)
    elif method == 'JSMA':
        adv_crafter = SaliencyMapMethod(warped_model, batch_size=32)
    elif method == 'CW2':
        adv_crafter = CarliniL2Method(warped_model, batch_size=32)
    elif method == 'CWI':
        adv_crafter = CarliniLInfMethod(warped_model, batch_size=32)
    elif method == 'FGSM':
        adv_crafter = FastGradientMethod(warped_model, batch_size=32)

    correct, total = 0, 0

    adv_dataset = adv_generalization(test_loader, adv_crafter, conf)
    # drop_last=True discards a trailing partial batch, so `total` is a multiple of 32
    temp_loader = DataLoader(dataset=adv_dataset,
                             batch_size=32,
                             shuffle=False,
                             drop_last=True)

    for images, labels in temp_loader:
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        # int() keeps `correct` a plain Python integer whether .sum() returns a number or a
        # 0-dim tensor, so the returned ratio is a float and never an integer-divided tensor.
        correct += int((predicted == labels.data).sum())

    accuracy = float(correct) / total
    print('Accuracy of the model on the test images: %d %%' %
          (100 * float(correct) / total))
    print('Accuracy of the model on the test images:', accuracy)
    return accuracy
Exemplo n.º 27
0
    def test_two_attacks(self):
        """
        Adversarially train the Keras classifier against two attacks at once (FGSM crafted on the
        Keras classifier, DeepFool crafted on the TensorFlow classifier). Accuracy is measured on
        the FGSM test samples only and must stay at or above ``ACCURACY_DROP`` times its prior value.

        :return: None
        """
        (train_x, train_y), (test_x, test_y) = self.mnist
        true_labels = np.argmax(test_y, axis=1)

        fgsm = FastGradientMethod(self.classifier_k)
        deepfool = DeepFool(self.classifier_tf)

        # Baseline accuracy on FGSM samples before adversarial training
        adv_x = fgsm.generate(test_x)
        labels_before = np.argmax(self.classifier_k.predict(adv_x), axis=1)
        acc_before = np.sum(labels_before == true_labels) / NB_TEST

        trainer = AdversarialTrainer(self.classifier_k, attacks=[fgsm, deepfool])
        trainer.fit(train_x, train_y, nb_epochs=5, batch_size=128)

        labels_after = np.argmax(trainer.predict(adv_x), axis=1)
        acc_after = np.sum(labels_after == true_labels) / NB_TEST
        # Accuracy is not required to improve; it may dip slightly, hence the tolerance factor
        self.assertGreaterEqual(acc_after, acc_before * ACCURACY_DROP)

        logger.info('Accuracy before adversarial training: %.2f%%', (acc_before * 100))
        logger.info('\nAccuracy after adversarial training: %.2f%%', (acc_after * 100))
def robust_score_test(eps=0.1,
                      X_test=None,
                      y_test=None,
                      model=None,
                      feature_selector=None,
                      scorer=None):
    """
    Return how much a model's score drops when its test inputs are perturbed with FGSM.

    :param eps: attack strength forwarded to ``FastGradientMethod``.
    :param X_test: test inputs; they are passed through ``feature_selector.transform`` first.
    :param y_test: test targets handed to ``scorer``.
    :param model: model to evaluate; a deep copy of it is wrapped and scored.
    :param feature_selector: object whose ``transform`` method is applied to ``X_test``.
    :param scorer: callable invoked as ``scorer(model, X, y)``.
    :return: score on the transformed test data minus score on its adversarial counterpart.
    """
    features = feature_selector.transform(X_test)

    # Wrap and score a deep copy rather than the caller's own model object
    model_copy = copy.deepcopy(model)
    fgsm = FastGradientMethod(SklearnClassifier(model=model_copy), eps=eps, batch_size=1)
    adversarial_features = fgsm.generate(features)

    clean_score = scorer(model_copy, features, y_test)
    attacked_score = scorer(model_copy, adversarial_features, y_test)
    return clean_score - attacked_score
Exemplo n.º 29
0
    def _test_mnist_targeted(self, classifier, x_test, y_test):
        """
        Run a targeted, minimal-perturbation FGSM attack whose target for every sample is the class
        ranked second in the classifier's clean prediction, and check that at least half of the
        entries of the predicted one-hot labels match the targets.

        :param classifier: classifier that is both attacked and used for prediction.
        :param x_test: test inputs to perturb.
        :param y_test: not used by this helper.
        :return: None
        """
        nb_samples = x_test.shape[0]

        # Targeted FGSM; no norm is passed, so the attack's default norm applies
        fgsm = FastGradientMethod(classifier, eps=1.0, targeted=True)

        # argsort is ascending, so column -2 holds the second most likely class of each sample
        ranking = classifier.predict(x_test).argsort(axis=1)
        targets = np.zeros((nb_samples, 10))
        targets[np.arange(nb_samples), ranking[:, -2]] = 1.0

        fgsm.set_params(minimal=True, eps_step=0.01, eps=1.0)

        adv_x = fgsm.generate(x_test, y=targets)
        self.assertFalse((x_test == adv_x).all())

        adv_labels = get_labels_np_array(classifier.predict(adv_x))

        self.assertEqual(targets.shape, adv_labels.shape)
        nb_matching = (targets == adv_labels).sum()
        self.assertGreaterEqual(nb_matching, nb_samples // 2)
Exemplo n.º 30
0
    def test_keras_iris_unbounded(self):
        """
        Attack a Keras Iris classifier built without clip values using FGSM with eps=1. The test
        checks that some adversarial features exceed 1, some fall below 0, and that not every
        adversarial sample is still classified correctly.

        :return: None
        """
        (_, _), (test_x, test_y) = self.iris
        base_classifier = get_iris_classifier_kr()

        # Wrap the same underlying model again, this time without passing any clip values
        unbounded = KerasClassifier(model=base_classifier._model, use_logits=False, channel_index=1)

        fgsm = FastGradientMethod(unbounded, eps=1)
        adv_x = fgsm.generate(test_x)
        self.assertFalse((test_x == adv_x).all())
        self.assertTrue((adv_x > 1).any())
        self.assertTrue((adv_x < 0).any())

        adv_labels = np.argmax(unbounded.predict(adv_x), axis=1)
        true_labels = np.argmax(test_y, axis=1)
        self.assertFalse((true_labels == adv_labels).all())

        accuracy = np.sum(adv_labels == true_labels) / test_y.shape[0]
        logger.info('Accuracy on Iris with FGM adversarial examples: %.2f%%', (accuracy * 100))