    def _test_backend_mnist(self, classifier):
        # Get MNIST
        (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
        x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

        # Test DeepFool
        attack = DeepFool(classifier, max_iter=5)
        x_test_adv = attack.generate(x_test)
        x_train_adv = attack.generate(x_train)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(
            np.argmax(train_y_pred, axis=1) == np.argmax(
                y_train, axis=1)) / y_train.shape[0]
        print('\nAccuracy on adversarial train examples: %.2f%%' % (acc * 100))

        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(
                y_test, axis=1)) / y_test.shape[0]
        print('\nAccuracy on adversarial test examples: %.2f%%' % (acc * 100))
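These tests lean on ART helper utilities such as load_mnist and get_labels_np_array. As a rough sketch (an approximation for readability, not the library's exact code), get_labels_np_array can be read as turning prediction scores into one-hot labels:

import numpy as np

def get_labels_np_array_sketch(preds):
    # Mark each row's maximum score as the predicted class (one-hot output).
    # Illustrative approximation of ART's helper, not its exact implementation.
    preds_max = np.amax(preds, axis=1, keepdims=True)
    return (preds == preds_max).astype(np.float32)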
Example #2
    def _test_backend_mnist(self, classifier):
        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Test DeepFool
        attack = DeepFool(classifier, max_iter=5)
        x_test_adv = attack.generate(x_test)
        x_train_adv = attack.generate(x_train)

        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(
            np.argmax(train_y_pred, axis=1) == np.argmax(
                y_train, axis=1)) / y_train.shape[0]
        logger.info('Accuracy on adversarial train examples: %.2f%%',
                    (acc * 100))

        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(
                y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on adversarial test examples: %.2f%%',
                    (acc * 100))
Example #3
    def test_mnist(self):
        session = tf.Session()
        k.set_session(session)

        comp_params = {"loss": 'categorical_crossentropy',
                       "optimizer": 'adam',
                       "metrics": ['accuracy']}

        # get MNIST
        batch_size, nb_train, nb_test = 100, 1000, 11
        (X_train, Y_train), (X_test, Y_test), _, _ = load_mnist()
        X_train, Y_train = X_train[:nb_train], Y_train[:nb_train]
        X_test, Y_test = X_test[:nb_test], Y_test[:nb_test]
        im_shape = X_train[0].shape

        # get classifier
        classifier = CNN(im_shape, act="relu")
        classifier.compile(comp_params)
        classifier.fit(X_train, Y_train, epochs=1, batch_size=batch_size, verbose=0)
        scores = classifier.evaluate(X_test, Y_test)
        print("\naccuracy on test set: %.2f%%" % (scores[1] * 100))

        df = DeepFool(classifier, sess=session)
        df.set_params(clip_min=0., clip_max=1.)
        x_test_adv = df.generate(X_test)
        self.assertFalse((X_test == x_test_adv).all())
        y_pred = classifier.predict(x_test_adv)
        self.assertFalse((Y_test == y_pred).all())

        scores = classifier.evaluate(x_test_adv, Y_test)
        print('\naccuracy on adversarial examples: %.2f%%' % (scores[1] * 100))
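This snippet uses an older ART interface (the CNN helper, a sess argument, and set_params for clipping). A minimal sketch of the same flow against a recent ART release, assuming `model` is a compiled Keras model (module paths and defaults vary between ART versions):

# Sketch with a recent ART API; paths and defaults are assumptions based on
# current ART documentation and may not match the version used above.
from art.estimators.classification import KerasClassifier
from art.attacks.evasion import DeepFool

classifier = KerasClassifier(model=model, clip_values=(0.0, 1.0), use_logits=False)
attack = DeepFool(classifier, max_iter=5)
x_test_adv = attack.generate(x=x_test)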
Example #4
    def test_multi_attack_mnist(self):
        """
        Test the adversarial trainer using two attackers: FGSM and DeepFool. The source and target models of the attack
        are two CNNs on MNIST trained for 5 epochs. FGSM and DeepFool both generate the attack images on the same
        source classifier. The test cast check if accuracy on adversarial samples increases
        after adversarially training the model.

        :return: None
        """
        session = tf.Session()
        k.set_session(session)

        # Load MNIST
        (x_train, y_train), (x_test, y_test), _, _ = load_dataset('mnist')
        x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
        x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]
        im_shape = x_train[0].shape

        # Create and fit target classifier
        comp_params = {
            'loss': 'categorical_crossentropy',
            'optimizer': 'adam',
            'metrics': ['accuracy']
        }
        params = {'epochs': 5, 'batch_size': BATCH_SIZE}
        classifier_tgt = CNN(im_shape, dataset='mnist')
        classifier_tgt.compile(comp_params)
        classifier_tgt.fit(x_train, y_train, **params)

        # Create and fit source classifier
        classifier_src = CNN(im_shape, dataset='mnist')
        classifier_src.compile(comp_params)
        classifier_src.fit(x_train, y_train, **params)

        # Create FGSM and DeepFool attackers
        adv1 = FastGradientMethod(classifier_src, session)
        adv2 = DeepFool(classifier_src, session)
        x_adv = np.vstack((adv1.generate(x_test), adv2.generate(x_test)))
        y_adv = np.vstack((y_test, y_test))
        print(y_adv.shape)
        acc = classifier_tgt.evaluate(x_adv, y_adv)

        # Perform adversarial training
        adv_trainer = AdversarialTrainer(classifier_tgt, [adv1, adv2])
        adv_trainer.fit(x_train, y_train, **params)

        # Evaluate that accuracy on adversarial sample has improved
        acc_adv_trained = adv_trainer.classifier.evaluate(x_adv, y_adv)
        self.assertTrue(acc_adv_trained >= acc)
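For reference, recent ART releases expose the trainer under art.defences.trainer and add a ratio parameter controlling the fraction of adversarial samples per batch. A hedged sketch reusing classifier_tgt and the attack objects from above (the import path and keyword names follow current ART docs, not necessarily the older version in this test):

# Sketch only; `ratio` semantics and the import path are assumptions.
from art.defences.trainer import AdversarialTrainer

trainer = AdversarialTrainer(classifier_tgt, attacks=[adv1, adv2], ratio=0.5)
trainer.fit(x_train, y_train, nb_epochs=5, batch_size=128)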
Example #5
    def test_iris_pt(self):
        (_, _), (x_test, y_test) = self.iris
        classifier = get_iris_classifier_pt()

        attack = DeepFool(classifier, max_iter=5, batch_size=128)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info(
            'Accuracy on Iris with DeepFool adversarial examples: %.2f%%',
            (acc * 100))
Example #6
    def test_partial_grads(self):
        # Get MNIST
        (_, _), (x_test, y_test) = self.mnist

        attack = DeepFool(self.classifier_k, max_iter=2, nb_grads=3)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        test_y_pred = get_labels_np_array(
            self.classifier_k.predict(x_test_adv))
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(
            np.argmax(test_y_pred, axis=1) == np.argmax(
                y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on adversarial test examples: %.2f%%',
                    (acc * 100))
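The nb_grads argument restricts DeepFool to the classes with the highest predicted scores instead of computing a gradient for every class. A sketch of the full-gradient variant for comparison, assuming the same classifier fixture (for MNIST, nb_grads=10 covers all classes):

        # Full-gradient variant: one class gradient per MNIST class (sketch).
        attack_full = DeepFool(self.classifier_k, max_iter=2, nb_grads=10)
        x_test_adv_full = attack_full.generate(x_test)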
Example #7
    def test_iris_k_unbounded(self):
        (_, _), (x_test, y_test) = self.iris
        classifier, _ = get_iris_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channel_index=1)
        attack = DeepFool(classifier, max_iter=5, batch_size=128)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info(
            'Accuracy on Iris with DeepFool adversarial examples: %.2f%%',
            (acc * 100))
Example #8
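This excerpt starts after the PGD samples have already been crafted and true_label has been defined. A hedged sketch of those earlier steps, with illustrative parameter values (the import path varies across ART versions):

# Assumed earlier steps, not part of the original excerpt.
from art.attacks.evasion import ProjectedGradientDescent

true_label = np.argmax(y_test[:100], axis=1)  # assumed definition
attacker_pgd = ProjectedGradientDescent(robust_classifier, eps=0.3,
                                        eps_step=0.01, max_iter=40)
x_test_adv_robust = attacker_pgd.generate(x_test[:100])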
x_test_adv_robust_pred = np.argmax(
    robust_classifier.predict(x_test_adv_robust), axis=1)
nb_correct_adv_robust_pred = np.sum(x_test_adv_robust_pred == true_label)
print("Correctly classified against PGD attack: {}".format(
    nb_correct_adv_robust_pred))
# CW
attacker_robust = cw(robust_classifier, targeted=False, batch_size=100)
x_test_adv_robust = attacker_robust.generate(x_test[:100])
x_test_adv_robust_pred = np.argmax(
    robust_classifier.predict(x_test_adv_robust), axis=1)
nb_correct_adv_robust_pred = np.sum(x_test_adv_robust_pred == true_label)
print("Correctly classified against CW attack: {}".format(
    nb_correct_adv_robust_pred))
# DeepFool
adv_crafter_df = DeepFool(robust_classifier)
img_adv_df = adv_crafter_df.generate(x_test[0:100])
x_test_adv_robust_pred_df = np.argmax(robust_classifier.predict(img_adv_df),
                                      axis=1)
nb_correct_adv_robust_pred_df = np.sum(x_test_adv_robust_pred_df == true_label)
print("Correctly classified against DeepFool attack: {}".format(
    nb_correct_adv_robust_pred_df))

# Normal images
original_model = load_model('saved_models/mnist_cnn_original.h5')  # original
classifier = KerasClassifier(clip_values=(0, 1),
                             model=original_model,
                             use_logits=False)
x_test_pred = np.argmax(classifier.predict(x_test), axis=1)
nb_correct_pred = np.sum(
    x_test_pred == np.argmax(y_test, axis=1)) / y_test.shape[0] * 100
print("Test accuracy for normal instances: %.1f%%" % (nb_correct_pred))
Example #9
    def attack(self, model=None, attack_str=""):
        imgs = self._load_images(attack_str, self._test_or_val_dataset)

        if self._test_or_val_dataset == "_x_test_set_":
            X = self.__data.x_test
            Y = self.__data.y_test
        else:
            X = self.__data.x_val
            Y = self.__data.y_val

        if imgs is not None:
            print('\n{0} adversarial examples using {1} attack loaded...\n'.
                  format(self.__dataset, self.__attack))
            return imgs

        if model is None:
            # Keras fit() returns a History object, so train the surrogate
            # model in place and wrap the model itself.
            self.surrogate_model.fit(self.__data.x_train,
                                     self.__data.y_train,
                                     verbose=1,
                                     epochs=self.__epochs,
                                     batch_size=128)
            wrap = KerasClassifier((0., 1.), model=self.surrogate_model)
        else:
            wrap = KerasClassifier((0., 1.), model=model)

        if self.__attack == 'FGSM':
            print('\nCrafting adversarial examples using FGSM attack...\n')
            fgsm = FastGradientMethod(wrap)

            if self.__data.dataset_name == 'MNIST':
                x_adv_images = fgsm.generate(x=X[self.idx_adv][:self._length],
                                             eps=0.2)
            else:
                x_adv_images = fgsm.generate(x=X[self.idx_adv][:self._length],
                                             eps=0.025)

            path = os.path.join(
                self._attack_dir,
                self.__dataset.lower() + self._test_or_val_dataset +
                "fgsm.pkl")
            helpers.save_pkl(x_adv_images, path)

        elif self.__attack.startswith("CW"):
            print('\nCrafting adversarial examples using CW attack...\n')
            cw = CarliniL2Method(wrap,
                                 confidence=0.0,
                                 targeted=False,
                                 binary_search_steps=1,
                                 learning_rate=0.2,
                                 initial_const=10,
                                 max_iter=100)
            x_adv_images = cw.generate(X[self.idx_adv][:self._length])

            path = os.path.join(
                self._attack_dir,
                self.__dataset.lower() + self._test_or_val_dataset + "cw.pkl")
            helpers.save_pkl(x_adv_images, path)

        elif self.__attack == 'BIM':
            print('\nCrafting adversarial examples using BIM attack...\n')

            if self.__dataset == 'MNIST':
                bim = BasicIterativeMethod(wrap,
                                           eps=0.25,
                                           eps_step=0.2,
                                           max_iter=100,
                                           norm=np.inf)
            elif self.__dataset == 'CIFAR':
                bim = BasicIterativeMethod(wrap,
                                           eps=0.025,
                                           eps_step=0.01,
                                           max_iter=1000,
                                           norm=np.inf)

            x_adv_images = bim.generate(x=X[self.idx_adv][:self._length])
            path = os.path.join(
                self._attack_dir,
                self.__dataset.lower() + self._test_or_val_dataset + "bim.pkl")
            helpers.save_pkl(x_adv_images, path)

        elif self.__attack == 'DEEPFOOL':
            print('\nCrafting adversarial examples using DeepFool attack...\n')

            deepfool = DeepFool(wrap)
            x_adv_images = deepfool.generate(x=X[self.idx_adv][:self._length])
            path = os.path.join(
                self._attack_dir,
                self.__dataset.lower() + self._test_or_val_dataset +
                "deepfool.pkl")
            helpers.save_pkl(x_adv_images, path)

        return x_adv_images
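A hypothetical call site for the attack() dispatcher above; the wrapper class name, constructor arguments, and attribute names below are illustrative assumptions, not confirmed by the snippet:

# Hypothetical usage; every name here is assumed for illustration only.
crafter = AdversarialCrafter(dataset='MNIST', attack='DEEPFOOL', epochs=5)
x_adv = crafter.attack()                   # trains the surrogate model internally
x_adv = crafter.attack(model=keras_model)  # or reuses an existing Keras model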
Example #10
comp_params = {'loss': 'categorical_crossentropy',
               'optimizer': 'adam',
               'metrics': ['accuracy']}
classifier = CNN(im_shape, act='relu', dataset='cifar10')
classifier.compile(comp_params)
classifier.fit(x_train,
               y_train,
               validation_split=.1,
               epochs=10,
               batch_size=128)

# Craft adversarial samples with DeepFool
print('Create DeepFool attack')
epsilon = .1  # Maximum perturbation
adv_crafter = DeepFool(classifier, sess=session)
print('Craft training examples')
x_train_adv = adv_crafter.generate(x_val=x_train,
                                   eps=epsilon,
                                   clip_min=min_,
                                   clip_max=max_)
print('Craft test examples')
x_test_adv = adv_crafter.generate(x_val=x_test,
                                  eps=epsilon,
                                  clip_min=min_,
                                  clip_max=max_)

# Evaluate the classifier on the adversarial samples
scores = classifier.evaluate(x_test_adv, y_test)
print("\nClassifier before adversarial training")
print("\nLoss on adversarial samples: %.2f\nAccuracy on adversarial samples: %.2f%%"
      % (scores[0], scores[1] * 100))

# Data augmentation: expand the training set with the adversarial samples
Example #11
    adv_crafter_fgsm = FastGradientMethod(cifar_classifier,
                                          eps=epsilon,
                                          eps_step=0.01,
                                          batch_size=batch_size)
    x_test_adv = adv_crafter_fgsm.generate(x=test_dataset_array)

    # Test the classifier on adversarial examples
    predictions = cifar_classifier.predict(x_test_adv)
    accuracy = np.sum(
        np.argmax(predictions, axis=1) == test_label_dataset_array) / len(
            test_label_dataset_array)
    print('Accuracy after FGSM attack: {}%'.format(accuracy * 100))

    # Deepfool
    adv_crafter_deepfool = DeepFool(cifar_classifier, batch_size=batch_size)
    x_test_adv = adv_crafter_deepfool.generate(x=test_dataset_array)

    predictions = cifar_classifier.predict(x_test_adv)
    accuracy = np.sum(
        np.argmax(predictions, axis=1) == test_label_dataset_array) / len(
            test_label_dataset_array)
    print('Accuracy after DeepFool attack: {}%'.format(accuracy * 100))

    # C&W

    adv_crafter_cwinf = CarliniLInfMethod(cifar_classifier,
                                          eps=epsilon,
                                          batch_size=batch_size)
    x_test_adv = adv_crafter_cwinf.generate(x=test_dataset_array)

    predictions = cifar_classifier.predict(x_test_adv)
Example #12
# Read MNIST dataset
(x_train, y_train), (x_test, y_test), min_, max_ = load_mnist()

# Construct and train a convolutional neural network on MNIST using Keras
source = cnn_mnist_k()
source.compile(loss=keras.losses.categorical_crossentropy,
               optimizer=Adam(lr=0.01),
               metrics=['accuracy'])
source = KerasClassifier(clip_values=(min_, max_),
                         model=source,
                         use_logits=False)
source.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Craft adversarial samples with DeepFool
adv_crafter = DeepFool(source)
x_test_adv = adv_crafter.generate(x_test)

# Compare with existing Adversarial Training (from ART)
robust_classifier = load_model('saved_models/mnist_cnn_robust.h5')
robust_classifier = KerasClassifier(clip_values=(0, 1),
                                    model=robust_classifier,
                                    use_logits=False)
print('compare_transfer.py for mnist dataset v2')
print('based on inf norm')

# Normal images
original_model = load_model('saved_models/mnist_cnn_original.h5')  # original
classifier = KerasClassifier(clip_values=(0, 1),
                             model=original_model,
                             use_logits=False)
Example #13
    classifier = KerasClassifier((0, 1), model, use_logits=False)
    return classifier


# Get session
session = tf.Session()
k.set_session(session)

# Read MNIST dataset
(x_train, y_train), (x_test, y_test), min_, max_ = load_mnist()

# Construct and train a convolutional neural network on MNIST using Keras
source = cnn_mnist_k(x_train.shape[1:])
source.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Craft adversarial samples with DeepFool
adv_crafter = DeepFool(source)
x_train_adv = adv_crafter.generate(x_train)
x_test_adv = adv_crafter.generate(x_test)

# Construct and train a convolutional neural network
target = cnn_mnist_tf(x_train.shape[1:])
target.fit(x_train, y_train, nb_epochs=5, batch_size=128)

# Evaluate the CNN on the adversarial samples
preds = target.predict(x_test_adv)
acc = np.sum(np.equal(np.argmax(preds, axis=1), np.argmax(
    y_test, axis=1))) / y_test.shape[0]
print("\nAccuracy on adversarial samples: %.2f%%" % (acc * 100))
Example #14
         TP_comb_pgd = TP_comb_pgd + 1
     else:
         TP_comb_pgd = TP_comb_pgd + Tpgd
 TPR_pgd_random = TP_pgd_random / adv_sample_cw
 print("\nTPR for PGD when random noise is added: %.3f%%" %
       (TPR_pgd_random * 100))
 TPR_pgd_comb = TP_comb_pgd / adv_sample_cw
 TPR_pgd = TP_pgd / adv_sample_cw
 print("\nTPR for PGD: %.3f%%" % (TPR_pgd * 100))
 print("\nTPR for PGD when combining: %.3f%%" % (TPR_pgd_comb * 100))
 # =============================================================================
 # Craft adversarial samples using DeepFool
 # =============================================================================
 attack_DeepFool = DeepFool(classifier)
 x_test_adv_df = attack_DeepFool.generate(
     x=x_test[3 * adv_sample:3 * adv_sample + adv_sample_cw])
 # Evaluate the classifier on the adversarial examples
 # add test image noise
 x_test_adv_df_random = x_test_adv_df + np.random.normal(
     mean, 0.01, x_test_adv_df.shape)
 preds_df_random = np.argmax(classifier.predict(x_test_adv_df_random),
                             axis=1)
 preds_df = np.argmax(classifier.predict(x_test_adv_df), axis=1)
 y_adv = y_test[3 * adv_sample:3 * adv_sample + adv_sample_cw]
 TP_df_random = 0
 TP_df = 0
 TP_comb_df = 0
 for i in np.arange(adv_sample_cw):
     diff_random = x_test[i + 3 * adv_sample] - x_test_adv_df_random[i]
     diff_random = diff_random.reshape((28, 28))
     perturbation_random = norm(diff_random) / 28