    def test_8_keras_iris_vector_unbounded(self):
        classifier = get_tabular_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model,
                                     use_logits=False,
                                     channels_first=True)
        attack = SaliencyMapMethod(classifier, theta=1, verbose=False)
        x_test_iris_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_iris_adv).all())
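
        # A bounded counterpart would instead pass clip values when re-wrapping
        # the model (a sketch; the (0, 1) range is an assumption based on the
        # normalized Iris features, not part of the original test):
        #   KerasClassifier(model=classifier._model, use_logits=False,
        #                   channels_first=True, clip_values=(0, 1))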
Example #2
def _jsma(model, data, labels, attack_args):
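    """Craft adversarial examples with ART's SaliencyMapMethod (JSMA), reading
    optional theta/gamma/batch_size overrides from ``attack_args``."""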
    theta = attack_args.get('theta', 0.15)
    gamma = attack_args.get('gamma', 0.5)

    batch_size = attack_args.get('batch_size', 1)

    attacker = SaliencyMapMethod(classifier=model,
                                 theta=theta,
                                 gamma=gamma,
                                 batch_size=batch_size)
    return attacker.generate(data, labels)
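
# A minimal call sketch for the helper above; ``art_classifier``, ``x_batch``
# and ``y_batch`` are hypothetical placeholders (an ART classifier with
# gradients plus numpy arrays of inputs and one-hot labels), not part of the
# original snippet.
adv_batch = _jsma(art_classifier, x_batch, y_batch,
                  {'theta': 0.2, 'gamma': 0.7, 'batch_size': 32})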
Example #3
    def test_check_params(self):

        ptc = get_image_classifier_pt(from_logits=True)

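        # Invalid hyperparameter values should be rejected at construction time.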
        with self.assertRaises(ValueError):
            _ = SaliencyMapMethod(ptc, gamma=-1)

        with self.assertRaises(ValueError):
            _ = SaliencyMapMethod(ptc, batch_size=-1)

        with self.assertRaises(ValueError):
            _ = SaliencyMapMethod(ptc, verbose="False")

    def test_4_pytorch_iris_vector(self):
        classifier = get_tabular_classifier_pt()

        attack = SaliencyMapMethod(classifier, theta=1, verbose=False)
        x_test_iris_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_iris_adv).all())
        self.assertTrue((x_test_iris_adv <= 1).all())
        self.assertTrue((x_test_iris_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_iris_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris,
                                    axis=1) == preds_adv).all())
        accuracy = np.sum(preds_adv == np.argmax(
            self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Accuracy on Iris with JSMA adversarial examples: %.2f%%",
                    (accuracy * 100))

    def test_6_scikitlearn(self):
        from sklearn.linear_model import LogisticRegression
        from sklearn.svm import SVC, LinearSVC

        from art.estimators.classification.scikitlearn import SklearnClassifier

        scikitlearn_test_cases = [
            LogisticRegression(solver="lbfgs", multi_class="auto"),
            SVC(gamma="auto"),
            LinearSVC(),
        ]

        x_test_original = self.x_test_iris.copy()

        for model in scikitlearn_test_cases:
            classifier = SklearnClassifier(model=model, clip_values=(0, 1))
            classifier.fit(x=self.x_test_iris, y=self.y_test_iris)

            attack = SaliencyMapMethod(classifier,
                                       theta=1,
                                       batch_size=128,
                                       verbose=False)
            x_test_iris_adv = attack.generate(self.x_test_iris)
            self.assertFalse((self.x_test_iris == x_test_iris_adv).all())
            self.assertTrue((x_test_iris_adv <= 1).all())
            self.assertTrue((x_test_iris_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_iris_adv), axis=1)
            self.assertFalse((np.argmax(self.y_test_iris,
                                        axis=1) == preds_adv).all())
            accuracy = np.sum(preds_adv == np.argmax(
                self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
            logger.info(
                "Accuracy of " + classifier.__class__.__name__ +
                " on Iris with JSMA adversarial examples: "
                "%.2f%%",
                (accuracy * 100),
            )

            # Check that x_test has not been modified by attack and classifier
            self.assertAlmostEqual(float(
                np.max(np.abs(x_test_original - self.x_test_iris))),
                                   0.0,
                                   delta=0.00001)
Example #6
def get_adversarial_examples(X, Y, model, nb_classes, attack=None):
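    """Wrap a fitted scikit-learn ``model`` in an ART SklearnClassifier and
    craft adversarial examples for ``X`` with the requested ``attack``."""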
    assert model is not None
    assert attack is not None

    art_classifier = SklearnClassifier(model=model,
                                       clip_values=(0, nb_classes))

    attacker = None
    if attack == ATTACK.PGD:
        attacker = ProjectedGradientDescent(classifier=art_classifier,
                                            norm=np.inf,
                                            eps=0.2,
                                            eps_step=0.1,
                                            max_iter=3,
                                            targeted=False,
                                            num_random_init=0,
                                            batch_size=128)
    elif attack == ATTACK.DEEPFOOL:
        attacker = DeepFool(classifier=art_classifier,
                            max_iter=5,
                            epsilon=1e-6,
                            nb_grads=3,
                            batch_size=1)
    elif attack == ATTACK.FGSM:
        attacker = FastGradientMethod(classifier=art_classifier,
                                      norm=np.inf,
                                      eps=0.3,
                                      targeted=False,
                                      batch_size=128)
    elif attack == ATTACK.BIM:
        attacker = BasicIterativeMethod(classifier=art_classifier,
                                        eps=0.3,
                                        eps_step=0.1,
                                        targeted=False,
                                        batch_size=128)
    elif attack == ATTACK.JSMA:
        attacker = SaliencyMapMethod(classifier=art_classifier,
                                     theta=0.3,
                                     gamma=0.5,
                                     batch_size=128)
    elif attack == ATTACK.CW_L2:
        attacker = CarliniL2Method(classifier=art_classifier,
                                   learning_rate=0.1)
    elif attack == ATTACK.CW_Linf:
        attacker = CarliniLInfMethod(classifier=art_classifier,
                                     learning_rate=0.01)
    else:
        raise NotImplementedError('{} is not implemented.'.format(attack))

    print('Generating [{}] adversarial examples; this may take a while...'
          .format(attack))
    X_adv = attacker.generate(X, y=Y)

    del attacker
    return X_adv
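
# A hedged usage sketch for the function above; the SVC and the
# x_train/y_train/x_test/y_test arrays (one-hot labels, three classes) are
# hypothetical placeholders for illustration only.
from sklearn.svm import SVC

svc = SVC(gamma='auto').fit(x_train, np.argmax(y_train, axis=1))
x_adv = get_adversarial_examples(x_test, y_test, svc, nb_classes=3,
                                 attack=ATTACK.JSMA)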

    def test_9_keras_mnist(self):
        x_test_original = self.x_test_mnist.copy()

        # Keras classifier
        classifier = get_image_classifier_kr()

        scores = classifier._model.evaluate(self.x_train_mnist,
                                            self.y_train_mnist)
        logger.info("[Keras, MNIST] Accuracy on training set: %.2f%%",
                    (scores[1] * 100))

        scores = classifier._model.evaluate(self.x_test_mnist,
                                            self.y_test_mnist)
        logger.info("[Keras, MNIST] Accuracy on test set: %.2f%%",
                    (scores[1] * 100))

        # targeted

        # Generate random target classes
        nb_classes = np.unique(np.argmax(self.y_test_mnist, axis=1)).shape[0]
        targets = np.random.randint(nb_classes, size=self.n_test)
        while (targets == np.argmax(self.y_test_mnist, axis=1)).any():
            targets = np.random.randint(nb_classes, size=self.n_test)

        # Perform attack
        df = SaliencyMapMethod(classifier,
                               theta=1,
                               batch_size=100,
                               verbose=False)
        x_test_adv = df.generate(self.x_test_mnist,
                                 y=to_categorical(targets, nb_classes))

        self.assertFalse((self.x_test_mnist == x_test_adv).all())
        self.assertFalse((0.0 == x_test_adv).all())

        y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((self.y_test_mnist == y_pred).all())

        accuracy = np.sum(
            np.argmax(y_pred, axis=1) == np.argmax(self.y_test_mnist,
                                                   axis=1)) / self.n_test
        logger.info("Accuracy on adversarial examples: %.2f%%",
                    (accuracy * 100))

        # untargeted
        df = SaliencyMapMethod(classifier,
                               theta=1,
                               batch_size=100,
                               verbose=False)
        x_test_adv = df.generate(self.x_test_mnist)

        self.assertFalse((self.x_test_mnist == x_test_adv).all())
        self.assertFalse((0.0 == x_test_adv).all())

        y_pred = get_labels_np_array(classifier.predict(x_test_adv))
        self.assertFalse((self.y_test_mnist == y_pred).all())

        accuracy = np.sum(
            np.argmax(y_pred, axis=1) == np.argmax(self.y_test_mnist,
                                                   axis=1)) / self.n_test
        logger.info("Accuracy on adversarial examples: %.2f%%",
                    (accuracy * 100))

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(
            np.max(np.abs(x_test_original - self.x_test_mnist))),
                               0.0,
                               delta=0.00001)
Example #8
def craft(X, Y, art_classifier, attack=None, **attack_params):
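    """Craft adversarial examples for ``X`` with the requested ART ``attack``,
    reading per-attack hyperparameter overrides from ``attack_params``."""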
    assert art_classifier is not None
    assert attack is not None

    attacker = None
    if attack == ATTACK.PGD:
        eps = attack_params.get('eps', 0.2)
        eps_step = attack_params.get('eps_step', eps / 5.)
        max_iter = attack_params.get('max_iter', 3)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)

        attacker = ProjectedGradientDescent(classifier=art_classifier,
                                            norm=np.inf,
                                            eps=eps,
                                            eps_step=eps_step,
                                            max_iter=max_iter,
                                            targeted=targeted,
                                            num_random_init=0,
                                            batch_size=batch_size)

    elif attack == ATTACK.DEEPFOOL:
        eps = attack_params.get('eps', 1e-6)
        max_iter = attack_params.get('max_iter', 5)
        nb_grads = attack_params.get('nb_grads', 3)
        batch_size = attack_params.get('batch_size', 1)

        attacker = DeepFool(classifier=art_classifier,
                            max_iter=max_iter,
                            epsilon=eps,
                            nb_grads=nb_grads,
                            batch_size=batch_size)

    elif attack == ATTACK.FGSM:
        eps = attack_params.get('eps', 0.3)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)

        attacker = FastGradientMethod(classifier=art_classifier,
                                      norm=np.inf,
                                      eps=eps,
                                      targeted=targeted,
                                      batch_size=batch_size)

    elif attack == ATTACK.BIM:
        eps = attack_params.get('eps', 0.3)
        eps_step = attack_params.get('eps_step', eps / 5.)
        norm = attack_params.get('norm', np.inf)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)

        attacker = BasicIterativeMethod(classifier=art_classifier,
                                        norm=norm,
                                        eps=eps,
                                        eps_step=eps_step,
                                        targeted=targeted,
                                        batch_size=batch_size)

    elif attack == ATTACK.JSMA:
        theta = attack_params.get('theta', 0.3)
        gamma = attack_params.get('gamma', 0.5)
        batch_size = attack_params.get('batch_size', 128)

        attacker = SaliencyMapMethod(classifier=art_classifier,
                                     theta=theta,
                                     gamma=gamma,
                                     batch_size=batch_size)

    elif attack == ATTACK.CW_L2:
        lr = attack_params.get('lr', 0.1)
        bsearch_steps = attack_params.get('bsearch_steps', 10)

        attacker = CarliniL2Method(classifier=art_classifier,
                                   learning_rate=lr,
                                   binary_search_steps=bsearch_steps)

    elif attack == ATTACK.CW_Linf:
        lr = attack_params.get('lr', 0.01)

        attacker = CarliniLInfMethod(classifier=art_classifier,
                                     learning_rate=lr)

    else:
        raise NotImplementedError('{} is not implemented.'.format(attack))

    print('Generating [{}] adversarial examples; this may take a while...'
          .format(attack))
    X_adv = attacker.generate(X, y=Y)

    del attacker
    return X_adv
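
# A hedged usage sketch for ``craft``; ``art_classifier``, ``x_test`` and
# ``y_test`` are hypothetical placeholders, and the JSMA overrides below are
# illustrative only.
x_adv = craft(x_test, y_test, art_classifier,
              attack=ATTACK.JSMA, theta=0.15, gamma=0.3, batch_size=64)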
Example #9
    def test_pytorch_mnist(self):
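        # The MNIST fixtures are channels-last (N, H, W, C); swap axes 1 and 3
        # to obtain the channels-first layout (N, C, H, W) expected by PyTorch.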
        x_train_mnist = np.swapaxes(self.x_train_mnist, 1,
                                    3).astype(np.float32)
        x_test_mnist = np.swapaxes(self.x_test_mnist, 1, 3).astype(np.float32)
        x_test_original = x_test_mnist.copy()

        # Create basic PyTorch model
        classifier = get_image_classifier_pt()

        scores = get_labels_np_array(classifier.predict(x_train_mnist))
        accuracy = np.sum(
            np.argmax(scores, axis=1) == np.argmax(self.y_train_mnist,
                                                   axis=1)) / self.n_train
        logger.info("[PyTorch, MNIST] Accuracy on training set: %.2f%%",
                    (accuracy * 100))

        scores = get_labels_np_array(classifier.predict(x_test_mnist))
        accuracy = np.sum(
            np.argmax(scores, axis=1) == np.argmax(self.y_test_mnist,
                                                   axis=1)) / self.n_test
        logger.info("\n[PyTorch, MNIST] Accuracy on test set: %.2f%%",
                    (accuracy * 100))

        # targeted
        # Generate random target classes
        nb_classes = np.unique(np.argmax(self.y_test_mnist, axis=1)).shape[0]
        targets = np.random.randint(nb_classes, size=self.n_test)
        while (targets == np.argmax(self.y_test_mnist, axis=1)).any():
            targets = np.random.randint(nb_classes, size=self.n_test)

        # Perform attack
        df = SaliencyMapMethod(classifier, theta=1, batch_size=100)
        x_test_mnist_adv = df.generate(x_test_mnist,
                                       y=to_categorical(targets, nb_classes))

        self.assertFalse((x_test_mnist == x_test_mnist_adv).all())
        self.assertFalse((0.0 == x_test_mnist_adv).all())

        y_pred = get_labels_np_array(classifier.predict(x_test_mnist_adv))
        self.assertFalse((self.y_test_mnist == y_pred).all())

        accuracy = np.sum(
            np.argmax(y_pred, axis=1) == np.argmax(self.y_test_mnist,
                                                   axis=1)) / self.n_test
        logger.info("Accuracy on adversarial examples: %.2f%%",
                    (accuracy * 100))

        # untargeted
        df = SaliencyMapMethod(classifier, theta=1, batch_size=100)
        x_test_mnist_adv = df.generate(x_test_mnist)

        self.assertFalse((x_test_mnist == x_test_mnist_adv).all())
        self.assertFalse((0.0 == x_test_mnist_adv).all())

        y_pred = get_labels_np_array(classifier.predict(x_test_mnist_adv))
        self.assertFalse((self.y_test_mnist == y_pred).all())

        accuracy = np.sum(
            np.argmax(y_pred, axis=1) == np.argmax(self.y_test_mnist,
                                                   axis=1)) / self.n_test
        logger.info("Accuracy on adversarial examples: %.2f%%",
                    (accuracy * 100))

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(
            np.max(np.abs(x_test_original - x_test_mnist))),
                               0.0,
                               delta=0.00001)