Example #1
    def test_tensorflow_iris_L2(self):
        classifier, _ = get_tabular_classifier_tf()

        # Test untargeted attack
        attack = CarliniL2Method(classifier, targeted=False, max_iter=10)
        x_test_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

        predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris, axis=1) == predictions_adv).all())
        accuracy = np.sum(predictions_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Accuracy on Iris with C&W adversarial examples: %.2f%%", (accuracy * 100))

        # Test targeted attack
        targets = random_targets(self.y_test_iris, nb_classes=3)
        attack = CarliniL2Method(classifier, targeted=True, max_iter=10)
        x_test_adv = attack.generate(self.x_test_iris, **{"y": targets})
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

        predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((np.argmax(targets, axis=1) == predictions_adv).any())
        accuracy = np.sum(predictions_adv == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Success rate of targeted C&W on Iris: %.2f%%", (accuracy * 100))
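
For reference, ART's `random_targets(labels, nb_classes)` draws, for each sample, a class different from the true label and returns it one-hot encoded. A minimal sketch of an equivalent helper, shown only to illustrate the shape of what the targeted attack consumes (this is not ART's implementation):

    import numpy as np

    def random_targets_sketch(y, nb_classes):
        # For each one-hot row of y, pick a class index different from the
        # true one and return it one-hot encoded. Illustrative only.
        true = np.argmax(y, axis=1)
        shift = np.random.randint(1, nb_classes, size=true.shape)
        return np.eye(nb_classes)[(true + shift) % nb_classes]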
Example #2
    def test_check_params_L2(self):

        ptc = get_image_classifier_pt(from_logits=True)

        with self.assertRaises(ValueError):
            _ = CarliniL2Method(ptc, binary_search_steps="1.0")
        with self.assertRaises(ValueError):
            _ = CarliniL2Method(ptc, binary_search_steps=-1)

        with self.assertRaises(ValueError):
            _ = CarliniL2Method(ptc, max_iter="1.0")
        with self.assertRaises(ValueError):
            _ = CarliniL2Method(ptc, max_iter=-1)

        with self.assertRaises(ValueError):
            _ = CarliniL2Method(ptc, max_halving="1.0")
        with self.assertRaises(ValueError):
            _ = CarliniL2Method(ptc, max_halving=-1)

        with self.assertRaises(ValueError):
            _ = CarliniL2Method(ptc, max_doubling="1.0")
        with self.assertRaises(ValueError):
            _ = CarliniL2Method(ptc, max_doubling=-1)

        with self.assertRaises(ValueError):
            _ = CarliniL2Method(ptc, batch_size="1.0")
        with self.assertRaises(ValueError):
            _ = CarliniL2Method(ptc, batch_size=-1)
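
By contrast, a construction with well-typed values should pass validation. A minimal sketch, with the bounds inferred from the negative tests above rather than from the ART documentation:

    # Minimal sketch: the same constructor with positive integer parameters
    # is expected to pass parameter validation without raising.
    attack = CarliniL2Method(
        ptc,
        binary_search_steps=10,
        max_iter=10,
        max_halving=5,
        max_doubling=5,
        batch_size=1,
    )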
Example #3
    def test_pytorch_mnist_L2(self):
        """
        Third test with the PyTorchClassifier.
        :return:
        """
        x_test = np.reshape(self.x_test_mnist, (self.x_test_mnist.shape[0], 1, 28, 28)).astype(np.float32)
        x_test_original = x_test.copy()

        # Build PyTorchClassifier
        ptc = get_image_classifier_pt(from_logits=True)

        # First attack
        cl2m = CarliniL2Method(classifier=ptc, targeted=True, max_iter=10)
        params = {"y": random_targets(self.y_test_mnist, ptc.nb_classes)}
        x_test_adv = cl2m.generate(x_test, **params)
        self.assertFalse((x_test == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        target = np.argmax(params["y"], axis=1)
        y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertTrue((target == y_pred_adv).any())
        logger.info("CW2 Success Rate: %.2f", (sum(target == y_pred_adv) / float(len(target))))

        # Second attack
        cl2m = CarliniL2Method(classifier=ptc, targeted=False, max_iter=10)
        x_test_adv = cl2m.generate(x_test)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        # Reuse the random targets from the first attack; the untargeted attack
        # should move predictions away from at least some of them.
        target = np.argmax(params["y"], axis=1)
        y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
        self.assertTrue((target != y_pred_adv).any())
        logger.info("CW2 Success Rate: %.2f", (sum(target != y_pred_adv) / float(len(target))))

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
Example #4
    def test_tensorflow_mnist_L2(self):
        """
        First test with the TensorFlowClassifier.
        :return:
        """
        x_test_original = self.x_test_mnist.copy()

        # Build TensorFlowClassifier
        tfc, sess = get_image_classifier_tf(from_logits=True)

        # First attack
        cl2m = CarliniL2Method(classifier=tfc,
                               targeted=True,
                               max_iter=10,
                               verbose=False)
        params = {"y": random_targets(self.y_test_mnist, tfc.nb_classes)}
        x_test_adv = cl2m.generate(self.x_test_mnist, **params)
        self.assertFalse((self.x_test_mnist == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        target = np.argmax(params["y"], axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        logger.debug("CW2 Target: %s", target)
        logger.debug("CW2 Actual: %s", y_pred_adv)
        logger.info("CW2 Success Rate: %.2f",
                    (np.sum(target == y_pred_adv) / float(len(target))))
        self.assertTrue((target == y_pred_adv).any())

        # Second attack, no batching
        cl2m = CarliniL2Method(classifier=tfc,
                               targeted=False,
                               max_iter=10,
                               batch_size=1,
                               verbose=False)
        x_test_adv = cl2m.generate(self.x_test_mnist)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        # Reuse the random targets from the first attack; the untargeted attack
        # should move predictions away from at least some of them.
        target = np.argmax(params["y"], axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        logger.debug("CW2 Target: %s", target)
        logger.debug("CW2 Actual: %s", y_pred_adv)
        logger.info("CW2 Success Rate: %.2f",
                    (np.sum(target != y_pred_adv) / float(len(target))))
        self.assertTrue((target != y_pred_adv).any())

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(
            float(np.max(np.abs(x_test_original - self.x_test_mnist))),
            0.0,
            delta=0.00001)

        # Clean-up session
        if sess is not None:
            sess.close()
Example #5
    def test_scikitlearn_L2(self):
        from sklearn.linear_model import LogisticRegression
        from sklearn.svm import SVC, LinearSVC

        from art.estimators.classification.scikitlearn import SklearnClassifier

        scikitlearn_test_cases = [
            LogisticRegression(solver="lbfgs", multi_class="auto"),
            SVC(gamma="auto"),
            LinearSVC(),
        ]

        x_test_original = self.x_test_iris.copy()

        for model in scikitlearn_test_cases:
            classifier = SklearnClassifier(model=model, clip_values=(0, 1))
            classifier.fit(x=self.x_test_iris, y=self.y_test_iris)

            # Test untargeted attack
            attack = CarliniL2Method(classifier, targeted=False, max_iter=2)
            x_test_adv = attack.generate(self.x_test_iris)
            self.assertFalse((self.x_test_iris == x_test_adv).all())
            self.assertLessEqual(np.amax(x_test_adv), 1.0)
            self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

            predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertFalse((np.argmax(self.y_test_iris, axis=1) == predictions_adv).all())
            accuracy = np.sum(predictions_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
            logger.info(
                "Accuracy of %s on Iris with C&W adversarial examples: %.2f%%",
                classifier.__class__.__name__,
                accuracy * 100,
            )

            # Test targeted attack
            targets = random_targets(self.y_test_iris, nb_classes=3)
            attack = CarliniL2Method(classifier, targeted=True, max_iter=2)
            x_test_adv = attack.generate(self.x_test_iris, **{"y": targets})
            self.assertFalse((self.x_test_iris == x_test_adv).all())
            self.assertLessEqual(np.amax(x_test_adv), 1.0)
            self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

            predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertTrue((np.argmax(targets, axis=1) == predictions_adv).any())
            accuracy = np.sum(predictions_adv == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
            logger.info(
                "Success rate of targeted C&W against %s on Iris: %.2f%%",
                classifier.__class__.__name__,
                accuracy * 100,
            )

            # Check that x_test has not been modified by attack and classifier
            self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_iris))), 0.0, delta=0.00001)
Example #6
    def test_tensorflow_failure_attack_L2(self):
        """
        Test the corner case in which the attack fails: with zero iterations,
        zero binary-search steps and a zero learning rate, the attack cannot
        move away from the original inputs.
        :return:
        """
        x_test_original = self.x_test_mnist.copy()

        # Build TensorFlowClassifier
        tfc, sess = get_image_classifier_tf(from_logits=True)

        # Failure attack
        cl2m = CarliniL2Method(classifier=tfc,
                               targeted=True,
                               max_iter=0,
                               binary_search_steps=0,
                               learning_rate=0,
                               initial_const=1)
        params = {"y": random_targets(self.y_test_mnist, tfc.nb_classes)}
        x_test_adv = cl2m.generate(self.x_test_mnist, **params)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        np.testing.assert_array_almost_equal(self.x_test_mnist,
                                             x_test_adv,
                                             decimal=3)

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(
            float(np.max(np.abs(x_test_original - self.x_test_mnist))),
            0.0,
            delta=0.00001)

        # Clean-up session
        if sess is not None:
            sess.close()
Example #7
def get_adversarial_examples(X, Y, model, nb_classes, attack=None):
    assert model is not None
    assert attack is not None

    art_classifier = SklearnClassifier(model=model,
                                       clip_values=(0, nb_classes))

    attacker = None
    if attack == ATTACK.PGD:
        attacker = ProjectedGradientDescent(classifier=art_classifier,
                                            norm=np.inf,
                                            eps=0.2,
                                            eps_step=0.1,
                                            max_iter=3,
                                            targeted=False,
                                            num_random_init=0,
                                            batch_size=128)
    elif attack == ATTACK.DEEPFOOL:
        attacker = DeepFool(classifier=art_classifier,
                            max_iter=5,
                            epsilon=1e-6,
                            nb_grads=3,
                            batch_size=1)
    elif attack == ATTACK.FGSM:
        attacker = FastGradientMethod(classifier=art_classifier,
                                      norm=np.inf,
                                      eps=0.3,
                                      targeted=False,
                                      batch_size=128)
    elif attack == ATTACK.BIM:
        attacker = BasicIterativeMethod(classifier=art_classifier,
                                        eps=0.3,
                                        eps_step=0.1,
                                        targeted=False,
                                        batch_size=128)
    elif attack == ATTACK.JSMA:
        attacker = SaliencyMapMethod(classifier=art_classifier,
                                     theta=0.3,
                                     gamma=0.5,
                                     batch_size=128)
    elif attack == ATTACK.CW_L2:
        attacker = CarliniL2Method(classifier=art_classifier,
                                   learning_rate=0.1)
    elif attack == ATTACK.CW_Linf:
        attacker = CarliniLInfMethod(classifier=art_classifier,
                                     learning_rate=0.01)
    else:
        raise NotImplementedError('{} is not implemented.'.format(attack))

    print(
        'Generating [{}] adversarial examples; this may take a while...'.format(
            attack))
    X_adv = attacker.generate(X, y=Y)

    del attacker
    return X_adv
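
A hedged usage sketch, assuming the `ATTACK` enum referenced above is importable; the synthetic data and the `LogisticRegression` model are illustrative only:

    import numpy as np
    from sklearn.linear_model import LogisticRegression

    # Synthetic 3-class data, for illustration only.
    rng = np.random.RandomState(0)
    X = rng.rand(30, 4).astype(np.float32)
    y = rng.randint(0, 3, size=30)
    Y = np.eye(3)[y]  # one-hot labels, as attacker.generate(X, y=Y) expects

    model = LogisticRegression(solver="lbfgs").fit(X, y)
    X_adv = get_adversarial_examples(X, Y, model, nb_classes=3, attack=ATTACK.PGD)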
Example #8
def _cw(model, data, labels, attack_args):
    """
    Carlini & Wagner attack:
    "Towards Evaluating the Robustness of Neural Networks"
    by Nicholas Carlini and David Wagner
    ``https://arxiv.org/abs/1608.04644``
    :param model: ART classifier wrapping the model under attack.
    :param data: input samples to perturb.
    :param labels: labels for ``data`` (target labels if ``targeted`` is set).
    :param attack_args: dict of attack parameters; ``norm`` and ``lr`` are required.
    :return: adversarial examples generated from ``data``.
    """
    norm = attack_args.get('norm').lower()

    lr = attack_args.get('lr')
    max_iter = attack_args.get('max_iter', 100)

    # use default values for the following arguments
    confidence = attack_args.get('confidence', 0.0)
    targeted = attack_args.get('targeted', False)
    init_const = attack_args.get('init_const', 0.01)
    max_halving = attack_args.get('max_halving', 5)
    max_doubling = attack_args.get('max_doubling', 5)

    if norm == 'l2':
        print('>>> Generating CW_l2 examples.')
        binary_search_steps = attack_args.get('binary_search_steps', 10)

        attacker = CarliniL2Method(classifier=model,
                                   confidence=confidence,
                                   targeted=targeted,
                                   learning_rate=lr,
                                   binary_search_steps=binary_search_steps,
                                   max_iter=max_iter,
                                   initial_const=init_const,
                                   max_halving=max_halving,
                                   max_doubling=max_doubling)

    elif norm == 'linf':
        print('>>> Generating CW_linf examples.')
        eps = attack_args.get('eps', 0.3)
        attacker = CarliniLInfMethod(classifier=model,
                                     confidence=confidence,
                                     targeted=targeted,
                                     learning_rate=lr,
                                     max_iter=max_iter,
                                     max_halving=max_halving,
                                     max_doubling=max_doubling,
                                     eps=eps)
    else:
        raise ValueError(
            'Only `l2` and `linf` norms are supported, but found {}.'.format(norm))

    return attacker.generate(data, labels)
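
A hedged usage sketch; `model` is assumed to be an ART classifier wrapper, and `x_test`/`y_test` are placeholders. The dictionary keys mirror the `.get()` lookups inside `_cw`; `norm` and `lr` have no usable defaults, so both are required:

    # Hypothetical usage: attack_args keys mirror the .get() lookups in _cw.
    attack_args = {
        'norm': 'l2',               # 'l2' or 'linf'
        'lr': 0.01,                 # learning rate (required)
        'max_iter': 10,
        'confidence': 0.0,
        'targeted': False,
        'binary_search_steps': 10,  # only used on the 'l2' branch
    }
    x_adv = _cw(model, x_test, y_test, attack_args)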
Example #9
    def test_pytorch_iris_L2(self):
        classifier = get_tabular_classifier_pt()
        attack = CarliniL2Method(classifier, targeted=False, max_iter=10)
        x_test_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

        predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris, axis=1) == predictions_adv).all())
        accuracy = np.sum(predictions_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Accuracy on Iris with C&W adversarial examples: %.2f%%", (accuracy * 100))
Example #10
    def test_keras_iris_unbounded_L2(self):
        classifier = get_tabular_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
        attack = CarliniL2Method(classifier, targeted=False, max_iter=10)
        x_test_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_adv).all())

        predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris, axis=1) == predictions_adv).all())
        accuracy = np.sum(predictions_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Accuracy on Iris with C&W adversarial examples: %.2f%%", (accuracy * 100))
Example #11
    def test_keras_mnist_L2(self):
        """
        Second test with the KerasClassifier.
        :return:
        """
        x_test_original = self.x_test_mnist.copy()

        # Build KerasClassifier
        krc = get_image_classifier_kr(from_logits=True)

        # First attack
        cl2m = CarliniL2Method(classifier=krc, targeted=True, max_iter=10)
        y_target = [6, 6, 7, 4, 9, 7, 9, 0, 1, 0]
        x_test_adv = cl2m.generate(self.x_test_mnist, y=to_categorical(y_target, nb_classes=10))
        self.assertFalse((self.x_test_mnist == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        logger.debug("CW2 Target: %s", y_target)
        logger.debug("CW2 Actual: %s", y_pred_adv)
        logger.info("CW2 Success Rate: %.2f", (np.sum(y_target == y_pred_adv) / float(len(y_target))))
        self.assertTrue((y_target == y_pred_adv).any())

        # Second attack
        cl2m = CarliniL2Method(classifier=krc, targeted=False, max_iter=10)
        x_test_adv = cl2m.generate(self.x_test_mnist)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        logger.debug("CW2 Target: %s", y_target)
        logger.debug("CW2 Actual: %s", y_pred_adv)
        logger.info("CW2 Success Rate: %.2f", (np.sum(y_target != y_pred_adv) / float(len(y_target))))
        self.assertTrue((y_target != y_pred_adv).any())

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)

        # Clean-up
        k.clear_session()
Example #12
def craft(X, Y, art_classifier, attack=None, **attack_params):
    assert art_classifier is not None
    assert attack is not None

    attacker = None
    if attack == ATTACK.PGD:
        eps = attack_params.get('eps', 0.2)
        eps_step = attack_params.get('eps_step', eps / 5.)
        max_iter = attack_params.get('max_iter', 3)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)

        attacker = ProjectedGradientDescent(classifier=art_classifier,
                                            norm=np.inf,
                                            eps=eps,
                                            eps_step=eps_step,
                                            max_iter=max_iter,
                                            targeted=targeted,
                                            num_random_init=0,
                                            batch_size=batch_size)

    elif attack == ATTACK.DEEPFOOL:
        eps = attack_params.get('eps', 1e-6)
        max_iter = attack_params.get('max_iter', 5)
        nb_grads = attack_params.get('nb_grads', 3)
        batch_size = attack_params.get('batch_size', 1)

        attacker = DeepFool(classifier=art_classifier,
                            max_iter=max_iter,
                            epsilon=eps,
                            nb_grads=nb_grads,
                            batch_size=batch_size)

    elif attack == ATTACK.FGSM:
        eps = attack_params.get('eps', 0.3)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)

        attacker = FastGradientMethod(classifier=art_classifier,
                                      norm=np.inf,
                                      eps=eps,
                                      targeted=targeted,
                                      batch_size=batch_size)

    elif attack == ATTACK.BIM:
        eps = attack_params.get('eps', 0.3)
        eps_step = attack_params.get('eps_step', eps / 5.)
        norm = attack_params.get('norm', np.inf)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)

        attacker = BasicIterativeMethod(classifier=art_classifier,
                                        norm=norm,
                                        eps=eps,
                                        eps_step=eps_step,
                                        targeted=targeted,
                                        batch_size=batch_size)

    elif attack == ATTACK.JSMA:
        theta = attack_params.get('theta', 0.3)
        gamma = attack_params.get('gamma', 0.5)
        batch_size = attack_params.get('batch_size', 128)

        attacker = SaliencyMapMethod(classifier=art_classifier,
                                     theta=theta,
                                     gamma=gamma,
                                     batch_size=batch_size)

    elif attack == ATTACK.CW_L2:
        lr = attack_params.get('lr', 0.1)
        bsearch_steps = attack_params.get('bsearch_steps', 10)

        attacker = CarliniL2Method(classifier=art_classifier,
                                   learning_rate=lr,
                                   binary_search_steps=bsearch_steps)

    elif attack == ATTACK.CW_Linf:
        lr = attack_params.get('lr', 0.01)

        attacker = CarliniLInfMethod(classifier=art_classifier,
                                     learning_rate=lr)

    else:
        raise NotImplementedError('{} is not implemented.'.format(attack))

    print(
        'Generating [{}] adversarial examples; this may take a while...'.format(
            attack))
    X_adv = attacker.generate(X, y=Y)

    del attacker
    return X_adv
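
A hedged usage sketch; `art_classifier` is assumed to be an already-constructed ART classifier, and the keyword arguments override the `.get()` defaults shown above:

    # Hypothetical usage: extra keyword arguments land in attack_params;
    # for the CW_L2 branch, 'lr' maps to learning_rate and 'bsearch_steps'
    # to binary_search_steps.
    X_adv = craft(X, Y, art_classifier,
                  attack=ATTACK.CW_L2,
                  lr=0.05,
                  bsearch_steps=5)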