Example No. 1
    def test_tensorflow_mnist(self):
        """
        First test with the TensorFlowClassifier.
        :return:
        """
        x_test_original = self.x_test_mnist.copy()

        # Build TensorFlowClassifier
        tfc, sess = get_image_classifier_tf()

        # Targeted attack
        zoo = ZooAttack(classifier=tfc,
                        targeted=True,
                        max_iter=30,
                        binary_search_steps=8,
                        batch_size=128)
        params = {"y": random_targets(self.y_test_mnist, tfc.nb_classes())}
        x_test_mnist_adv = zoo.generate(self.x_test_mnist, **params)
        self.assertFalse((self.x_test_mnist == x_test_mnist_adv).all())
        self.assertLessEqual(np.amax(x_test_mnist_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_mnist_adv), 0.0)
        target = np.argmax(params["y"], axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_mnist_adv), axis=1)
        logger.debug("ZOO target: %s", target)
        logger.debug("ZOO actual: %s", y_pred_adv)
        logger.info("ZOO success rate on MNIST: %.2f",
                    (sum(target == y_pred_adv) / float(len(target))))

        # Untargeted attack
        zoo = ZooAttack(classifier=tfc,
                        targeted=False,
                        max_iter=10,
                        binary_search_steps=3)
        x_test_mnist_adv = zoo.generate(self.x_test_mnist)
        # self.assertFalse((x_test == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_mnist_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_mnist_adv), 0.0)
        y_pred = np.argmax(tfc.predict(self.x_test_mnist), axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_mnist_adv), axis=1)
        logger.debug("ZOO actual: %s", y_pred_adv)
        logger.info("ZOO success rate on MNIST: %.2f",
                    (sum(y_pred != y_pred_adv) / float(len(y_pred))))

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)

        # Clean-up session
        if sess is not None:
            sess.close()
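
The helper random_targets used above comes from art.utils: for each sample it draws a one-hot target class different from the true class. A minimal standalone sketch of its behavior (illustration only, not part of the test):

import numpy as np
from art.utils import random_targets

y_true = np.eye(10)[[3, 7, 1]]                    # one-hot labels for classes 3, 7, 1
y_target = random_targets(y_true, nb_classes=10)  # one-hot targets, never the true class
assert not np.any(np.argmax(y_target, axis=1) == np.argmax(y_true, axis=1))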
Example No. 2
    def test_tensorflow_failure_attack(self):
        """
        Test the corner case when attack fails.
        :return:
        """
        x_test_original = self.x_test_mnist.copy()

        # Build TensorFlowClassifier
        tfc, sess = get_image_classifier_tf()

        # Failure attack
        zoo = ZooAttack(classifier=tfc,
                        max_iter=0,
                        binary_search_steps=0,
                        learning_rate=0)
        x_test_mnist_adv = zoo.generate(self.x_test_mnist)
        self.assertLessEqual(np.amax(x_test_mnist_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_mnist_adv), 0.0)
        np.testing.assert_almost_equal(self.x_test_mnist, x_test_mnist_adv, 3)

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)

        # Clean-up session
        if sess is not None:
            sess.close()
Example No. 3
    def test_keras_mnist(self):
        """
        Second test with the KerasClassifier.
        :return:
        """
        x_test_original = self.x_test.copy()

        # Build KerasClassifier
        krc = get_classifier_kr()

        # Targeted attack
        # zoo = ZooAttack(classifier=krc, targeted=True, batch_size=5)
        # params = {'y': random_targets(self.y_test, krc.nb_classes())}
        # x_test_adv = zoo.generate(self.x_test, **params)
        #
        # self.assertFalse((self.x_test == x_test_adv).all())
        # self.assertLessEqual(np.amax(x_test_adv), 1.0)
        # self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        # target = np.argmax(params['y'], axis=1)
        # y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        # logger.debug('ZOO target: %s', target)
        # logger.debug('ZOO actual: %s', y_pred_adv)
        # logger.info('ZOO success rate on MNIST: %.2f', (sum(target == y_pred_adv) / float(len(target))))

        # Untargeted attack
        # zoo = ZooAttack(classifier=krc, targeted=False, max_iter=20)
        zoo = ZooAttack(classifier=krc, targeted=False, batch_size=5)
        # x_test_adv = zoo.generate(x_test)
        # Note: with targeted=False, ZooAttack treats `y` as the labels to move away
        # from; this test feeds it random targets rather than the true test labels.
        params = {'y': random_targets(self.y_test, krc.nb_classes())}
        x_test_adv = zoo.generate(self.x_test, **params)

        # x_test_adv_true = [0.00000000e+00, 2.50167388e-04, 1.50529508e-04, 4.69674182e-04,
        #                    0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        #                    1.67321396e-05, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        #                    0.00000000e+00, 2.08451956e-06, 0.00000000e+00, 0.00000000e+00,
        #                    2.53360748e-01, 9.60119188e-01, 9.85227525e-01, 2.53600776e-01,
        #                    0.00000000e+00, 0.00000000e+00, 5.23251540e-04, 0.00000000e+00,
        #                    0.00000000e+00, 0.00000000e+00, 1.08632184e-05, 0.00000000e+00]
        #
        # for i in range(14):
        #     self.assertAlmostEqual(x_test_adv_true[i], x_test_adv[0, 14, i, 0])

        # self.assertFalse((x_test == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
        y_pred = np.argmax(krc.predict(self.x_test), axis=1)
        logger.debug('ZOO actual: %s', y_pred_adv)
        logger.info('ZOO success rate on MNIST: %.2f', (sum(y_pred != y_pred_adv) / float(len(y_pred))))

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test))), 0.0, delta=0.00001)

        # Clean-up
        k.clear_session()
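
For reference, the conventional untargeted call passes the true labels, or omits y entirely so that ART falls back to the classifier's own predictions as the labels to move away from. A minimal sketch, assuming krc, x_test, and y_test as in the test above:

zoo = ZooAttack(classifier=krc, targeted=False, batch_size=5)
x_test_adv = zoo.generate(x_test, y=y_test)  # true labels to move away from; y may also be omitted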
Example No. 4
    def test_tfclassifier(self):
        """
        First test with the TensorFlowClassifier.
        :return:
        """
        # Build TensorFlowClassifier
        tfc, sess = get_classifier_tf()

        # Targeted attack
        zoo = ZooAttack(classifier=tfc,
                        targeted=True,
                        max_iter=100,
                        binary_search_steps=10)
        params = {'y': random_targets(self.y_test, tfc.nb_classes())}
        x_test_adv = zoo.generate(self.x_test, **params)
        self.assertFalse((self.x_test == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        target = np.argmax(params['y'], axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        logger.debug('ZOO target: %s', target)
        logger.debug('ZOO actual: %s', y_pred_adv)
        logger.info('ZOO success rate on MNIST: %.2f',
                    (sum(target == y_pred_adv) / float(len(target))))

        # Untargeted attack
        zoo = ZooAttack(classifier=tfc, targeted=False)
        x_test_adv = zoo.generate(self.x_test)
        # self.assertFalse((x_test == x_test_adv).all())
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        y_pred = np.argmax(tfc.predict(self.x_test), axis=1)
        y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
        logger.debug('ZOO actual: %s', y_pred_adv)
        logger.info('ZOO success rate on MNIST: %.2f',
                    (sum(y_pred != y_pred_adv) / float(len(y_pred))))

        # Clean-up session
        sess.close()
Example No. 5
    def test_pytorch_mnist(self):
        """
        Third test with the PyTorchClassifier.
        :return:
        """
        # Build PyTorchClassifier
        ptc = get_image_classifier_pt()

        # Get MNIST
        x_test_mnist = np.swapaxes(self.x_test_mnist, 1, 3).astype(np.float32)
        x_test_original = x_test_mnist.copy()

        # First attack
        # zoo = ZooAttack(classifier=ptc, targeted=True, max_iter=10, binary_search_steps=10)
        # params = {'y': random_targets(self.y_test, ptc.nb_classes())}
        # x_test_adv = zoo.generate(x_test, **params)
        # self.assertFalse((x_test == x_test_adv).all())
        # self.assertLessEqual(np.amax(x_test_adv), 1.0)
        # self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        # target = np.argmax(params['y'], axis=1)
        # y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
        # logger.debug('ZOO target: %s', target)
        # logger.debug('ZOO actual: %s', y_pred_adv)
        # logger.info('ZOO success rate on MNIST: %.2f', (sum(target != y_pred_adv) / float(len(target))))

        # Second attack
        zoo = ZooAttack(
            classifier=ptc,
            targeted=False,
            learning_rate=1e-2,
            max_iter=10,
            binary_search_steps=3,
            abort_early=False,
            use_resize=False,
            use_importance=False,
        )
        x_test_mnist_adv = zoo.generate(x_test_mnist)
        self.assertLessEqual(np.amax(x_test_mnist_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_mnist_adv), 0.0)

        # print(x_test[0, 0, 14, :])
        # print(x_test_adv[0, 0, 14, :])
        # print(np.amax(x_test - x_test_adv))

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test_mnist))), 0.0, delta=0.00001)
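
The swapaxes call at the top of this test converts the MNIST batch from the channels-last (NHWC) layout to the channels-first (NCHW) layout that the PyTorch classifier expects. A standalone sketch of the conversion (shapes assumed):

import numpy as np

x_nhwc = np.zeros((100, 28, 28, 1), dtype=np.float32)  # (N, H, W, C)
x_nchw = np.swapaxes(x_nhwc, 1, 3)                     # swaps axes 1 and 3 -> (N, C, W, H)
assert x_nchw.shape == (100, 1, 28, 28)                # matches NCHW for square images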
Example No. 6
    def test_failure_attack(self):
        """
        Test the corner case when attack fails.
        :return:
        """
        # Build TensorFlowClassifier
        tfc, sess = get_classifier_tf()

        # Failure attack
        zoo = ZooAttack(classifier=tfc,
                        max_iter=0,
                        binary_search_steps=0,
                        learning_rate=0)
        x_test_adv = zoo.generate(self.x_test)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)
        np.testing.assert_almost_equal(self.x_test, x_test_adv, 3)

        # Clean-up session
        sess.close()
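
The Step 5-7 snippet below refers to classifier, x_test, and y_test from Steps 1-4, which are not shown. A hypothetical minimal setup (the model choice and subset sizes are assumptions made to keep the black-box attack fast, not the original code):

import numpy as np
from sklearn.tree import DecisionTreeClassifier
from art.attacks.evasion import ZooAttack
from art.estimators.classification import SklearnClassifier
from art.utils import load_mnist

# Steps 1-4 (sketch): load data, train a model, and wrap it for ART.
(x_train, y_train), (x_test, y_test), min_pixel_value, max_pixel_value = load_mnist()
x_train, y_train = x_train[:500].reshape(500, -1), y_train[:500]  # flatten for scikit-learn
x_test, y_test = x_test[:20].reshape(20, -1), y_test[:20]

model = DecisionTreeClassifier()
model.fit(x_train, np.argmax(y_train, axis=1))  # labels come one-hot from load_mnist
classifier = SklearnClassifier(model=model, clip_values=(min_pixel_value, max_pixel_value))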
# Step 5: Evaluate the ART classifier on benign test examples

predictions = classifier.predict(x_test)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on benign test examples: {}%".format(accuracy * 100))

# Step 6: Generate adversarial test examples
attack = ZooAttack(
    classifier=classifier,
    confidence=0.0,
    targeted=False,
    learning_rate=1e-1,
    max_iter=200,
    binary_search_steps=10,
    initial_const=1e-3,
    abort_early=True,
    use_resize=False,
    use_importance=False,
    nb_parallel=5,
    batch_size=1,
    variable_h=0.01,
)
x_test_adv = attack.generate(x=x_test, y=y_test)

# Step 7: Evaluate the ART classifier on adversarial test examples

predictions = classifier.predict(x_test_adv)
accuracy = np.sum(np.argmax(predictions, axis=1) == np.argmax(y_test, axis=1)) / len(y_test)
print("Accuracy on adversarial test examples: {}%".format(accuracy * 100))
import numpy as np
from math import ceil

from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from art.attacks.evasion import BoundaryAttack, HopSkipJump, ZooAttack
from art.estimators.classification import SklearnClassifier


def adversarial_attack_shift(x, y, delta=1.0, model=RandomForestClassifier(), attack_type='zoo',
                             numerical_features=None, feat_delta=1.0):
    # Here, delta is the fraction of the held-out half of the data on which to generate
    # attacks: at least half of the data must be kept to train the model that the
    # attacks are generated against.
    assert attack_type in ['zoo', 'boundary', 'hop-skip-jump']

    le = preprocessing.LabelEncoder()
    le.fit(np.squeeze(y))
    y = le.transform(y)

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=(0.5 * delta))

    if numerical_features is not None:

        n_numerical = len(numerical_features)
        feat_indices = np.random.choice(n_numerical, ceil(n_numerical * feat_delta), replace=False)
        feat_indices = np.array(numerical_features)[feat_indices]

    else:

        feat_indices = np.random.choice(x.shape[1], ceil(x.shape[1] * feat_delta), replace=False)

    other_features = list(set(range(x.shape[1])) - set(feat_indices))

    x_train_other = x_train[:, other_features]
    x_train_numerical = x_train[:, feat_indices]
    x_test_other = x_test[:, other_features]
    x_test_numerical = x_test[:, feat_indices]

    classifier = SklearnClassifier(model=model, clip_values=(0, np.max(x_train_numerical)))

    # Train the ART classifier

    classifier.fit(x_train_numerical, y_train)

    # Evaluate the ART classifier on benign test examples

    predictions = classifier.predict(x_test_numerical)
    accuracy = np.sum(np.argmax(predictions, axis=1) == y_test) / len(y_test)
    print("Accuracy on benign test examples: {}%".format(accuracy * 100))

    # Generate adversarial test examples
    if attack_type == 'zoo':
        attack = ZooAttack(
            classifier=classifier,
            confidence=0.0,
            targeted=False,
            learning_rate=1e-1,
            max_iter=10,
            binary_search_steps=10,
            initial_const=1e-3,
            abort_early=True,
            use_resize=False,
            use_importance=False,
            nb_parallel=x_test_numerical.shape[1],
            batch_size=1,
            variable_h=0.01,
        )
    elif attack_type == 'boundary':
        attack = BoundaryAttack(classifier, targeted=False, epsilon=0.02, max_iter=20, num_trial=10)
    elif attack_type == 'hop-skip-jump':
        attack = HopSkipJump(classifier,
                             targeted=False,
                             norm=2,
                             max_iter=20,
                             max_eval=10,
                             init_eval=9,
                             init_size=10)

    x_adv = attack.generate(x=x_test_numerical, y=y_test)

    # Evaluate the ART classifier on adversarial test examples

    predictions_adv = classifier.predict(x_adv)
    accuracy = np.sum(np.argmax(predictions_adv, axis=1) == y_test) / len(y_test)
    print("Accuracy on adversarial test examples: {}%".format(accuracy * 100))
    print("Max difference: {}".format(np.max(np.abs(x_test_numerical - x_adv) / x_test_numerical)))

    x_final = np.zeros_like(x)
    x_final[:, feat_indices] = np.vstack([x_train_numerical, x_adv])
    x_final[:, other_features] = np.vstack([x_train_other, x_test_other])

    y_final = np.concatenate([y_train, y_test], axis=0)
    y_final = le.inverse_transform(y_final)

    adv_indices = list(range(len(y_train), len(y)))

    return x_final, y_final, adv_indices, feat_indices
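
A hypothetical end-to-end call on synthetic data (the shapes, seed, and forest size are assumptions chosen only to keep the sketch fast):

import numpy as np
from sklearn.ensemble import RandomForestClassifier

rng = np.random.default_rng(0)
x = rng.random((200, 8))
y = rng.integers(0, 2, size=200).astype(str)  # string labels exercise the LabelEncoder

x_shifted, y_shifted, adv_indices, feat_indices = adversarial_attack_shift(
    x, y, delta=1.0, model=RandomForestClassifier(n_estimators=10), attack_type='zoo')
print("{} adversarial rows generated on features {}".format(len(adv_indices), feat_indices))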