def test_2_tensorflow_iris(self):
        """
        Check that defensive distillation refuses the TensorFlow tabular classifier.

        Calling the transformer on ``self.x_train_iris`` is expected to raise a ``ValueError`` whose
        message states that the trained classifier does not produce probability outputs.

        :return:
        """
        # Create the trained classifier
        trained_classifier, sess = get_tabular_classifier_tf()

        # Release the session in ``finally`` so that a failing assertion cannot leak it
        try:
            # Create the classifier to be transformed, handing it the session of the trained one
            transformed_classifier, _ = get_tabular_classifier_tf(load_init=False, sess=sess)

            # Create defensive distillation transformer
            transformer = DefensiveDistillation(
                classifier=trained_classifier, batch_size=BATCH_SIZE, nb_epochs=NB_EPOCHS
            )

            # Perform the transformation, which must be rejected
            with self.assertRaises(ValueError) as context:
                _ = transformer(x=self.x_train_iris, transformed_classifier=transformed_classifier)

            self.assertIn(
                "The input trained classifier do not produce probability outputs.",
                str(context.exception),
            )
        finally:
            # Clean-up session
            if sess is not None:
                sess.close()
    def test_2_tensorflow_iris(self):
        """
        Run the KnockoffNets extraction attack against the TensorFlow tabular classifier.

        Two sampling strategies are exercised in sequence on the same thieved classifier: ``"random"``
        (agreement with the victim must exceed 0.3) and ``"adaptive"`` with ``reward="all"`` (agreement
        must exceed 0.4). Agreement is the fraction of ``self.x_train_iris`` samples on which both
        classifiers predict the same class.

        :return:
        """
        # Get the TensorFlow classifier
        victim_tfc, sess = get_tabular_classifier_tf()

        # Release the session in ``finally`` so that a failing assertion cannot leak it
        try:
            # Create the thieved classifier, handing it the victim's session
            thieved_tfc, _ = get_tabular_classifier_tf(load_init=False, sess=sess)

            # Create random attack
            attack = KnockoffNets(
                classifier=victim_tfc,
                batch_size_fit=BATCH_SIZE,
                batch_size_query=BATCH_SIZE,
                nb_epochs=NB_EPOCHS,
                nb_stolen=NB_STOLEN,
                sampling_strategy="random",
                verbose=False,
            )
            thieved_tfc = attack.extract(x=self.x_train_iris, thieved_classifier=thieved_tfc)

            victim_preds = np.argmax(victim_tfc.predict(x=self.x_train_iris), axis=1)
            thieved_preds = np.argmax(thieved_tfc.predict(x=self.x_train_iris), axis=1)
            acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)

            self.assertGreater(acc, 0.3)

            # Create adaptive attack; it continues from the classifier returned by the random attack
            attack = KnockoffNets(
                classifier=victim_tfc,
                batch_size_fit=BATCH_SIZE,
                batch_size_query=BATCH_SIZE,
                nb_epochs=NB_EPOCHS,
                nb_stolen=NB_STOLEN,
                sampling_strategy="adaptive",
                reward="all",
                verbose=False,
            )
            thieved_tfc = attack.extract(
                x=self.x_train_iris, y=self.y_train_iris, thieved_classifier=thieved_tfc
            )

            victim_preds = np.argmax(victim_tfc.predict(x=self.x_train_iris), axis=1)
            thieved_preds = np.argmax(thieved_tfc.predict(x=self.x_train_iris), axis=1)
            acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)

            self.assertGreater(acc, 0.4)
        finally:
            # Clean-up session
            if sess is not None:
                sess.close()
    def test_tensorflow_iris(self):
        """
        Exercise the Basic Iterative Method on the TensorFlow tabular classifier.

        For both the untargeted and the targeted run the adversarial samples must differ from the
        inputs and stay within [0, 1]. Untargeted: not every adversarial prediction may equal the true
        label. Targeted: at least one adversarial prediction must equal its target label.
        """
        classifier, _ = get_tabular_classifier_tf()
        true_labels = np.argmax(self.y_test_iris, axis=1)
        nb_samples = self.y_test_iris.shape[0]

        # Untargeted attack
        untargeted_bim = BasicIterativeMethod(classifier, eps=1, eps_step=0.1, max_iter=5)
        x_adv = untargeted_bim.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_adv).all())
        self.assertTrue((x_adv <= 1).all())
        self.assertTrue((x_adv >= 0).all())

        adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
        self.assertFalse((true_labels == adv_labels).all())
        accuracy = np.sum(adv_labels == true_labels) / nb_samples
        logger.info("Accuracy on Iris with BIM adversarial examples: %.2f%%", (accuracy * 100))

        # Targeted attack
        targets = random_targets(self.y_test_iris, nb_classes=3)
        targeted_bim = BasicIterativeMethod(classifier, targeted=True, eps=1, eps_step=0.1, max_iter=5)
        x_adv = targeted_bim.generate(self.x_test_iris, y=targets)
        self.assertFalse((self.x_test_iris == x_adv).all())
        self.assertTrue((x_adv <= 1).all())
        self.assertTrue((x_adv >= 0).all())

        adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
        target_labels = np.argmax(targets, axis=1)
        self.assertTrue((target_labels == adv_labels).any())
        success_rate = np.sum(adv_labels == target_labels) / nb_samples
        logger.info("Success rate of targeted BIM on Iris: %.2f%%", (success_rate * 100))
Пример #4
0
    def test_2_tensorflow_iris(self):
        """
        Exercise the untargeted UniversalPerturbation attack (inner attacker ``"ead"``) on the
        TensorFlow tabular classifier.

        The adversarial samples must differ from the inputs, stay within [0, 1], and not every
        adversarial prediction may equal the true label.
        """
        classifier, _ = get_tabular_classifier_tf()
        true_labels = np.argmax(self.y_test_iris, axis=1)

        # Test untargeted attack
        attack = UniversalPerturbation(classifier, verbose=False)
        attack.set_params(
            max_iter=1,
            attacker="ead",
            attacker_params={"max_iter": 5, "targeted": False, "verbose": False},
        )
        x_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_adv).all())
        self.assertTrue((x_adv <= 1).all())
        self.assertTrue((x_adv >= 0).all())

        adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
        self.assertFalse((true_labels == adv_labels).all())
        accuracy = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
        logger.info("Accuracy on Iris with universal adversarial examples: %.2f%%", (accuracy * 100))
    def test_2_tensorflow_iris(self):
        """
        Exercise the HopSkipJump attack on the TensorFlow tabular classifier.

        Four configurations are run in order: untargeted with the attack's default norm, untargeted
        with ``norm=np.inf``, targeted with the default norm, and targeted with ``norm=np.inf``. Each
        run must change the inputs and keep the adversarial samples within [0, 1]. Untargeted runs
        must leave at least one prediction different from the true label; targeted runs must hit at
        least one target label.
        """
        classifier, sess = get_tabular_classifier_tf()

        # Release the session in ``finally`` so that a failing assertion cannot leak it
        try:
            true_labels = np.argmax(self.y_test_iris, axis=1)
            nb_samples = self.y_test_iris.shape[0]
            # An empty dict keeps the attack's default norm (the case originally labelled norm=2).
            # ``np.inf`` is used because the ``np.Inf`` alias no longer exists in NumPy 2.0.
            norm_settings = ({}, {"norm": np.inf})

            # Test untargeted attack, first with the default norm and then with norm=np.inf
            for norm_kwargs in norm_settings:
                attack = HopSkipJump(
                    classifier,
                    targeted=False,
                    max_iter=20,
                    max_eval=100,
                    init_eval=10,
                    verbose=False,
                    **norm_kwargs
                )
                x_test_adv = attack.generate(self.x_test_iris)
                self.assertFalse((self.x_test_iris == x_test_adv).all())
                self.assertTrue((x_test_adv <= 1).all())
                self.assertTrue((x_test_adv >= 0).all())

                preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
                self.assertFalse((true_labels == preds_adv).all())
                acc = np.sum(preds_adv == true_labels) / nb_samples
                logger.info("Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%", (acc * 100))

            # Test targeted attack, first with the default norm and then with norm=np.inf
            for norm_kwargs in norm_settings:
                # Fresh targets are drawn for every targeted run
                targets = random_targets(self.y_test_iris, nb_classes=3)
                attack = HopSkipJump(
                    classifier,
                    targeted=True,
                    max_iter=20,
                    max_eval=100,
                    init_eval=10,
                    verbose=False,
                    **norm_kwargs
                )
                x_test_adv = attack.generate(self.x_test_iris, y=targets)
                self.assertFalse((self.x_test_iris == x_test_adv).all())
                self.assertTrue((x_test_adv <= 1).all())
                self.assertTrue((x_test_adv >= 0).all())

                preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
                target_labels = np.argmax(targets, axis=1)
                self.assertTrue((target_labels == preds_adv).any())
                acc = np.sum(preds_adv == target_labels) / nb_samples
                logger.info("Success rate of targeted HopSkipJump on Iris: %.2f%%", (acc * 100))
        finally:
            # Clean-up session
            if sess is not None:
                sess.close()
Пример #6
0
    def test_tensorflow_iris_LInf(self):
        """
        Exercise the Carlini L-inf attack on the TensorFlow tabular classifier.

        For both the untargeted and the targeted run the adversarial samples must differ from the
        inputs and have their extrema within [0, 1]. Untargeted: not every adversarial prediction may
        equal the true label. Targeted: at least one adversarial prediction must equal its target.
        """
        classifier, _ = get_tabular_classifier_tf()
        true_labels = np.argmax(self.y_test_iris, axis=1)
        nb_samples = self.y_test_iris.shape[0]

        # Untargeted attack
        untargeted_cw = CarliniLInfMethod(classifier, targeted=False, max_iter=10, eps=0.5)
        x_adv = untargeted_cw.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_adv).all())
        self.assertLessEqual(np.amax(x_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_adv), 0.0)

        adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
        self.assertFalse((true_labels == adv_labels).all())
        accuracy = np.sum(adv_labels == true_labels) / nb_samples
        logger.info("Accuracy on Iris with C&W adversarial examples: %.2f%%", (accuracy * 100))

        # Targeted attack
        targets = random_targets(self.y_test_iris, nb_classes=3)
        targeted_cw = CarliniLInfMethod(classifier, targeted=True, max_iter=10, eps=0.5)
        x_adv = targeted_cw.generate(self.x_test_iris, y=targets)
        self.assertFalse((self.x_test_iris == x_adv).all())
        self.assertLessEqual(np.amax(x_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_adv), 0.0)

        adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
        target_labels = np.argmax(targets, axis=1)
        self.assertTrue((target_labels == adv_labels).any())
        success_rate = np.sum(adv_labels == target_labels) / nb_samples
        logger.info("Success rate of targeted C&W on Iris: %.2f%%", (success_rate * 100))
    def _get_tabular_classifier_list(clipped=True):
        """
        Build the tabular classifiers for the ``framework`` value taken from the enclosing scope.

        :param clipped: Whether the clipped variant is requested. For ``"scikitlearn"`` this flag is
                        not forwarded: the helper is always called with ``clipped=False``.
        :return: A one-element list for Keras, clipped TensorFlow and clipped PyTorch; the result of
                 ``get_tabular_classifier_scikit_list`` for scikit-learn; ``None`` when no classifier
                 is defined (unclipped TensorFlow/PyTorch, or an unrecognised framework).
        """
        # Default result. Without it an unrecognised framework reached ``return`` with the name
        # unbound and raised UnboundLocalError.
        classifier_list = None

        if framework == "keras":
            classifier = get_tabular_classifier_kr()
            if clipped:
                classifier_list = [classifier]
            else:
                # Re-wrap the underlying model without passing clip values
                classifier_list = [
                    KerasClassifier(model=classifier.model, use_logits=False, channels_first=True)
                ]

        elif framework == "tensorflow":
            if clipped:
                classifier, _ = get_tabular_classifier_tf()
                classifier_list = [classifier]
            else:
                logging.warning(
                    "{0} doesn't have an uncliped classifier defined yet".
                    format(framework))

        elif framework == "pytorch":
            if clipped:
                classifier_list = [get_tabular_classifier_pt()]
            else:
                logging.warning(
                    "{0} doesn't have an uncliped classifier defined yet".
                    format(framework))

        elif framework == "scikitlearn":
            return get_tabular_classifier_scikit_list(clipped=False)

        return classifier_list
Пример #8
0
    def test_tensorflow_iris(self):
        """
        Run the CopycatCNN extraction attack against the TensorFlow tabular classifier.

        A small dense network (4 inputs, two hidden layers of 10 units, 3 logits) is built in the
        victim's session and used as the thieved classifier. After extraction, the thieved and victim
        classifiers must agree on more than 30% of the first 100 samples of ``self.x_train_iris``.

        :return:
        """
        # Get the TensorFlow classifier
        victim_tfc, sess = get_tabular_classifier_tf()

        # Clean up in ``finally`` so that a failing assertion cannot leak the session or the graph
        try:
            # Define input and output placeholders
            input_ph = tf.placeholder(tf.float32, shape=[None, 4])
            output_ph = tf.placeholder(tf.int32, shape=[None, 3])

            # Define the tensorflow graph
            dense1 = tf.layers.dense(input_ph, 10)
            dense2 = tf.layers.dense(dense1, 10)
            logits = tf.layers.dense(dense2, 3)

            # Train operator
            loss = tf.reduce_mean(tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=output_ph))
            optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
            train = optimizer.minimize(loss)

            # TensorFlow session and initialization
            sess.run(tf.global_variables_initializer())

            # Wrap the new graph as the classifier that will be trained by the attack
            thieved_tfc = TensorFlowClassifier(
                clip_values=(0, 1),
                input_ph=input_ph,
                output=logits,
                labels_ph=output_ph,
                train=train,
                loss=loss,
                learning=None,
                sess=sess,
                channels_first=True,
            )

            # Create attack
            copycat_cnn = CopycatCNN(
                classifier=victim_tfc,
                batch_size_fit=self.batch_size,
                batch_size_query=self.batch_size,
                nb_epochs=NB_EPOCHS,
                nb_stolen=NB_STOLEN,
            )
            thieved_tfc = copycat_cnn.extract(x=self.x_train_iris, thieved_classifier=thieved_tfc)

            victim_preds = np.argmax(victim_tfc.predict(x=self.x_train_iris[:100]), axis=1)
            thieved_preds = np.argmax(thieved_tfc.predict(x=self.x_train_iris[:100]), axis=1)
            acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)

            self.assertGreater(acc, 0.3)
        finally:
            # Clean-up session
            if sess is not None:
                sess.close()
                tf.reset_default_graph()
Пример #9
0
    def test_2_tensorflow_iris(self):
        """
        Check that VirtualAdversarialMethod rejects the TensorFlow tabular classifier.

        Generating adversarial samples is expected to raise a ``TypeError`` whose message reports
        that the classifier's outputs are not probabilities in [0, 1].
        """
        classifier, _ = get_tabular_classifier_tf()
        vat_attack = VirtualAdversarialMethod(classifier, eps=0.1, verbose=False)

        with self.assertRaises(TypeError) as raised:
            _ = vat_attack.generate(self.x_test_iris)

        # The two literals are concatenated without a separating space, as in the expected message
        expected_message = (
            "This attack requires a classifier predicting probabilities in the range [0, 1] as output."
            "Values smaller than 0.0 or larger than 1.0 have been detected."
        )
        self.assertIn(expected_message, str(raised.exception))
Пример #10
0
    def test_tensorflow_iris(self):
        """
        Exercise the NewtonFool attack on the TensorFlow tabular classifier.

        The adversarial samples must differ from the inputs, stay within [0, 1], and not every
        adversarial prediction may equal the true label.
        """
        classifier, _ = get_tabular_classifier_tf()
        true_labels = np.argmax(self.y_test_iris, axis=1)

        newton_fool = NewtonFool(classifier, max_iter=5, batch_size=128)
        x_adv = newton_fool.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_adv).all())
        self.assertTrue((x_adv <= 1).all())
        self.assertTrue((x_adv >= 0).all())

        adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
        self.assertFalse((true_labels == adv_labels).all())
        accuracy = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
        logger.info("Accuracy on Iris with NewtonFool adversarial examples: %.2f%%", (accuracy * 100))
    def test_2_tensorflow_iris(self):
        """
        Exercise the DeepFool attack on the TensorFlow tabular classifier.

        The adversarial samples must differ from the inputs, have their extrema within [0, 1], and
        not every adversarial prediction may equal the true label.
        """
        classifier, _ = get_tabular_classifier_tf()
        true_labels = np.argmax(self.y_test_iris, axis=1)

        deep_fool = DeepFool(classifier, max_iter=5, batch_size=128)
        x_adv = deep_fool.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_adv).all())
        self.assertLessEqual(np.amax(x_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_adv), 0.0)

        adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
        self.assertFalse((true_labels == adv_labels).all())
        acc = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
        logger.info("Accuracy on Iris with DeepFool adversarial examples: %.2f%%", (acc * 100))
Пример #12
0
    def test_2_tensorflow_iris(self):
        """
        Exercise the untargeted ProjectedGradientDescent attack on the TensorFlow tabular classifier.

        Uses the test split unpacked from ``self.iris``. The adversarial samples must differ from the
        inputs, stay within [0, 1], and not every adversarial prediction may equal the true label.
        """
        (_, _), (x_test, y_test) = self.iris
        classifier, _ = get_tabular_classifier_tf()
        true_labels = np.argmax(np.array(y_test), axis=1)

        # Test untargeted attack
        pgd = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1, max_iter=5)
        x_adv = pgd.generate(x_test)
        self.assertFalse((np.array(x_test) == x_adv).all())
        self.assertTrue((x_adv <= 1).all())
        self.assertTrue((x_adv >= 0).all())

        adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
        self.assertFalse((true_labels == adv_labels).all())
        accuracy = np.sum(adv_labels == true_labels) / len(y_test)
        logger.info("Accuracy on Iris with PGD adversarial examples: %.2f%%", (accuracy * 100))
    def test_2_tensorflow_iris_vector(self):
        """
        Exercise the SaliencyMapMethod (JSMA) attack on the TensorFlow tabular classifier.

        The adversarial samples must differ from the inputs, stay within [0, 1], and not every
        adversarial prediction may equal the true label.
        """
        classifier, _ = get_tabular_classifier_tf()
        true_labels = np.argmax(self.y_test_iris, axis=1)

        jsma = SaliencyMapMethod(classifier, theta=1, verbose=False)
        x_adv = jsma.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_adv).all())
        self.assertTrue((x_adv <= 1).all())
        self.assertTrue((x_adv >= 0).all())

        adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
        self.assertFalse((true_labels == adv_labels).all())
        acc = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
        logger.info("Accuracy on Iris with JSMA adversarial examples: %.2f%%", (acc * 100))
Пример #14
0
    def _tabular_dl_estimator(clipped=True):
        """
        Return a deep-learning tabular estimator for the ``framework`` value of the enclosing scope.

        :param clipped: Whether the clipped variant is requested. Only Keras provides an unclipped
                        variant here; TensorFlow and PyTorch provide an estimator only when clipped.
        :return: The estimator for the selected framework.
        :raises ARTTestFixtureNotImplemented: If no estimator is available for the combination of
                                              framework and ``clipped``.
        """
        estimator = None

        if framework == "keras":
            estimator = get_tabular_classifier_kr()
            if not clipped:
                # Re-wrap the underlying model without passing clip values
                estimator = KerasClassifier(model=estimator.model, use_logits=False, channels_first=True)
        elif framework in ("tensorflow1", "tensorflow2"):
            if clipped:
                estimator, _ = get_tabular_classifier_tf()
        elif framework == "pytorch":
            if clipped:
                estimator = get_tabular_classifier_pt()

        if estimator is not None:
            return estimator

        raise ARTTestFixtureNotImplemented(
            "no deep learning tabular estimator available", tabular_dl_estimator.__name__, framework
        )
    def test_tensorflow_iris(self):
        """
        Regression test of the ElasticNet (EAD) attack on the TensorFlow tabular classifier.

        Both the untargeted and the targeted run are compared against hard-coded reference values:
        the first adversarial sample (to 6 decimals) and the predicted class of every adversarial
        sample. The adversarial extrema must also lie within [0, 1].
        """
        classifier, _ = get_tabular_classifier_tf()
        true_labels = np.argmax(self.y_test_iris, axis=1)
        nb_samples = self.y_test_iris.shape[0]

        # Test untargeted attack
        untargeted_ead = ElasticNet(classifier, targeted=False, max_iter=10)
        x_adv = untargeted_ead.generate(self.x_test_iris)
        reference_first_sample = np.asarray([0.8479195, 0.42525578, 0.70166135, 0.28664514])
        np.testing.assert_array_almost_equal(x_adv[0, :], reference_first_sample, decimal=6)
        self.assertLessEqual(np.amax(x_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_adv), 0.0)

        adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
        reference_labels = np.asarray([
            1, 2, 2, 2, 1,
            1, 1, 2, 1, 2,
            1, 1, 1, 2, 2,
            2, 2, 2, 1, 1,
            1, 1, 1, 1, 1,
            2, 2, 2, 2, 2,
            2, 1, 2, 1, 0,
            2, 2, 1, 2, 0,
            2, 2, 1, 1, 2,
        ])
        np.testing.assert_array_equal(adv_labels, reference_labels)
        # Success of the untargeted attack is the share of predictions that no longer match the labels
        success_rate = 1.0 - np.sum(adv_labels == true_labels) / nb_samples
        logger.info("EAD success rate on Iris: %.2f%%", (success_rate * 100))

        # Test targeted attack
        targets = random_targets(self.y_test_iris, nb_classes=3)
        targeted_ead = ElasticNet(classifier, targeted=True, max_iter=10)
        x_adv = targeted_ead.generate(self.x_test_iris, y=targets)
        reference_first_sample = np.asarray([0.8859426, 0.51877, 0.5014498, 0.05447771])
        np.testing.assert_array_almost_equal(x_adv[0, :], reference_first_sample, decimal=6)
        self.assertLessEqual(np.amax(x_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_adv), 0.0)

        adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
        reference_labels = np.asarray([
            0, 0, 0, 2, 2,
            2, 2, 2, 2, 2,
            2, 2, 0, 2, 0,
            0, 2, 2, 0, 2,
            2, 2, 2, 2, 2,
            0, 0, 0, 2, 0,
            2, 2, 2, 2, 2,
            0, 0, 0, 2, 2,
            2, 2, 2, 0, 2,
        ])
        np.testing.assert_array_equal(adv_labels, reference_labels)

        success_rate = np.sum(adv_labels == np.argmax(targets, axis=1)) / nb_samples
        logger.info("Targeted EAD success rate on Iris: %.2f%%", (success_rate * 100))
Пример #16
0
    def test_3_tensorflow_iris(self):
        """
        Regression test of the ElasticNet (EAD) attack on the TensorFlow tabular classifier.

        Both the untargeted and the targeted run are compared against hard-coded reference values:
        the first adversarial sample (to 6 decimals) and the predicted class of every adversarial
        sample. The adversarial extrema must also lie within [0, 1]. The session returned with the
        classifier is closed at the end, whether or not the assertions pass.
        """
        classifier, sess = get_tabular_classifier_tf()

        # Release the session in ``finally`` so that a failing assertion cannot leak it
        try:
            # Test untargeted attack
            attack = ElasticNet(classifier, targeted=False, max_iter=10, verbose=False)
            x_test_adv = attack.generate(self.x_test_iris)
            expected_x_test_adv = np.asarray([0.852286, 0.434626, 0.703376, 0.293738])
            np.testing.assert_array_almost_equal(x_test_adv[0, :], expected_x_test_adv, decimal=6)
            self.assertLessEqual(np.amax(x_test_adv), 1.0)
            self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

            predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            np.testing.assert_array_equal(
                predictions_adv,
                np.asarray([
                    1, 2, 2, 2, 1,
                    1, 1, 2, 1, 2,
                    1, 1, 1, 2, 2,
                    2, 2, 2, 1, 1,
                    1, 1, 1, 1, 1,
                    2, 2, 2, 2, 1,
                    2, 1, 2, 1, 0,
                    1, 2, 1, 2, 0,
                    2, 2, 1, 1, 2,
                ]),
            )
            # Success of the untargeted attack is the share of predictions that no longer match
            accuracy = 1.0 - np.sum(predictions_adv == np.argmax(
                self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
            logger.info("EAD success rate on Iris: %.2f%%", (accuracy * 100))

            # Test targeted attack
            targets = random_targets(self.y_test_iris, nb_classes=3)
            attack = ElasticNet(classifier, targeted=True, max_iter=10, verbose=False)
            x_test_adv = attack.generate(self.x_test_iris, **{"y": targets})
            expected_x_test_adv = np.asarray([0.892806, 0.531875, 0.501707, 0.059951])
            np.testing.assert_array_almost_equal(x_test_adv[0, :], expected_x_test_adv, decimal=6)
            self.assertLessEqual(np.amax(x_test_adv), 1.0)
            self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

            predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            np.testing.assert_array_equal(
                predictions_adv,
                np.asarray([
                    0, 0, 0, 2, 2,
                    2, 2, 2, 2, 2,
                    2, 2, 0, 2, 0,
                    0, 2, 2, 0, 2,
                    2, 2, 2, 2, 2,
                    0, 0, 0, 2, 0,
                    2, 2, 2, 2, 2,
                    0, 0, 0, 2, 2,
                    2, 2, 2, 0, 2,
                ]),
            )

            accuracy = np.sum(predictions_adv == np.argmax(
                targets, axis=1)) / self.y_test_iris.shape[0]
            logger.info("Targeted EAD success rate on Iris: %.2f%%", (accuracy * 100))
        finally:
            # Close session
            if sess is not None:
                sess.close()