def test_2_tensorflow_iris(self):
    """
    Check that defensive distillation rejects the TensorFlow Iris classifier.

    Calling the transformer is expected to raise a `ValueError` whose message reports that the trained classifier
    does not produce probability outputs.

    :return:
    """
    # Create the trained classifier
    trained_classifier, sess = get_tabular_classifier_tf()

    try:
        # Create the modified classifier on the same session, without loading initial weights
        transformed_classifier, _ = get_tabular_classifier_tf(load_init=False, sess=sess)

        # Create defensive distillation transformer
        transformer = DefensiveDistillation(
            classifier=trained_classifier, batch_size=BATCH_SIZE, nb_epochs=NB_EPOCHS
        )

        # Perform the transformation
        with self.assertRaises(ValueError) as context:
            _ = transformer(x=self.x_train_iris, transformed_classifier=transformed_classifier)

        self.assertIn("The input trained classifier do not produce probability outputs.", str(context.exception))
    finally:
        # Clean-up session, also when one of the checks above fails
        if sess is not None:
            sess.close()
def test_2_tensorflow_iris(self):
    """
    Extract the TensorFlow Iris classifier with KnockoffNets.

    The attack is run first with the "random" and then with the "adaptive" sampling strategy. After each
    extraction the share of training samples on which victim and thieved classifier predict the same class must
    exceed 0.3 and 0.4 respectively.

    :return:
    """
    # Get the TensorFlow classifier
    victim_tfc, sess = get_tabular_classifier_tf()

    def agreement(thieved):
        """Return the fraction of training samples where victim and `thieved` predict the same class."""
        victim_preds = np.argmax(victim_tfc.predict(x=self.x_train_iris), axis=1)
        thieved_preds = np.argmax(thieved.predict(x=self.x_train_iris), axis=1)
        return np.sum(victim_preds == thieved_preds) / len(victim_preds)

    try:
        # Create the thieved classifier
        thieved_tfc, _ = get_tabular_classifier_tf(load_init=False, sess=sess)

        # Create random attack
        attack = KnockoffNets(
            classifier=victim_tfc,
            batch_size_fit=BATCH_SIZE,
            batch_size_query=BATCH_SIZE,
            nb_epochs=NB_EPOCHS,
            nb_stolen=NB_STOLEN,
            sampling_strategy="random",
            verbose=False,
        )
        thieved_tfc = attack.extract(x=self.x_train_iris, thieved_classifier=thieved_tfc)

        self.assertGreater(agreement(thieved_tfc), 0.3)

        # Create adaptive attack; it continues from the classifier returned by the random attack
        attack = KnockoffNets(
            classifier=victim_tfc,
            batch_size_fit=BATCH_SIZE,
            batch_size_query=BATCH_SIZE,
            nb_epochs=NB_EPOCHS,
            nb_stolen=NB_STOLEN,
            sampling_strategy="adaptive",
            reward="all",
            verbose=False,
        )
        thieved_tfc = attack.extract(x=self.x_train_iris, y=self.y_train_iris, thieved_classifier=thieved_tfc)

        self.assertGreater(agreement(thieved_tfc), 0.4)
    finally:
        # Clean-up session, also when an assertion above fails
        if sess is not None:
            sess.close()
def test_tensorflow_iris(self):
    """
    Run untargeted and targeted BasicIterativeMethod attacks on the TensorFlow Iris classifier.

    Both runs assert that the adversarial samples differ from the inputs and stay within [0, 1]. The untargeted
    run requires at least one prediction that differs from the true label, the targeted run at least one
    prediction that equals its target.
    """
    classifier, _ = get_tabular_classifier_tf()
    nb_samples = self.y_test_iris.shape[0]

    # Test untargeted attack
    untargeted = BasicIterativeMethod(classifier, eps=1, eps_step=0.1, max_iter=5)
    x_adv = untargeted.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    true_labels = np.argmax(self.y_test_iris, axis=1)
    self.assertFalse((true_labels == adv_labels).all())
    accuracy = np.sum(adv_labels == true_labels) / nb_samples
    logger.info("Accuracy on Iris with BIM adversarial examples: %.2f%%", (accuracy * 100))

    # Test targeted attack
    targets = random_targets(self.y_test_iris, nb_classes=3)
    targeted = BasicIterativeMethod(classifier, targeted=True, eps=1, eps_step=0.1, max_iter=5)
    x_adv = targeted.generate(self.x_test_iris, y=targets)
    self.assertFalse((self.x_test_iris == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    target_labels = np.argmax(targets, axis=1)
    self.assertTrue((target_labels == adv_labels).any())
    success_rate = np.sum(adv_labels == target_labels) / nb_samples
    logger.info("Success rate of targeted BIM on Iris: %.2f%%", (success_rate * 100))
def test_2_tensorflow_iris(self):
    """
    Run an untargeted UniversalPerturbation attack (inner attacker "ead") on the TensorFlow Iris classifier.

    Asserts that the adversarial samples differ from the inputs, stay within [0, 1] and that at least one
    prediction differs from the true label.
    """
    classifier, _ = get_tabular_classifier_tf()

    # Test untargeted attack
    ead_params = {"max_iter": 5, "targeted": False, "verbose": False}
    attack = UniversalPerturbation(classifier, verbose=False)
    attack.set_params(max_iter=1, attacker="ead", attacker_params=ead_params)

    x_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    true_labels = np.argmax(self.y_test_iris, axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    accuracy = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with universal adversarial examples: %.2f%%", (accuracy * 100))
def test_2_tensorflow_iris(self):
    """
    Run HopSkipJump on the TensorFlow Iris classifier in four configurations.

    The attack is exercised untargeted and targeted, each once without an explicit `norm` argument and once with
    `norm=np.inf`. Every run asserts that the adversarial samples differ from the inputs and stay within [0, 1].
    Untargeted runs require at least one prediction that differs from the true label, targeted runs at least one
    prediction that equals its target.
    """
    classifier, sess = get_tabular_classifier_tf()
    nb_samples = self.y_test_iris.shape[0]

    def check_untargeted(attack):
        """Generate untargeted adversarial samples with `attack`, verify them and log the accuracy."""
        x_test_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        true_labels = np.argmax(self.y_test_iris, axis=1)
        self.assertFalse((true_labels == preds_adv).all())
        acc = np.sum(preds_adv == true_labels) / nb_samples
        logger.info("Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%", (acc * 100))

    def check_targeted(attack, targets):
        """Generate adversarial samples towards `targets` with `attack`, verify them and log the success rate."""
        x_test_adv = attack.generate(self.x_test_iris, y=targets)
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        target_labels = np.argmax(targets, axis=1)
        self.assertTrue((target_labels == preds_adv).any())
        acc = np.sum(preds_adv == target_labels) / nb_samples
        logger.info("Success rate of targeted HopSkipJump on Iris: %.2f%%", (acc * 100))

    try:
        # Test untargeted attack and norm=2
        check_untargeted(
            HopSkipJump(classifier, targeted=False, max_iter=20, max_eval=100, init_eval=10, verbose=False)
        )

        # Test untargeted attack and norm=np.inf
        # (`np.Inf` was removed in NumPy 2.0; `np.inf` is the spelling valid on all versions)
        check_untargeted(
            HopSkipJump(
                classifier, targeted=False, max_iter=20, max_eval=100, init_eval=10, norm=np.inf, verbose=False
            )
        )

        # Test targeted attack and norm=2
        targets = random_targets(self.y_test_iris, nb_classes=3)
        check_targeted(
            HopSkipJump(classifier, targeted=True, max_iter=20, max_eval=100, init_eval=10, verbose=False),
            targets,
        )

        # Test targeted attack and norm=np.inf
        targets = random_targets(self.y_test_iris, nb_classes=3)
        check_targeted(
            HopSkipJump(
                classifier, targeted=True, max_iter=20, max_eval=100, init_eval=10, norm=np.inf, verbose=False
            ),
            targets,
        )
    finally:
        # Clean-up session, also when an assertion above fails
        if sess is not None:
            sess.close()
def test_tensorflow_iris_LInf(self):
    """
    Run untargeted and targeted CarliniLInfMethod attacks on the TensorFlow Iris classifier.

    Both runs assert that the adversarial samples differ from the inputs and that their extreme values stay
    within [0.0, 1.0]. The untargeted run requires at least one prediction that differs from the true label, the
    targeted run at least one prediction that equals its target.
    """
    classifier, _ = get_tabular_classifier_tf()
    nb_samples = self.y_test_iris.shape[0]

    # Test untargeted attack
    untargeted = CarliniLInfMethod(classifier, targeted=False, max_iter=10, eps=0.5)
    x_adv = untargeted.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_adv).all())
    self.assertLessEqual(np.amax(x_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_adv), 0.0)

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    true_labels = np.argmax(self.y_test_iris, axis=1)
    self.assertFalse((true_labels == adv_labels).all())
    rate = np.sum(adv_labels == true_labels) / nb_samples
    logger.info("Accuracy on Iris with C&W adversarial examples: %.2f%%", (rate * 100))

    # Test targeted attack
    targets = random_targets(self.y_test_iris, nb_classes=3)
    targeted = CarliniLInfMethod(classifier, targeted=True, max_iter=10, eps=0.5)
    x_adv = targeted.generate(self.x_test_iris, y=targets)
    self.assertFalse((self.x_test_iris == x_adv).all())
    self.assertLessEqual(np.amax(x_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_adv), 0.0)

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    target_labels = np.argmax(targets, axis=1)
    self.assertTrue((target_labels == adv_labels).any())
    rate = np.sum(adv_labels == target_labels) / nb_samples
    logger.info("Success rate of targeted C&W on Iris: %.2f%%", (rate * 100))
def _get_tabular_classifier_list(clipped=True):
    """
    Return the list of tabular classifiers for the current `framework`.

    `framework` is read from the enclosing scope.

    :param clipped: If True, return the framework's default tabular classifier. If False, Keras returns the same
                    model re-wrapped in a `KerasClassifier` created without `clip_values`; TensorFlow and PyTorch
                    have no such variant, so a warning is logged and None is returned.
    :return: A list of classifiers, or None when no classifier is defined for the requested combination or
             `framework` is not one of the handled names.
    """
    # Default covers frameworks not handled below; previously this path raised UnboundLocalError on return.
    classifier_list = None

    if framework == "keras":
        if clipped:
            classifier_list = [get_tabular_classifier_kr()]
        else:
            classifier = get_tabular_classifier_kr()
            classifier_list = [KerasClassifier(model=classifier.model, use_logits=False, channels_first=True)]
    elif framework == "tensorflow":
        if clipped:
            classifier, _ = get_tabular_classifier_tf()
            classifier_list = [classifier]
        else:
            logging.warning("%s doesn't have an uncliped classifier defined yet", framework)
    elif framework == "pytorch":
        if clipped:
            classifier_list = [get_tabular_classifier_pt()]
        else:
            logging.warning("%s doesn't have an uncliped classifier defined yet", framework)
    elif framework == "scikitlearn":
        # NOTE(review): `clipped=False` is hard-coded here and the caller's `clipped` is ignored - confirm intent.
        return get_tabular_classifier_scikit_list(clipped=False)

    return classifier_list
def test_tensorflow_iris(self):
    """
    Extract the TensorFlow Iris classifier with CopycatCNN.

    A three-layer dense graph is built in the victim's session and wrapped as the thieved classifier. After
    extraction, victim and thieved classifier must predict the same class on more than 30% of the first 100
    training samples.

    :return:
    """
    # Get the TensorFlow classifier
    victim_tfc, sess = get_tabular_classifier_tf()

    try:
        # Define input and output placeholders
        input_ph = tf.placeholder(tf.float32, shape=[None, 4])
        output_ph = tf.placeholder(tf.int32, shape=[None, 3])

        # Define the tensorflow graph
        dense1 = tf.layers.dense(input_ph, 10)
        dense2 = tf.layers.dense(dense1, 10)
        logits = tf.layers.dense(dense2, 3)

        # Train operator
        loss = tf.reduce_mean(tf.losses.softmax_cross_entropy(logits=logits, onehot_labels=output_ph))
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        train = optimizer.minimize(loss)

        # TensorFlow session and initialization
        sess.run(tf.global_variables_initializer())

        # Wrap the graph as the classifier that will receive the stolen behaviour
        thieved_tfc = TensorFlowClassifier(
            clip_values=(0, 1),
            input_ph=input_ph,
            output=logits,
            labels_ph=output_ph,
            train=train,
            loss=loss,
            learning=None,
            sess=sess,
            channels_first=True,
        )

        # Create attack
        copycat_cnn = CopycatCNN(
            classifier=victim_tfc,
            batch_size_fit=self.batch_size,
            batch_size_query=self.batch_size,
            nb_epochs=NB_EPOCHS,
            nb_stolen=NB_STOLEN,
        )
        thieved_tfc = copycat_cnn.extract(x=self.x_train_iris, thieved_classifier=thieved_tfc)

        victim_preds = np.argmax(victim_tfc.predict(x=self.x_train_iris[:100]), axis=1)
        thieved_preds = np.argmax(thieved_tfc.predict(x=self.x_train_iris[:100]), axis=1)
        acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)

        self.assertGreater(acc, 0.3)
    finally:
        # Clean-up session and graph, also when the test fails part-way
        if sess is not None:
            sess.close()
        tf.reset_default_graph()
def test_2_tensorflow_iris(self):
    """
    Check that VirtualAdversarialMethod rejects the TensorFlow Iris classifier.

    Generating adversarial samples is expected to raise a `TypeError` whose message states that the attack needs
    probability outputs in the range [0, 1].
    """
    classifier, _ = get_tabular_classifier_tf()
    attack = VirtualAdversarialMethod(classifier, eps=0.1, verbose=False)

    expected_message = (
        "This attack requires a classifier predicting probabilities in the range [0, 1] as output."
        "Values smaller than 0.0 or larger than 1.0 have been detected."
    )

    with self.assertRaises(TypeError) as raised:
        attack.generate(self.x_test_iris)

    self.assertIn(expected_message, str(raised.exception))
def test_tensorflow_iris(self):
    """
    Run the NewtonFool attack on the TensorFlow Iris classifier.

    Asserts that the adversarial samples differ from the inputs, stay within [0, 1] and that at least one
    prediction differs from the true label.
    """
    classifier, _ = get_tabular_classifier_tf()

    newton_fool = NewtonFool(classifier, max_iter=5, batch_size=128)
    x_adv = newton_fool.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    true_labels = np.argmax(self.y_test_iris, axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    accuracy = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with NewtonFool adversarial examples: %.2f%%", (accuracy * 100))
def test_2_tensorflow_iris(self):
    """
    Run the DeepFool attack on the TensorFlow Iris classifier.

    Asserts that the adversarial samples differ from the inputs, that their extreme values stay within
    [0.0, 1.0] and that at least one prediction differs from the true label.
    """
    classifier, _ = get_tabular_classifier_tf()

    deep_fool = DeepFool(classifier, max_iter=5, batch_size=128)
    x_adv = deep_fool.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_adv).all())
    self.assertLessEqual(np.amax(x_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_adv), 0.0)

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    true_labels = np.argmax(self.y_test_iris, axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    acc = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with DeepFool adversarial examples: %.2f%%", (acc * 100))
def test_2_tensorflow_iris(self):
    """
    Run an untargeted ProjectedGradientDescent attack on the TensorFlow Iris classifier.

    The test split is taken from `self.iris`. Asserts that the adversarial samples differ from the inputs, stay
    within [0, 1] and that at least one prediction differs from the true label.
    """
    (_, _), (x_test, y_test) = self.iris
    classifier, _ = get_tabular_classifier_tf()

    # Test untargeted attack
    pgd = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1, max_iter=5)
    x_adv = pgd.generate(x_test)
    self.assertFalse((np.array(x_test) == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    true_labels = np.argmax(np.array(y_test), axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    accuracy = np.sum(adv_labels == true_labels) / len(y_test)
    logger.info("Accuracy on Iris with PGD adversarial examples: %.2f%%", (accuracy * 100))
def test_2_tensorflow_iris_vector(self):
    """
    Run the SaliencyMapMethod (JSMA) attack on the TensorFlow Iris classifier.

    Asserts that the adversarial samples differ from the inputs, stay within [0, 1] and that at least one
    prediction differs from the true label.
    """
    classifier, _ = get_tabular_classifier_tf()

    jsma = SaliencyMapMethod(classifier, theta=1, verbose=False)
    x_adv = jsma.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_adv).all())
    self.assertTrue((x_adv <= 1).all())
    self.assertTrue((x_adv >= 0).all())

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    true_labels = np.argmax(self.y_test_iris, axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    acc = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with JSMA adversarial examples: %.2f%%", (acc * 100))
def _tabular_dl_estimator(clipped=True):
    """
    Return the deep-learning tabular estimator for the current `framework`.

    `framework` is read from the enclosing scope.

    :param clipped: If True, return the framework's default tabular classifier. If False, only Keras provides an
                    estimator: the same model re-wrapped in a `KerasClassifier` created without `clip_values`.
    :return: The selected classifier.
    :raises ARTTestFixtureNotImplemented: If no estimator is available for this framework/`clipped` combination.
    """
    estimator = None

    if framework == "keras":
        estimator = get_tabular_classifier_kr()
        if not clipped:
            estimator = KerasClassifier(model=estimator.model, use_logits=False, channels_first=True)
    elif framework in ("tensorflow1", "tensorflow2"):
        if clipped:
            estimator, _ = get_tabular_classifier_tf()
    elif framework == "pytorch" and clipped:
        estimator = get_tabular_classifier_pt()

    if estimator is not None:
        return estimator

    raise ARTTestFixtureNotImplemented(
        "no deep learning tabular estimator available", tabular_dl_estimator.__name__, framework
    )
def test_tensorflow_iris(self):
    """
    Run untargeted and targeted ElasticNet (EAD) attacks on the TensorFlow Iris classifier.

    Each run compares the first adversarial sample with hard-coded reference values (6 decimals), checks that all
    adversarial values stay within [0.0, 1.0] and compares the predicted classes with a hard-coded reference.
    """
    classifier, _ = get_tabular_classifier_tf()
    nb_samples = self.y_test_iris.shape[0]

    # Test untargeted attack
    untargeted = ElasticNet(classifier, targeted=False, max_iter=10)
    x_adv = untargeted.generate(self.x_test_iris)
    reference_sample = np.asarray([0.8479195, 0.42525578, 0.70166135, 0.28664514])
    np.testing.assert_array_almost_equal(x_adv[0, :], reference_sample, decimal=6)
    self.assertLessEqual(np.amax(x_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_adv), 0.0)

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    reference_labels = np.asarray(
        [
            1, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 2,
            2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
            2, 1, 2, 1, 0, 2, 2, 1, 2, 0, 2, 2, 1, 1, 2,
        ]
    )
    np.testing.assert_array_equal(adv_labels, reference_labels)
    true_labels = np.argmax(self.y_test_iris, axis=1)
    success_rate = 1.0 - np.sum(adv_labels == true_labels) / nb_samples
    logger.info("EAD success rate on Iris: %.2f%%", (success_rate * 100))

    # Test targeted attack
    targets = random_targets(self.y_test_iris, nb_classes=3)
    targeted = ElasticNet(classifier, targeted=True, max_iter=10)
    x_adv = targeted.generate(self.x_test_iris, y=targets)
    reference_sample = np.asarray([0.8859426, 0.51877, 0.5014498, 0.05447771])
    np.testing.assert_array_almost_equal(x_adv[0, :], reference_sample, decimal=6)
    self.assertLessEqual(np.amax(x_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_adv), 0.0)

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    reference_labels = np.asarray(
        [
            0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0,
            0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 0,
            2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 0, 2,
        ]
    )
    np.testing.assert_array_equal(adv_labels, reference_labels)
    target_labels = np.argmax(targets, axis=1)
    success_rate = np.sum(adv_labels == target_labels) / nb_samples
    logger.info("Targeted EAD success rate on Iris: %.2f%%", (success_rate * 100))
def test_3_tensorflow_iris(self):
    """
    Run untargeted and targeted ElasticNet (EAD) attacks on the TensorFlow Iris classifier.

    Each run compares the first adversarial sample with hard-coded reference values (6 decimals), checks that all
    adversarial values stay within [0.0, 1.0] and compares the predicted classes with a hard-coded reference.
    The TensorFlow session is closed afterwards, also when a check fails.
    """
    classifier, sess = get_tabular_classifier_tf()

    try:
        # Test untargeted attack
        attack = ElasticNet(classifier, targeted=False, max_iter=10, verbose=False)
        x_test_adv = attack.generate(self.x_test_iris)
        expected_x_test_adv = np.asarray([0.852286, 0.434626, 0.703376, 0.293738])
        np.testing.assert_array_almost_equal(x_test_adv[0, :], expected_x_test_adv, decimal=6)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

        predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        np.testing.assert_array_equal(
            predictions_adv,
            np.asarray(
                [
                    1, 2, 2, 2, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 2,
                    2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1,
                    2, 1, 2, 1, 0, 1, 2, 1, 2, 0, 2, 2, 1, 1, 2,
                ]
            ),
        )
        # Success rate = share of samples whose prediction no longer matches the true label
        accuracy = 1.0 - np.sum(predictions_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info("EAD success rate on Iris: %.2f%%", (accuracy * 100))

        # Test targeted attack
        targets = random_targets(self.y_test_iris, nb_classes=3)
        attack = ElasticNet(classifier, targeted=True, max_iter=10, verbose=False)
        x_test_adv = attack.generate(self.x_test_iris, y=targets)
        expected_x_test_adv = np.asarray([0.892806, 0.531875, 0.501707, 0.059951])
        np.testing.assert_array_almost_equal(x_test_adv[0, :], expected_x_test_adv, decimal=6)
        self.assertLessEqual(np.amax(x_test_adv), 1.0)
        self.assertGreaterEqual(np.amin(x_test_adv), 0.0)

        predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        np.testing.assert_array_equal(
            predictions_adv,
            np.asarray(
                [
                    0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 0,
                    0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 0,
                    2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 2, 0, 2,
                ]
            ),
        )
        accuracy = np.sum(predictions_adv == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Targeted EAD success rate on Iris: %.2f%%", (accuracy * 100))
    finally:
        # Close session, also when one of the checks above fails
        if sess is not None:
            sess.close()