def test_7_keras_iris_unbounded(self):
    """Run untargeted HopSkipJump (L2 and L-inf norms) on Iris with an unclipped Keras classifier.

    The attack must perturb the inputs and flip at least one prediction for each norm.
    """
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)

    # Run the identical checks for both supported norms.
    # FIX: np.inf replaces np.Inf — the capitalized alias was removed in NumPy 2.0.
    for norm in (2, np.inf):
        attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10, norm=norm)
        x_test_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_adv).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%", (acc * 100))

    # Clean-up session
    k.clear_session()
def test_binary_keras_instantiation_and_attack_pgd(art_warning):
    """Train a binary Keras classifier, wrap it with ART, and attack it with PGD.

    Asserts that at least one adversarial prediction differs from the clean one.
    """
    tf.compat.v1.disable_eager_execution()
    try:
        features, labels = sklearn.datasets.make_classification(
            n_samples=10000, n_features=20, n_informative=5, n_redundant=2, n_repeated=0, n_classes=2
        )
        train_x, test_x, train_y, test_y = sklearn.model_selection.train_test_split(features, labels, test_size=0.2)
        train_x = train_x.astype(np.float32)
        test_x = test_x.astype(np.float32)

        # Minimal dense network with a single sigmoid output for binary classification.
        model = tf.keras.models.Sequential(
            [
                tf.keras.layers.Dense(128, activation=tf.nn.relu, input_shape=(20,)),
                tf.keras.layers.Dense(1, activation=tf.nn.sigmoid),
            ]
        )
        model.summary()
        model.compile(optimizer=tf.optimizers.Adam(), loss="binary_crossentropy", metrics=["accuracy"])

        classifier = KerasClassifier(model=model)
        classifier.fit(train_x, train_y, nb_epochs=5)
        pred = classifier.predict(test_x)

        attack = ProjectedGradientDescent(estimator=classifier, eps=0.5)
        x_test_adv = attack.generate(x=test_x)
        adv_predictions = classifier.predict(x_test_adv)

        # The attack should have changed at least one model output.
        assert (adv_predictions != pred).any()
    except ARTTestException as e:
        art_warning(e)
def test_with_defences(self):
    """FGSM must still produce adversarial examples when a feature-squeezing
    defence and query-based gradient estimation are in front of the model."""
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Get the trained Keras model and bolt a bit-depth-1 squeezer in front of it.
    trained_model = self.classifier_k._model
    squeezer = FeatureSqueezing(bit_depth=1, clip_values=(0, 1))
    classifier = KerasClassifier(model=trained_model, clip_values=(0, 1), preprocessing_defences=squeezer)

    # Create the classifier: gradients are estimated through queries instead of backprop.
    classifier = QueryEfficientGradientEstimationClassifier(classifier, 20, 1 / 64.0, round_samples=1 / 255.0)

    attack = FastGradientMethod(classifier, eps=1)
    x_train_adv = attack.generate(x_train)
    x_test_adv = attack.generate(x_test)

    # Inputs must have been perturbed...
    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    # ...and at least some predicted labels must have changed.
    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_train == train_y_pred).all())
    self.assertFalse((y_test == test_y_pred).all())
def test_iris_unbounded(self):
    """FGSM through an expectation-over-transformations wrapper on an unclipped
    Iris classifier; perturbed features may leave the [0, 1] range."""
    (_, _), (x_test, y_test) = self.iris
    classifier = get_tabular_classifier_kr()

    # The "transformation" is simply the identity, yielded forever.
    def identity(x):
        return x

    def transformation():
        while True:
            yield identity

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
    classifier = ExpectationOverTransformations(classifier, sample_size=1, transformation=transformation)

    attack = FastGradientMethod(classifier, eps=1)
    x_test_adv = attack.generate(x_test)

    # With no clipping the adversarial samples can escape the valid data range.
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    adv_labels = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == adv_labels).all())
    accuracy = np.sum(adv_labels == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info("Accuracy on Iris with limited query info: %.2f%%", (accuracy * 100))
def test_defences_predict(get_default_mnist_subset, get_image_classifier_list):
    """Predicting through stacked preprocessing defences must match applying the
    same defences by hand and then calling the raw model."""
    (x_train_mnist, y_train_mnist), (x_test_mnist, y_test_mnist) = get_default_mnist_subset

    clip_values = (0, 1)
    fs = FeatureSqueezing(clip_values=clip_values, bit_depth=2)
    jpeg = JpegCompression(clip_values=clip_values, apply_predict=True)
    smooth = SpatialSmoothing()

    classifier_, _ = get_image_classifier_list(one_classifier=True)
    classifier = KerasClassifier(
        clip_values=clip_values, model=classifier_.model, preprocessing_defences=[fs, jpeg, smooth]
    )
    assert len(classifier.preprocessing_defences) == 3

    predictions_classifier = classifier.predict(x_test_mnist)

    # Apply the same defences by hand, in the same order the wrapper applies them.
    x_test_defense = x_test_mnist
    for defence in (fs, jpeg, smooth):
        x_test_defense, _ = defence(x_test_defense, y_test_mnist)

    classifier, _ = get_image_classifier_list(one_classifier=True)
    predictions_check = classifier.model.predict(x_test_defense)

    # Check that the prediction results match
    np.testing.assert_array_almost_equal(predictions_classifier, predictions_check, decimal=4)
def test_defences_predict(self):
    """Predicting through stacked preprocessing defences must match applying the
    same defences by hand and then calling the raw model."""
    clip_values = (0, 1)
    fs = FeatureSqueezing(clip_values=clip_values, bit_depth=2)
    jpeg = JpegCompression(clip_values=clip_values, apply_predict=True)
    smooth = SpatialSmoothing()

    classifier_ = get_image_classifier_kr_tf()
    classifier = KerasClassifier(
        clip_values=clip_values, model=classifier_._model, preprocessing_defences=[fs, jpeg, smooth]
    )
    self.assertEqual(len(classifier.preprocessing_defences), 3)

    predictions_classifier = classifier.predict(self.x_test_mnist)

    # Apply the same defences by hand, in the same order the wrapper applies them.
    x_test_defense = self.x_test_mnist
    for defence in (fs, jpeg, smooth):
        x_test_defense, _ = defence(x_test_defense, self.y_test_mnist)

    classifier = get_image_classifier_kr_tf()
    predictions_check = classifier._model.predict(x_test_defense)

    # Check that the prediction results match
    np.testing.assert_array_almost_equal(predictions_classifier, predictions_check, decimal=4)
def test_7_keras_iris_unbounded(self):
    """UniversalPerturbation with a NewtonFool inner attacker on Iris, using an
    unclipped Keras classifier."""
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)

    # One outer iteration; the inner attacker is NewtonFool with five iterations.
    attack = UniversalPerturbation(classifier, verbose=False)
    attack.set_params(
        max_iter=1,
        attacker="newtonfool",
        attacker_params={"max_iter": 5, "verbose": False},
    )

    x_test_iris_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_iris_adv).all())

    adv_labels = np.argmax(classifier.predict(x_test_iris_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == adv_labels).all())
    accuracy = np.sum(adv_labels == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with universal adversarial examples: %.2f%%", (accuracy * 100))
def test_with_defences(self):
    """FGSM with a feature-squeezing defence plus query-efficient black-box
    gradient estimation; logs accuracy on the adversarial train/test splits."""
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Get the ready-trained Keras model
    trained_model = self.classifier_k._model
    squeezer = FeatureSqueezing(bit_depth=1, clip_values=(0, 1))
    classifier = KerasClassifier(model=trained_model, clip_values=(0, 1), preprocessing_defences=squeezer)

    # Wrap the classifier
    classifier = QueryEfficientBBGradientEstimation(classifier, 20, 1 / 64.0, round_samples=1 / 255.0)

    attack = FastGradientMethod(classifier, eps=1)
    x_train_adv = attack.generate(x_train)
    x_test_adv = attack.generate(x_test)

    # Inputs must have been perturbed...
    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    # ...and at least some predicted labels must have changed.
    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_train == train_y_pred).all())
    self.assertFalse((y_test == test_y_pred).all())

    preds = classifier.predict(x_train_adv)
    acc = np.sum(np.argmax(preds, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
    logger.info(
        "Accuracy on adversarial train examples with feature squeezing and limited query info: %.2f%%", (acc * 100)
    )

    preds = classifier.predict(x_test_adv)
    acc = np.sum(np.argmax(preds, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
    logger.info(
        "Accuracy on adversarial test examples with feature squeezing and limited query info: %.2f%%", (acc * 100)
    )
def _predict_classifier(self, x: np.ndarray, batch_size: int = 128, training_mode: bool = False, **kwargs) -> np.ndarray:
    """Cast the input to ART's numpy dtype, then delegate to the base
    KerasClassifier.predict with the same batching and mode arguments."""
    return KerasClassifier.predict(
        self, x=x.astype(ART_NUMPY_DTYPE), batch_size=batch_size, training_mode=training_mode, **kwargs
    )
def test_keras_iris_unbounded_LInf(self):
    """Untargeted Carlini L-inf attack on Iris with an unclipped Keras classifier."""
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
    attack = CarliniLInfMethod(classifier, targeted=False, max_iter=10, eps=1)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())

    true_labels = np.argmax(self.y_test_iris, axis=1)
    predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((true_labels == predictions_adv).all())
    accuracy = np.sum(predictions_adv == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with C&W adversarial examples: %.2f%%", (accuracy * 100))
def test_keras_iris_unbounded(self):
    """NewtonFool on Iris with an unclipped Keras classifier."""
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
    attack = NewtonFool(classifier, max_iter=5, batch_size=128)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())

    true_labels = np.argmax(self.y_test_iris, axis=1)
    adv_labels = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((true_labels == adv_labels).all())
    accuracy = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with NewtonFool adversarial examples: %.2f%%", (accuracy * 100))
def test_keras_classifier(self):
    """
    Second test with the KerasClassifier.

    Steals a victim MNIST classifier via CopycatCNN and checks that the thieved
    model agrees with the victim on more than 30% of the first 100 samples.
    :return:
    """
    # Build KerasClassifier
    victim_krc = get_image_classifier_kr()

    # Create simple CNN
    model = Sequential()
    model.add(Conv2D(1, kernel_size=(7, 7), activation="relu", input_shape=(28, 28, 1)))
    model.add(MaxPooling2D(pool_size=(4, 4)))
    model.add(Flatten())
    model.add(Dense(10, activation="softmax"))
    loss = keras.losses.categorical_crossentropy
    # FIX: `learning_rate` replaces the deprecated `lr` argument (removed in Keras 3).
    model.compile(loss=loss, optimizer=keras.optimizers.Adam(learning_rate=0.001), metrics=["accuracy"])

    # Get classifier
    thieved_krc = KerasClassifier(model, clip_values=(0, 1), use_logits=False)

    # Create attack
    copycat_cnn = CopycatCNN(
        classifier=victim_krc,
        batch_size_fit=self.batch_size,
        batch_size_query=self.batch_size,
        nb_epochs=NB_EPOCHS,
        nb_stolen=NB_STOLEN,
    )
    thieved_krc = copycat_cnn.extract(x=self.x_train_mnist, thieved_classifier=thieved_krc)

    # The stolen model should agree with the victim on a reasonable fraction of samples.
    victim_preds = np.argmax(victim_krc.predict(x=self.x_train_mnist[:100]), axis=1)
    thieved_preds = np.argmax(thieved_krc.predict(x=self.x_train_mnist[:100]), axis=1)
    acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)
    self.assertGreater(acc, 0.3)

    # Clean-up
    k.clear_session()
def test_7_keras_iris_unbounded(self):
    """Virtual Adversarial Method on Iris without clip values; perturbed
    features may escape the [0, 1] range."""
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
    attack = VirtualAdversarialMethod(classifier, eps=1, verbose=False)
    x_test_iris_adv = attack.generate(self.x_test_iris)

    # Without clipping the adversarial samples can leave the valid data range.
    self.assertFalse((self.x_test_iris == x_test_iris_adv).all())
    self.assertTrue((x_test_iris_adv > 1).any())
    self.assertTrue((x_test_iris_adv < 0).any())

    true_labels = np.argmax(self.y_test_iris, axis=1)
    adv_labels = np.argmax(classifier.predict(x_test_iris_adv), axis=1)
    self.assertFalse((true_labels == adv_labels).all())
    accuracy = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with VAT adversarial examples: %.2f%%", (accuracy * 100))
def test_7_keras_iris_unbounded(self):
    """PGD on Iris with an unclipped Keras classifier; perturbations may leave [0, 1]."""
    (_, _), (x_test, y_test) = self.iris
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
    attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.2, max_iter=5)
    x_test_adv = attack.generate(x_test)

    # Without clipping the adversarial samples can leave the valid data range.
    self.assertFalse((np.array(x_test) == x_test_adv).all())
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    true_labels = np.argmax(np.array(y_test), axis=1)
    adv_labels = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((true_labels == adv_labels).all())
    accuracy = np.sum(adv_labels == true_labels) / len(y_test)
    logger.info("Accuracy on Iris with PGD adversarial examples: %.2f%%", (accuracy * 100))
def test_resnet(create_test_image):
    """Sanity-check ART's KerasClassifier wrapper around a pretrained ResNet50:
    the test image must be classified as a Weimaraner with the recorded score."""
    image_file_path = create_test_image
    keras.backend.set_learning_phase(0)

    model = ResNet50(weights="imagenet")
    classifier = KerasClassifier(model, clip_values=(0, 255))

    # Load the image at ResNet50's input size and add a leading batch axis.
    image = img_to_array(load_img(image_file_path, target_size=(224, 224)))
    image = image.reshape((1,) + image.shape)

    prediction = classifier.predict(image)
    label = decode_predictions(prediction)[0][0]
    assert label[1] == "Weimaraner"
    # Regression check on the class-178 score (Weimaraner in ImageNet ordering).
    np.testing.assert_array_almost_equal(prediction[0, 178], 0.2658045, decimal=3)
def test_iris_unbounded(self):
    """FGSM via query-efficient gradient estimation on an unclipped Iris classifier."""
    (_, _), (x_test, y_test) = self.iris
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
    classifier = QueryEfficientGradientEstimationClassifier(classifier, 20, 1 / 64.0, round_samples=1 / 255.0)

    attack = FastGradientMethod(classifier, eps=1)
    x_test_adv = attack.generate(x_test)

    # Without clipping the adversarial samples can leave the valid data range.
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    adv_labels = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == adv_labels).all())
def test_keras_iris(self):
    """
    Second test for Keras.

    Steals a victim Iris classifier via CopycatCNN and checks that the thieved
    model agrees with the victim on more than 30% of the first 100 samples.
    :return:
    """
    # Build KerasClassifier
    victim_krc = get_tabular_classifier_kr()

    # Create simple CNN
    model = Sequential()
    model.add(Dense(10, input_shape=(4,), activation="relu"))
    model.add(Dense(10, activation="relu"))
    model.add(Dense(3, activation="softmax"))
    # FIX: `learning_rate` replaces the deprecated `lr` argument (removed in Keras 3).
    model.compile(
        loss="categorical_crossentropy", optimizer=keras.optimizers.Adam(learning_rate=0.001), metrics=["accuracy"]
    )

    # Get classifier
    thieved_krc = KerasClassifier(model, clip_values=(0, 1), use_logits=False, channels_first=True)

    # Create attack
    copycat_cnn = CopycatCNN(
        classifier=victim_krc,
        batch_size_fit=self.batch_size,
        batch_size_query=self.batch_size,
        nb_epochs=NB_EPOCHS,
        nb_stolen=NB_STOLEN,
    )
    thieved_krc = copycat_cnn.extract(x=self.x_train_iris, thieved_classifier=thieved_krc)

    # The stolen model should agree with the victim on a reasonable fraction of samples.
    victim_preds = np.argmax(victim_krc.predict(x=self.x_train_iris[:100]), axis=1)
    thieved_preds = np.argmax(thieved_krc.predict(x=self.x_train_iris[:100]), axis=1)
    acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)
    self.assertGreater(acc, 0.3)

    # Clean-up
    k.clear_session()
def test_iris_unbounded(self):
    """FGSM via query-efficient black-box gradient estimation on an unclipped
    Iris classifier; logs the resulting adversarial accuracy."""
    (_, _), (x_test, y_test) = self.iris
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
    classifier = QueryEfficientBBGradientEstimation(classifier, 20, 1 / 64.0, round_samples=1 / 255.0)

    attack = FastGradientMethod(classifier, eps=1)
    x_test_adv = attack.generate(x_test)

    # Without clipping the adversarial samples can leave the valid data range.
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    true_labels = np.argmax(y_test, axis=1)
    adv_labels = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((true_labels == adv_labels).all())
    accuracy = np.sum(adv_labels == true_labels) / y_test.shape[0]
    logger.info("Accuracy on Iris with limited query info: %.2f%%", (accuracy * 100))
def test_keras_iris_unbounded(self): classifier = get_tabular_classifier_kr() # Recreate a classifier without clip values classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True) attack = ElasticNet(classifier, targeted=False, max_iter=10) x_test_adv = attack.generate(self.x_test_iris) expected_x_test_adv = np.asarray( [0.85931635, 0.44633555, 0.65658355, 0.23840423]) np.testing.assert_array_almost_equal(x_test_adv[0, :], expected_x_test_adv, decimal=6) predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1) np.testing.assert_array_equal( predictions_adv, np.asarray([ 1, 1, 1, 2, 1, 1, 1, 2, 1, 2, 1, 1, 1, 2, 1, 1, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 2, 1, 0, 1, 1, 1, 2, 0, 2, 2, 1, 1, 2, ]), ) accuracy = 1.0 - np.sum(predictions_adv == np.argmax( self.y_test_iris, axis=1)) / self.y_test_iris.shape[0] logger.info("EAD success rate on Iris: %.2f%%", (accuracy * 100))
def _predict_classifier(self, x: np.ndarray) -> np.ndarray:
    """Cast the input to ART's numpy dtype, then delegate to the base
    KerasClassifier.predict using this instance's batch size."""
    return KerasClassifier.predict(self, x=x.astype(ART_NUMPY_DTYPE), batch_size=self.batch_size)
# Persist the trained target model, then score it on the held-out test set.
# NOTE(review): `model`, `x_test`, `y_test`, and `number_neurons` are defined
# earlier in this script, outside the visible chunk.
model.save("./model.h5")
score_target = model.evaluate(x_test, y_test, verbose=0)

# Wrap the trained model as an ART classifier (logits output, inputs in [0, 1]).
target_classifier = KerasClassifier(model=model, use_logits=True, clip_values=(0, 1))

# Functionally equivalent extraction: recover a copy of the model from black-box
# queries, seeded with the first 100 test samples.
fee = FunctionallyEquivalentExtraction(
    classifier=target_classifier, num_neurons=number_neurons  # type: ignore
)
bbc = fee.extract(x_test[0:100])
y_test_predicted_extracted = bbc.predict(x_test)
y_test_predicted_target = target_classifier.predict(x_test)

print("Target model - Test accuracy:", score_target[1])
# Accuracy of the extracted model against the true labels.
print(
    "Extracted model - Test accuracy:",
    np.sum(np.argmax(y_test_predicted_extracted, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0],
)
# Fidelity: how often the extracted model agrees with the target model's predictions.
print(
    "Extracted model - Test Fidelity:",
    np.sum(np.argmax(y_test_predicted_extracted, axis=1) == np.argmax(y_test_predicted_target, axis=1))
    / y_test_predicted_target.shape[0],
)