def test_keras_iris(self):
    """
    Check defensive distillation of a Keras classifier on the Iris training data.

    A second classifier, built with ``load_init=False``, is passed through ``DefensiveDistillation``
    driven by the trained classifier. The test then checks that both classifiers agree on the
    predicted label for more than 20% of the samples and that the cross-entropy between their
    outputs lies in ``[0, 20)``.

    :return:
    """
    # Create the trained classifier
    trained_classifier = get_tabular_classifier_kr()

    # Create the modified classifier
    transformed_classifier = get_tabular_classifier_kr(load_init=False)

    # Create defensive distillation transformer
    transformer = DefensiveDistillation(classifier=trained_classifier, batch_size=BATCH_SIZE, nb_epochs=NB_EPOCHS)

    # Perform the transformation
    transformed_classifier = transformer(x=self.x_train_iris, transformed_classifier=transformed_classifier)

    # Compare the 2 outputs
    preds1 = trained_classifier.predict(x=self.x_train_iris, batch_size=BATCH_SIZE)
    preds2 = transformed_classifier.predict(x=self.x_train_iris, batch_size=BATCH_SIZE)

    # Keep the raw prediction vectors intact: the class indices are only needed for the
    # agreement rate, while the cross-entropy below is meant to compare the outputs themselves.
    labels1 = np.argmax(preds1, axis=1)
    labels2 = np.argmax(preds2, axis=1)
    acc = np.sum(labels1 == labels2) / len(labels1)
    self.assertGreater(acc, 0.2)

    # Previously this received the argmax'd class indices because preds1/preds2 had been overwritten.
    ce = cross_entropy(preds1, preds2)
    self.assertLess(ce, 20)
    self.assertGreaterEqual(ce, 0)
def _get_tabular_classifier_list(clipped=True):
    """
    Return the tabular classifiers available for the current ``framework``.

    ``framework`` is read from the enclosing scope (not visible in this block).

    :param clipped: If True, return the classifier as produced by the framework's factory. If False,
                    return a variant without clip values; only Keras provides one here.
    :return: A list of classifiers, or None when the framework is not handled or has no unclipped
             variant. For "scikitlearn" the result of ``get_tabular_classifier_scikit_list`` is returned.
    """
    # Default for frameworks not handled below; without it the final return raised UnboundLocalError.
    classifier_list = None

    if framework == "keras":
        if clipped:
            classifier_list = [get_tabular_classifier_kr()]
        else:
            # Rewrap the underlying model without passing clip values
            classifier = get_tabular_classifier_kr()
            classifier_list = [KerasClassifier(model=classifier.model, use_logits=False, channels_first=True)]
    elif framework == "tensorflow":
        if clipped:
            classifier, _ = get_tabular_classifier_tf()
            classifier_list = [classifier]
        else:
            logging.warning("%s doesn't have an uncliped classifier defined yet", framework)
    elif framework == "pytorch":
        if clipped:
            classifier_list = [get_tabular_classifier_pt()]
        else:
            logging.warning("%s doesn't have an uncliped classifier defined yet", framework)
    elif framework == "scikitlearn":
        # NOTE(review): the scikit-learn list is always requested with clipped=False, regardless of
        # the ``clipped`` argument - kept as in the original; confirm this is intended.
        return get_tabular_classifier_scikit_list(clipped=False)

    return classifier_list
def test_6_keras_iris(self):
    """
    Check KnockoffNets model extraction against the Keras Iris classifier.

    The thieved classifier is first extracted with the "random" sampling strategy and then, starting
    from that result, with the "adaptive" strategy (reward "all"). After each extraction the labels
    it predicts on the Iris training data must agree with the victim's above a minimum rate
    (0.3, then 0.33).

    :return:
    """
    try:
        # Build KerasClassifier
        victim_krc = get_tabular_classifier_kr()

        # Create the thieved classifier
        thieved_krc = get_tabular_classifier_kr(load_init=False)

        # Create random attack
        attack = KnockoffNets(
            classifier=victim_krc,
            batch_size_fit=BATCH_SIZE,
            batch_size_query=BATCH_SIZE,
            nb_epochs=NB_EPOCHS,
            nb_stolen=NB_STOLEN,
            sampling_strategy="random",
            verbose=False,
        )
        thieved_krc = attack.extract(x=self.x_train_iris, thieved_classifier=thieved_krc)

        victim_preds = np.argmax(victim_krc.predict(x=self.x_train_iris), axis=1)
        thieved_preds = np.argmax(thieved_krc.predict(x=self.x_train_iris), axis=1)
        acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)
        self.assertGreater(acc, 0.3)

        # Create adaptive attack
        attack = KnockoffNets(
            classifier=victim_krc,
            batch_size_fit=BATCH_SIZE,
            batch_size_query=BATCH_SIZE,
            nb_epochs=NB_EPOCHS,
            nb_stolen=NB_STOLEN,
            sampling_strategy="adaptive",
            reward="all",
            verbose=False,
        )
        thieved_krc = attack.extract(x=self.x_train_iris, y=self.y_train_iris, thieved_classifier=thieved_krc)

        victim_preds = np.argmax(victim_krc.predict(x=self.x_train_iris), axis=1)
        thieved_preds = np.argmax(thieved_krc.predict(x=self.x_train_iris), axis=1)
        acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)
        self.assertGreater(acc, 0.33)
    finally:
        # Clean-up: runs even when an assertion above fails, so the session is always cleared
        k.clear_session()
def test_7_keras_iris_unbounded(self):
    """
    Run UniversalPerturbation (NewtonFool inner attack) against a Keras Iris classifier without clip values.

    The adversarial samples must differ from the inputs and must not all be classified as their true label.
    """
    clipped_classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values, via the public ``model`` accessor instead of ``_model``
    classifier = KerasClassifier(model=clipped_classifier.model, use_logits=False, channels_first=True)

    attack_params = {
        "max_iter": 1,
        "attacker": "newtonfool",
        "attacker_params": {"max_iter": 5, "verbose": False},
    }
    attack = UniversalPerturbation(classifier, verbose=False)
    attack.set_params(**attack_params)
    x_test_iris_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_iris_adv).all())

    true_labels = np.argmax(self.y_test_iris, axis=1)
    preds_adv = np.argmax(classifier.predict(x_test_iris_adv), axis=1)
    self.assertFalse((true_labels == preds_adv).all())

    acc = np.sum(preds_adv == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with universal adversarial examples: %.2f%%", (acc * 100))
def test_7_keras_iris_clipped(self):
    """
    Run HopSkipJump against the Keras Iris classifier, once with the attack's default norm and once
    with the infinity norm.

    For each norm the adversarial samples must differ from the inputs, stay inside [0, 1] and must
    not all be classified as their true label.
    """
    try:
        classifier = get_tabular_classifier_kr()
        true_labels = np.argmax(self.y_test_iris, axis=1)

        # First pass leaves ``norm`` at the attack's default (the original comment labels it Norm=2);
        # second pass is Norm=np.inf. ``np.inf`` replaces the ``np.Inf`` alias removed in NumPy 2.0.
        for norm_kwargs in ({}, {"norm": np.inf}):
            attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10, **norm_kwargs)
            x_test_adv = attack.generate(self.x_test_iris)
            self.assertFalse((self.x_test_iris == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertFalse((true_labels == preds_adv).all())
            acc = np.sum(preds_adv == true_labels) / self.y_test_iris.shape[0]
            logger.info("Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%", (acc * 100))
    finally:
        # Clean-up session, also when an assertion above fails
        k.clear_session()
def test_7_keras_iris_unbounded(self):
    """
    Run HopSkipJump against a Keras Iris classifier without clip values, once with the attack's
    default norm and once with the infinity norm.

    For each norm the adversarial samples must differ from the inputs and must not all be classified
    as their true label.
    """
    try:
        clipped_classifier = get_tabular_classifier_kr()

        # Recreate a classifier without clip values, via the public ``model`` accessor instead of ``_model``
        classifier = KerasClassifier(model=clipped_classifier.model, use_logits=False, channels_first=True)
        true_labels = np.argmax(self.y_test_iris, axis=1)

        # First pass leaves ``norm`` at the attack's default (the original comment labels it Norm=2);
        # second pass is Norm=np.inf. ``np.inf`` replaces the ``np.Inf`` alias removed in NumPy 2.0.
        for norm_kwargs in ({}, {"norm": np.inf}):
            attack = HopSkipJump(classifier, targeted=False, max_iter=2, max_eval=100, init_eval=10, **norm_kwargs)
            x_test_adv = attack.generate(self.x_test_iris)
            self.assertFalse((self.x_test_iris == x_test_adv).all())

            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertFalse((true_labels == preds_adv).all())
            acc = np.sum(preds_adv == true_labels) / self.y_test_iris.shape[0]
            logger.info("Accuracy on Iris with HopSkipJump adversarial examples: %.2f%%", (acc * 100))
    finally:
        # Clean-up session, also when an assertion above fails
        k.clear_session()
def test_keras_iris_clipped(self):
    """
    Run untargeted and targeted BasicIterativeMethod attacks against the Keras Iris classifier.

    In both cases the adversarial samples must differ from the inputs and stay inside [0, 1]. The
    untargeted run must change at least one predicted label; the targeted run must hit at least
    one of the randomly drawn targets.
    """
    classifier = get_tabular_classifier_kr()
    n_samples = self.y_test_iris.shape[0]
    true_labels = np.argmax(self.y_test_iris, axis=1)

    # --- untargeted ---
    untargeted = BasicIterativeMethod(classifier, eps=1, eps_step=0.1, batch_size=128)
    x_adv = untargeted.generate(self.x_test_iris)
    self.assertFalse(np.all(self.x_test_iris == x_adv))
    self.assertTrue(np.all(x_adv <= 1))
    self.assertTrue(np.all(x_adv >= 0))

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse(np.all(true_labels == adv_labels))
    accuracy = np.sum(adv_labels == true_labels) / n_samples
    logger.info("Accuracy on Iris with BIM adversarial examples: %.2f%%", (accuracy * 100))

    # --- targeted ---
    targets = random_targets(self.y_test_iris, nb_classes=3)
    target_labels = np.argmax(targets, axis=1)
    targeted = BasicIterativeMethod(classifier, targeted=True, eps=1, eps_step=0.1)
    x_adv = targeted.generate(self.x_test_iris, y=targets)
    self.assertFalse(np.all(self.x_test_iris == x_adv))
    self.assertTrue(np.all(x_adv <= 1))
    self.assertTrue(np.all(x_adv >= 0))

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertTrue(np.any(target_labels == adv_labels))
    success_rate = np.sum(adv_labels == target_labels) / n_samples
    logger.info("Success rate of targeted BIM on Iris: %.2f%%", (success_rate * 100))
def test_8_keras_iris_clipped(self):
    """
    Run untargeted and targeted ProjectedGradientDescent attacks against the Keras Iris classifier.

    In both cases the adversarial samples must differ from the inputs and stay inside [0, 1]. The
    untargeted run must change at least one predicted label; the targeted run must hit at least
    one of the randomly drawn targets.
    """
    classifier = get_tabular_classifier_kr()
    n_samples = self.y_test_iris.shape[0]
    true_labels = np.argmax(self.y_test_iris, axis=1)

    # --- untargeted ---
    untargeted = ProjectedGradientDescent(classifier, eps=1.0, eps_step=0.1, max_iter=5, verbose=False)
    x_adv = untargeted.generate(self.x_test_iris)
    self.assertFalse(np.all(self.x_test_iris == x_adv))
    self.assertTrue(np.all(x_adv <= 1))
    self.assertTrue(np.all(x_adv >= 0))

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse(np.all(true_labels == adv_labels))
    accuracy = np.sum(adv_labels == true_labels) / n_samples
    logger.info("Accuracy on Iris with PGD adversarial examples: %.2f%%", (accuracy * 100))

    # --- targeted ---
    targets = random_targets(self.y_test_iris, nb_classes=3)
    target_labels = np.argmax(targets, axis=1)
    targeted = ProjectedGradientDescent(
        classifier, targeted=True, eps=1.0, eps_step=0.1, max_iter=5, verbose=False
    )
    x_adv = targeted.generate(self.x_test_iris, y=targets)
    self.assertFalse(np.all(self.x_test_iris == x_adv))
    self.assertTrue(np.all(x_adv <= 1))
    self.assertTrue(np.all(x_adv >= 0))

    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertTrue(np.any(target_labels == adv_labels))
    success_rate = np.sum(adv_labels == target_labels) / n_samples
    logger.info("Success rate of targeted PGD on Iris: %.2f%%", (success_rate * 100))
def test_iris_unbounded(self):
    """
    Run FastGradientMethod through an ExpectationOverTransformations wrapper around a Keras Iris
    classifier without clip values.

    The wrapper is fed an endless stream of identity transformations. With eps=1 the adversarial
    samples must differ from the inputs, leave the [0, 1] range on both sides, and must not all be
    classified as their true label.
    """
    (_, _), (x_test, y_test) = self.iris
    clipped_classifier = get_tabular_classifier_kr()

    def t(x):
        # Identity transformation
        return x

    def transformation():
        # Endless generator of the identity transformation
        while True:
            yield t

    # Recreate a classifier without clip values. Uses the public ``model`` accessor and the
    # ``channels_first`` flag, matching the other KerasClassifier constructions in this file
    # (``channel_index=1`` is the older spelling of ``channels_first=True``).
    classifier = KerasClassifier(model=clipped_classifier.model, use_logits=False, channels_first=True)
    classifier = ExpectationOverTransformations(classifier, sample_size=1, transformation=transformation)

    attack = FastGradientMethod(classifier, eps=1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    true_labels = np.argmax(y_test, axis=1)
    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((true_labels == preds_adv).all())
    acc = np.sum(preds_adv == true_labels) / y_test.shape[0]
    logger.info("Accuracy on Iris with limited query info: %.2f%%", (acc * 100))
def test_6_keras_iris_clipped(self):
    """
    Run an untargeted UniversalPerturbation attack (NewtonFool inner attack) against the Keras Iris classifier.

    The adversarial samples must differ from the inputs, stay inside [0, 1] and must not all be
    classified as their true label.
    """
    classifier = get_tabular_classifier_kr()

    # Configure the universal perturbation with NewtonFool as the per-sample attacker
    inner_attack_config = {"max_iter": 5, "verbose": False}
    attack = UniversalPerturbation(classifier, verbose=False)
    attack.set_params(max_iter=1, attacker="newtonfool", attacker_params=inner_attack_config)

    x_adv = attack.generate(self.x_test_iris)
    self.assertFalse(np.all(self.x_test_iris == x_adv))
    self.assertTrue(np.all(x_adv <= 1))
    self.assertTrue(np.all(x_adv >= 0))

    true_labels = np.argmax(self.y_test_iris, axis=1)
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse(np.all(true_labels == adv_labels))

    accuracy = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with universal adversarial examples: %.2f%%", (accuracy * 100))
def test_iris_clipped(self):
    """
    Run an untargeted FastGradientMethod attack through an ExpectationOverTransformations wrapper
    around the Keras Iris classifier.

    The wrapper is fed an endless stream of identity transformations. The adversarial samples must
    differ from the inputs, stay inside [0, 1] and must not all be classified as their true label.
    """
    (_, _), (x_test, y_test) = self.iris

    def identity(sample):
        return sample

    def identity_stream():
        # Never-ending supply of the identity transformation
        while True:
            yield identity

    wrapped = ExpectationOverTransformations(
        get_tabular_classifier_kr(), sample_size=1, transformation=identity_stream
    )

    # Untargeted attack
    fgsm = FastGradientMethod(wrapped, eps=0.1)
    x_adv = fgsm.generate(x_test)
    self.assertFalse(np.all(x_test == x_adv))
    self.assertTrue(np.all(x_adv <= 1))
    self.assertTrue(np.all(x_adv >= 0))

    true_labels = np.argmax(y_test, axis=1)
    adv_labels = np.argmax(wrapped.predict(x_adv), axis=1)
    self.assertFalse(np.all(true_labels == adv_labels))
    accuracy = np.sum(adv_labels == true_labels) / y_test.shape[0]
    logger.info("Accuracy on Iris with limited query info: %.2f%%", (accuracy * 100))
def test_8_keras_iris_vector_unbounded(self):
    """
    Run SaliencyMapMethod (theta=1) against a Keras Iris classifier without clip values.

    Only checks that the adversarial samples differ from the inputs.
    """
    clipped_classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values, via the public ``model`` accessor instead of ``_model``
    classifier = KerasClassifier(model=clipped_classifier.model, use_logits=False, channels_first=True)

    attack = SaliencyMapMethod(classifier, theta=1, verbose=False)
    x_test_iris_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_iris_adv).all())
def test_5_failure_feature_vectors(self):
    """
    Check that constructing AdversarialPatch on the tabular Keras classifier raises a ValueError
    whose message reports the unexpected input shape.
    """
    tabular_classifier = get_tabular_classifier_kr()
    tabular_classifier._clip_values = (0, 1)

    # Assert that value error is raised for feature vectors
    with self.assertRaises(ValueError) as raised:
        _ = AdversarialPatch(classifier=tabular_classifier)

    message = str(raised.exception)
    self.assertIn(
        "Unexpected input_shape in estimator detected. AdversarialPatch is expecting images or videos as input.",
        message,
    )
def test_failure_feature_vectors(self):
    """
    Check that SpatialTransformation.generate raises a ValueError mentioning feature vectors when
    given a random (10, 4) array.
    """
    attack = SpatialTransformation(classifier=get_tabular_classifier_kr())
    attack.set_params(max_translation=10.0, num_translations=3, max_rotation=30.0, num_rotations=3)

    feature_vectors = np.random.rand(10, 4)

    # Assert that value error is raised for feature vectors
    with self.assertRaises(ValueError) as raised:
        attack.generate(feature_vectors)

    message = str(raised.exception)
    self.assertIn("Feature vectors detected.", message)
def test_keras_iris_clipped_LInf(self):
    """
    Run an untargeted CarliniLInfMethod attack (eps=0.5, 10 iterations) against the Keras Iris classifier.

    The adversarial samples must differ from the inputs, stay inside [0, 1] and must not all be
    classified as their true label.
    """
    classifier = get_tabular_classifier_kr()
    cw_linf = CarliniLInfMethod(classifier, targeted=False, max_iter=10, eps=0.5)

    x_adv = cw_linf.generate(self.x_test_iris)
    self.assertFalse(np.all(self.x_test_iris == x_adv))
    self.assertLessEqual(np.amax(x_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_adv), 0.0)

    true_labels = np.argmax(self.y_test_iris, axis=1)
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse(np.all(true_labels == adv_labels))

    acc = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with C&W adversarial examples: %.2f%%", (acc * 100))
def _tabular_dl_estimator(clipped=True):
    """
    Build the deep-learning tabular estimator for the current ``framework``.

    ``framework`` is read from the enclosing scope (not visible in this block).

    :param clipped: If True, return the estimator as produced by the framework's factory. If False,
                    return a Keras estimator rewrapped without clip values; the other frameworks
                    have no unclipped variant here.
    :return: The estimator for the current framework.
    :raises ARTTestFixtureNotImplemented: If no estimator is available for this framework/``clipped``
                                          combination.
    """
    estimator = None

    if framework == "keras":
        base = get_tabular_classifier_kr()
        if clipped:
            estimator = base
        else:
            estimator = KerasClassifier(model=base.model, use_logits=False, channels_first=True)
    elif framework in ("tensorflow1", "tensorflow2"):
        if clipped:
            estimator, _ = get_tabular_classifier_tf()
    elif framework == "pytorch":
        if clipped:
            estimator = get_tabular_classifier_pt()

    if estimator is not None:
        return estimator

    raise ARTTestFixtureNotImplemented(
        "no deep learning tabular estimator available", tabular_dl_estimator.__name__, framework
    )
def test_keras_iris_unbounded_LInf(self):
    """
    Run an untargeted CarliniLInfMethod attack (eps=1, 10 iterations) against a Keras Iris classifier
    without clip values.

    The adversarial samples must differ from the inputs and must not all be classified as their true label.
    """
    clipped_classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values, via the public ``model`` accessor instead of ``_model``
    classifier = KerasClassifier(model=clipped_classifier.model, use_logits=False, channels_first=True)

    attack = CarliniLInfMethod(classifier, targeted=False, max_iter=10, eps=1)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())

    true_labels = np.argmax(self.y_test_iris, axis=1)
    predictions_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((true_labels == predictions_adv).all())

    accuracy = np.sum(predictions_adv == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with C&W adversarial examples: %.2f%%", (accuracy * 100))
def test_keras_iris_clipped(self):
    """
    Run a NewtonFool attack (5 iterations) against the Keras Iris classifier.

    The adversarial samples must differ from the inputs, stay inside [0, 1] and must not all be
    classified as their true label.
    """
    classifier = get_tabular_classifier_kr()
    newton_fool = NewtonFool(classifier, max_iter=5)

    x_adv = newton_fool.generate(self.x_test_iris)
    self.assertFalse(np.all(self.x_test_iris == x_adv))
    self.assertTrue(np.all(x_adv <= 1))
    self.assertTrue(np.all(x_adv >= 0))

    true_labels = np.argmax(self.y_test_iris, axis=1)
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse(np.all(true_labels == adv_labels))

    accuracy = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with NewtonFool adversarial examples: %.2f%%", (accuracy * 100))
def test_keras_iris_unbounded(self):
    """
    Run a NewtonFool attack (5 iterations, batch size 128) against a Keras Iris classifier without
    clip values.

    The adversarial samples must differ from the inputs and must not all be classified as their true label.
    """
    clipped_classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values, via the public ``model`` accessor instead of ``_model``
    classifier = KerasClassifier(model=clipped_classifier.model, use_logits=False, channels_first=True)

    attack = NewtonFool(classifier, max_iter=5, batch_size=128)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())

    true_labels = np.argmax(self.y_test_iris, axis=1)
    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((true_labels == preds_adv).all())

    acc = np.sum(preds_adv == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with NewtonFool adversarial examples: %.2f%%", (acc * 100))
def test_keras_iris_clipped(self):
    """
    Run an untargeted VirtualAdversarialMethod attack (eps=0.1) against the Keras Iris classifier.

    The adversarial samples must differ from the inputs, stay inside [0, 1] and must not all be
    classified as their true label.
    """
    classifier = get_tabular_classifier_kr()

    # Untargeted attack
    vat = VirtualAdversarialMethod(classifier, eps=0.1)
    x_adv = vat.generate(self.x_test_iris)
    self.assertFalse(np.all(self.x_test_iris == x_adv))
    self.assertTrue(np.all(x_adv <= 1))
    self.assertTrue(np.all(x_adv >= 0))

    true_labels = np.argmax(self.y_test_iris, axis=1)
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse(np.all(true_labels == adv_labels))

    accuracy = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with VAT adversarial examples: %.2f%%", (accuracy * 100))
def test_6_keras_iris_clipped(self):
    """
    Run an untargeted ProjectedGradientDescent attack against the Keras Iris classifier using the
    test split of ``self.iris``.

    The adversarial samples must differ from the inputs, stay inside [0, 1] and must not all be
    classified as their true label.
    """
    (_, _), (x_test, y_test) = self.iris
    classifier = get_tabular_classifier_kr()

    # Untargeted attack
    pgd = ProjectedGradientDescent(classifier, eps=1, eps_step=0.1, max_iter=5)
    x_adv = pgd.generate(x_test)

    # The test data are converted with np.array before the element-wise comparisons
    self.assertFalse(np.all(np.array(x_test) == x_adv))
    self.assertTrue(np.all(x_adv <= 1))
    self.assertTrue(np.all(x_adv >= 0))

    true_labels = np.argmax(np.array(y_test), axis=1)
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse(np.all(true_labels == adv_labels))

    accuracy = np.sum(adv_labels == true_labels) / len(y_test)
    logger.info("Accuracy on Iris with PGD adversarial examples: %.2f%%", (accuracy * 100))
def test_keras_iris_unbounded(self):
    """
    Run a ProjectedGradientDescent attack (eps=1, eps_step=0.2, 5 iterations) against a Keras Iris
    classifier without clip values.

    The adversarial samples must differ from the inputs, leave the [0, 1] range on both sides, and
    must not all be classified as their true label.
    """
    clipped_classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values, via the public ``model`` accessor instead of ``_model``
    classifier = KerasClassifier(model=clipped_classifier.model, use_logits=False, channels_first=True)

    attack = ProjectedGradientDescent(classifier, eps=1, eps_step=0.2, max_iter=5)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    true_labels = np.argmax(self.y_test_iris, axis=1)
    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((true_labels == preds_adv).all())

    acc = np.sum(preds_adv == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with PGD adversarial examples: %.2f%%", (acc * 100))
def test_keras_iris_unbounded(self):
    """
    Run a BasicIterativeMethod attack (eps=1, eps_step=0.2, batch size 128) against a Keras Iris
    classifier without clip values.

    The adversarial samples must differ from the inputs, leave the [0, 1] range on both sides, and
    must not all be classified as their true label.
    """
    clipped_classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values. Uses the public ``model`` accessor and the
    # ``channels_first`` flag, matching the other KerasClassifier constructions in this file
    # (``channel_index=1`` is the older spelling of ``channels_first=True``).
    classifier = KerasClassifier(model=clipped_classifier.model, use_logits=False, channels_first=True)

    attack = BasicIterativeMethod(classifier, eps=1, eps_step=0.2, batch_size=128)
    x_test_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_adv).all())
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    true_labels = np.argmax(self.y_test_iris, axis=1)
    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((true_labels == preds_adv).all())

    acc = np.sum(preds_adv == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with BIM adversarial examples: %.2f%%", (acc * 100))
def test_iris_clipped(self):
    """
    Run an untargeted FastGradientMethod attack (eps=0.1) through a
    QueryEfficientGradientEstimationClassifier wrapper around the Keras Iris classifier.

    The adversarial samples must differ from the inputs, stay inside [0, 1] and must not all be
    classified as their true label.
    """
    (_, _), (x_test, y_test) = self.iris

    wrapped = QueryEfficientGradientEstimationClassifier(
        get_tabular_classifier_kr(), 20, 1 / 64.0, round_samples=1 / 255.0
    )

    # Untargeted attack
    fgsm = FastGradientMethod(wrapped, eps=0.1)
    x_adv = fgsm.generate(x_test)
    self.assertFalse(np.all(x_test == x_adv))
    self.assertTrue(np.all(x_adv <= 1))
    self.assertTrue(np.all(x_adv >= 0))

    true_labels = np.argmax(y_test, axis=1)
    adv_labels = np.argmax(wrapped.predict(x_adv), axis=1)
    self.assertFalse(np.all(true_labels == adv_labels))
def test_7_keras_iris_vector_clipped(self):
    """
    Run SaliencyMapMethod (theta=1) against the Keras Iris classifier.

    The adversarial samples must differ from the inputs, stay inside [0, 1] and must not all be
    classified as their true label.
    """
    classifier = get_tabular_classifier_kr()
    jsma = SaliencyMapMethod(classifier, theta=1, verbose=False)

    x_adv = jsma.generate(self.x_test_iris)
    self.assertFalse(np.all(self.x_test_iris == x_adv))
    self.assertTrue(np.all(x_adv <= 1))
    self.assertTrue(np.all(x_adv >= 0))

    true_labels = np.argmax(self.y_test_iris, axis=1)
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse(np.all(true_labels == adv_labels))

    acc = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with JSMA adversarial examples: %.2f%%", (acc * 100))
def test_6_keras_iris_clipped(self):
    """
    Run a DeepFool attack (5 iterations) against the Keras Iris classifier.

    The adversarial samples must differ from the inputs, stay inside [0, 1] and must not all be
    classified as their true label.
    """
    classifier = get_tabular_classifier_kr()
    deep_fool = DeepFool(classifier, max_iter=5, verbose=False)

    x_adv = deep_fool.generate(self.x_test_iris)
    self.assertFalse(np.all(self.x_test_iris == x_adv))
    self.assertLessEqual(np.amax(x_adv), 1.0)
    self.assertGreaterEqual(np.amin(x_adv), 0.0)

    true_labels = np.argmax(self.y_test_iris, axis=1)
    adv_labels = np.argmax(classifier.predict(x_adv), axis=1)
    self.assertFalse(np.all(true_labels == adv_labels))

    acc = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with DeepFool adversarial examples: %.2f%%", (acc * 100))
def test_iris_clipped(self):
    """
    Exercise RandomizedSmoothing around the Keras Iris classifier.

    Adversarial samples are generated with FastGradientMethod (eps=0.1) against the underlying
    classifier and checked to differ from the inputs and stay inside [0, 1]. The smoothed
    classifier's accuracy and coverage on clean and adversarial inputs are logged, and the shapes
    and ranges returned by ``predict``, ``loss_gradient``, ``class_gradient`` and ``certify`` are
    checked.
    """
    (_, _), (x_test, y_test) = self.iris

    krc = get_tabular_classifier_kr()
    rs = RandomizedSmoothing(classifier=krc, sample_size=100, scale=0.01, alpha=0.001)

    # Test untargeted attack
    attack = FastGradientMethod(krc, eps=0.1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_smooth = np.argmax(rs.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == preds_smooth).all())

    # Same call order as before (clean inputs first, then adversarial)
    pred_clean = rs.predict(x_test)
    pred_adv = rs.predict(x_test_adv)
    acc_clean, cov_clean = compute_accuracy(pred_clean, y_test)
    acc_adv, cov_adv = compute_accuracy(pred_adv, y_test)

    # Each message reports the metrics of the inputs it names; previously the clean and
    # adversarial values were logged under each other's label.
    logger.info("Accuracy on Iris with smoothing on adversarial examples: %.2f%%", (acc_adv * 100))
    logger.info("Coverage on Iris with smoothing on adversarial examples: %.2f%%", (cov_adv * 100))
    logger.info("Accuracy on Iris with smoothing: %.2f%%", (acc_clean * 100))
    logger.info("Coverage on Iris with smoothing: %.2f%%", (cov_clean * 100))

    # Check basic functionality of RS object
    # check predict
    y_test_smooth = rs.predict(x=x_test)
    self.assertEqual(y_test_smooth.shape, y_test.shape)
    self.assertTrue((np.sum(y_test_smooth, axis=1) <= 1).all())

    # check gradients
    grad_smooth1 = rs.loss_gradient(x=x_test, y=y_test)
    grad_smooth2 = rs.class_gradient(x=x_test, label=None)
    grad_smooth3 = rs.class_gradient(x=x_test, label=np.argmax(y_test, axis=1))
    self.assertEqual(grad_smooth1.shape, x_test_adv.shape)
    self.assertEqual(grad_smooth2.shape[0], len(x_test))
    self.assertEqual(grad_smooth3.shape[0], len(x_test))

    # check certification
    pred, radius = rs.certify(x=x_test, n=250)
    self.assertEqual(len(pred), len(x_test))
    self.assertEqual(len(radius), len(x_test))
    self.assertTrue((radius <= 1).all())
    self.assertTrue((pred < y_test.shape[1]).all())
def test_iris_unbounded(self):
    """
    Run FastGradientMethod (eps=1) through a QueryEfficientGradientEstimationClassifier wrapper
    around a Keras Iris classifier without clip values.

    The adversarial samples must differ from the inputs, leave the [0, 1] range on both sides, and
    must not all be classified as their true label.
    """
    (_, _), (x_test, y_test) = self.iris
    clipped_classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values, via the public ``model`` accessor instead of ``_model``
    classifier = KerasClassifier(model=clipped_classifier.model, use_logits=False, channels_first=True)
    classifier = QueryEfficientGradientEstimationClassifier(classifier, 20, 1 / 64.0, round_samples=1 / 255.0)

    attack = FastGradientMethod(classifier, eps=1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
def test_keras_iris(self):
    """
    Check CopycatCNN model extraction against the Keras Iris classifier.

    A small dense Keras model is wrapped as the thieved classifier and trained by
    ``CopycatCNN.extract`` on the Iris training data. On the first 100 training samples its
    predicted labels must agree with the victim's for more than 30% of them.

    :return:
    """
    try:
        # Build KerasClassifier
        victim_krc = get_tabular_classifier_kr()

        # Create the thief model: a small fully connected network (4 inputs, 3 softmax outputs)
        model = Sequential()
        model.add(Dense(10, input_shape=(4,), activation="relu"))
        model.add(Dense(10, activation="relu"))
        model.add(Dense(3, activation="softmax"))
        # NOTE(review): newer Keras releases name this optimizer argument ``learning_rate`` -
        # confirm against the Keras version this suite is pinned to before changing ``lr``.
        model.compile(
            loss="categorical_crossentropy", optimizer=keras.optimizers.Adam(lr=0.001), metrics=["accuracy"]
        )

        # Get classifier
        thieved_krc = KerasClassifier(model, clip_values=(0, 1), use_logits=False, channels_first=True)

        # Create attack
        copycat_cnn = CopycatCNN(
            classifier=victim_krc,
            batch_size_fit=self.batch_size,
            batch_size_query=self.batch_size,
            nb_epochs=NB_EPOCHS,
            nb_stolen=NB_STOLEN,
        )
        thieved_krc = copycat_cnn.extract(x=self.x_train_iris, thieved_classifier=thieved_krc)

        victim_preds = np.argmax(victim_krc.predict(x=self.x_train_iris[:100]), axis=1)
        thieved_preds = np.argmax(thieved_krc.predict(x=self.x_train_iris[:100]), axis=1)
        acc = np.sum(victim_preds == thieved_preds) / len(victim_preds)
        self.assertGreater(acc, 0.3)
    finally:
        # Clean-up: runs even when an assertion above fails, so the session is always cleared
        k.clear_session()
def test_failure_feature_vectors(self):
    """
    Check that AdversarialPatch.generate raises a ValueError mentioning feature vectors when given
    a random (10, 4) array.
    """
    attack = AdversarialPatch(classifier=get_tabular_classifier_kr())
    attack.set_params(
        rotation_max=22.5,
        scale_min=0.1,
        scale_max=1.0,
        learning_rate=5.0,
        number_of_steps=5,
        batch_size=10,
    )

    feature_vectors = np.random.rand(10, 4)

    # Assert that value error is raised for feature vectors
    with self.assertRaises(ValueError) as raised:
        attack.generate(feature_vectors)

    message = str(raised.exception)
    self.assertIn("Feature vectors detected.", message)