def test_check_params(self):
    ptc = get_image_classifier_pt(from_logits=True)

    with self.assertRaises(ValueError):
        _ = SaliencyMapMethod(ptc, gamma=-1)

    with self.assertRaises(ValueError):
        _ = SaliencyMapMethod(ptc, batch_size=-1)

    with self.assertRaises(ValueError):
        _ = SaliencyMapMethod(ptc, verbose="False")
def get_adversarial_examples(X, Y, model, nb_classes, attack=None):
    assert model is not None
    assert attack is not None

    art_classifier = SklearnClassifier(model=model, clip_values=(0, nb_classes))

    if attack == ATTACK.PGD:
        attacker = ProjectedGradientDescent(
            classifier=art_classifier,
            norm=np.inf,
            eps=0.2,
            eps_step=0.1,
            max_iter=3,
            targeted=False,
            num_random_init=0,
            batch_size=128,
        )
    elif attack == ATTACK.DEEPFOOL:
        attacker = DeepFool(classifier=art_classifier, max_iter=5, epsilon=1e-6, nb_grads=3, batch_size=1)
    elif attack == ATTACK.FGSM:
        attacker = FastGradientMethod(classifier=art_classifier, norm=np.inf, eps=0.3, targeted=False, batch_size=128)
    elif attack == ATTACK.BIM:
        attacker = BasicIterativeMethod(classifier=art_classifier, eps=0.3, eps_step=0.1, targeted=False, batch_size=128)
    elif attack == ATTACK.JSMA:
        attacker = SaliencyMapMethod(classifier=art_classifier, theta=0.3, gamma=0.5, batch_size=128)
    elif attack == ATTACK.CW_L2:
        attacker = CarliniL2Method(classifier=art_classifier, learning_rate=0.1)
    elif attack == ATTACK.CW_Linf:
        attacker = CarliniLInfMethod(classifier=art_classifier, learning_rate=0.01)
    else:
        raise NotImplementedError('{} is not implemented.'.format(attack))

    print('Generating [{}] adversarial examples, it will take a while...'.format(attack))
    X_adv = attacker.generate(X, y=Y)
    del attacker

    return X_adv
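# A minimal usage sketch for get_adversarial_examples, under assumptions: the
# Iris data and LogisticRegression settings are illustrative, not part of the
# original helper. FGSM is untargeted here, so Y supplies the true labels used
# to compute the loss gradient.
def _example_get_adversarial_examples():
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression

    # Fit a simple scikit-learn classifier on Iris (3 classes).
    X, Y = load_iris(return_X_y=True)
    model = LogisticRegression(solver="lbfgs", multi_class="auto").fit(X, Y)

    # Craft untargeted FGSM adversarial examples against the wrapped model.
    return get_adversarial_examples(X, Y, model, nb_classes=3, attack=ATTACK.FGSM)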
def test_8_keras_iris_vector_unbounded(self):
    classifier = get_tabular_classifier_kr()

    # Recreate a classifier without clip values
    classifier = KerasClassifier(model=classifier._model, use_logits=False, channels_first=True)
    attack = SaliencyMapMethod(classifier, theta=1, verbose=False)
    x_test_iris_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_iris_adv).all())
def _jsma(model, data, labels, attack_args):
    theta = attack_args.get('theta', 0.15)
    gamma = attack_args.get('gamma', 0.5)
    batch_size = attack_args.get('batch_size', 1)

    attacker = SaliencyMapMethod(classifier=model, theta=theta, gamma=gamma, batch_size=batch_size)
    return attacker.generate(data, labels)
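# A hedged usage sketch for _jsma: `classifier`, `x_test`, and `nb_classes` are
# placeholders for whatever the calling code has prepared. Since JSMA is a
# targeted attack, `labels` is forwarded to generate() as the target classes.
def _example_jsma_usage(classifier, x_test, nb_classes):
    import numpy as np

    # Pick a random target class per sample, one-hot encoded for ART.
    targets = np.random.randint(nb_classes, size=x_test.shape[0])
    one_hot_targets = np.eye(nb_classes)[targets]

    attack_args = {'theta': 0.15, 'gamma': 0.5, 'batch_size': 1}
    return _jsma(classifier, x_test, one_hot_targets, attack_args)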
def test_4_pytorch_iris_vector(self):
    classifier = get_tabular_classifier_pt()

    attack = SaliencyMapMethod(classifier, theta=1, verbose=False)
    x_test_iris_adv = attack.generate(self.x_test_iris)
    self.assertFalse((self.x_test_iris == x_test_iris_adv).all())
    self.assertTrue((x_test_iris_adv <= 1).all())
    self.assertTrue((x_test_iris_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_iris_adv), axis=1)
    self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
    accuracy = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with JSMA adversarial examples: %.2f%%", (accuracy * 100))
def test_6_scikitlearn(self):
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC, LinearSVC

    from art.estimators.classification.scikitlearn import SklearnClassifier

    scikitlearn_test_cases = [
        LogisticRegression(solver="lbfgs", multi_class="auto"),
        SVC(gamma="auto"),
        LinearSVC(),
    ]

    x_test_original = self.x_test_iris.copy()

    for model in scikitlearn_test_cases:
        classifier = SklearnClassifier(model=model, clip_values=(0, 1))
        classifier.fit(x=self.x_test_iris, y=self.y_test_iris)

        attack = SaliencyMapMethod(classifier, theta=1, batch_size=128, verbose=False)
        x_test_iris_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_iris_adv).all())
        self.assertTrue((x_test_iris_adv <= 1).all())
        self.assertTrue((x_test_iris_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_iris_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
        accuracy = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info(
            "Accuracy of %s on Iris with JSMA adversarial examples: %.2f%%",
            classifier.__class__.__name__,
            accuracy * 100,
        )

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_iris))), 0.0, delta=0.00001)
def test_9_keras_mnist(self):
    x_test_original = self.x_test_mnist.copy()

    # Keras classifier
    classifier = get_image_classifier_kr()

    scores = classifier._model.evaluate(self.x_train_mnist, self.y_train_mnist)
    logger.info("[Keras, MNIST] Accuracy on training set: %.2f%%", (scores[1] * 100))
    scores = classifier._model.evaluate(self.x_test_mnist, self.y_test_mnist)
    logger.info("[Keras, MNIST] Accuracy on test set: %.2f%%", (scores[1] * 100))

    # targeted
    # Generate random target classes
    nb_classes = np.unique(np.argmax(self.y_test_mnist, axis=1)).shape[0]
    targets = np.random.randint(nb_classes, size=self.n_test)
    while (targets == np.argmax(self.y_test_mnist, axis=1)).any():
        targets = np.random.randint(nb_classes, size=self.n_test)

    # Perform attack
    attack = SaliencyMapMethod(classifier, theta=1, batch_size=100, verbose=False)
    x_test_adv = attack.generate(self.x_test_mnist, y=to_categorical(targets, nb_classes))

    self.assertFalse((self.x_test_mnist == x_test_adv).all())
    self.assertFalse((0.0 == x_test_adv).all())

    y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((self.y_test_mnist == y_pred).all())

    accuracy = np.sum(np.argmax(y_pred, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.n_test
    logger.info("Accuracy on adversarial examples: %.2f%%", (accuracy * 100))

    # untargeted
    attack = SaliencyMapMethod(classifier, theta=1, batch_size=100, verbose=False)
    x_test_adv = attack.generate(self.x_test_mnist)

    self.assertFalse((self.x_test_mnist == x_test_adv).all())
    self.assertFalse((0.0 == x_test_adv).all())

    y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((self.y_test_mnist == y_pred).all())

    accuracy = np.sum(np.argmax(y_pred, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.n_test
    logger.info("Accuracy on adversarial examples: %.2f%%", (accuracy * 100))

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_mnist))), 0.0, delta=0.00001)
def craft(X, Y, art_classifier, attack=None, **attack_params):
    assert art_classifier is not None
    assert attack is not None

    if attack == ATTACK.PGD:
        eps = attack_params.get('eps', 0.2)
        eps_step = attack_params.get('eps_step', eps / 5.)
        max_iter = attack_params.get('max_iter', 3)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)

        attacker = ProjectedGradientDescent(
            classifier=art_classifier,
            norm=np.inf,
            eps=eps,
            eps_step=eps_step,
            max_iter=max_iter,
            targeted=targeted,
            num_random_init=0,
            batch_size=batch_size,
        )
    elif attack == ATTACK.DEEPFOOL:
        eps = attack_params.get('eps', 1e-6)
        max_iter = attack_params.get('max_iter', 5)
        nb_grads = attack_params.get('nb_grads', 3)
        batch_size = attack_params.get('batch_size', 1)

        attacker = DeepFool(classifier=art_classifier, max_iter=max_iter, epsilon=eps, nb_grads=nb_grads, batch_size=batch_size)
    elif attack == ATTACK.FGSM:
        eps = attack_params.get('eps', 0.3)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)

        attacker = FastGradientMethod(classifier=art_classifier, norm=np.inf, eps=eps, targeted=targeted, batch_size=batch_size)
    elif attack == ATTACK.BIM:
        eps = attack_params.get('eps', 0.3)
        eps_step = attack_params.get('eps_step', eps / 5.)
        norm = attack_params.get('norm', np.inf)
        targeted = attack_params.get('targeted', False)
        batch_size = attack_params.get('batch_size', 128)

        attacker = BasicIterativeMethod(classifier=art_classifier, norm=norm, eps=eps, eps_step=eps_step, targeted=targeted, batch_size=batch_size)
    elif attack == ATTACK.JSMA:
        theta = attack_params.get('theta', 0.3)
        gamma = attack_params.get('gamma', 0.5)
        batch_size = attack_params.get('batch_size', 128)

        attacker = SaliencyMapMethod(classifier=art_classifier, theta=theta, gamma=gamma, batch_size=batch_size)
    elif attack == ATTACK.CW_L2:
        lr = attack_params.get('lr', 0.1)
        bsearch_steps = attack_params.get('bsearch_steps', 10)

        attacker = CarliniL2Method(classifier=art_classifier, learning_rate=lr, binary_search_steps=bsearch_steps)
    elif attack == ATTACK.CW_Linf:
        lr = attack_params.get('lr', 0.01)

        attacker = CarliniLInfMethod(classifier=art_classifier, learning_rate=lr)
    else:
        raise NotImplementedError('{} is not implemented.'.format(attack))

    print('Generating [{}] adversarial examples, it will take a while...'.format(attack))
    X_adv = attacker.generate(X, y=Y)
    del attacker

    return X_adv
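# A hedged usage sketch for craft: attack-specific keyword arguments flow
# through **attack_params, so callers override only the defaults they care
# about (everything read with attack_params.get(...) above). The classifier
# and data arguments are placeholders for whatever the caller has prepared.
def _example_craft_usage(art_classifier, X, Y):
    # Override the JSMA defaults (theta=0.3, gamma=0.5); batch_size keeps 128.
    x_adv_jsma = craft(X, Y, art_classifier, attack=ATTACK.JSMA, theta=0.1, gamma=0.3)

    # PGD with a tighter budget; unspecified parameters keep their defaults.
    x_adv_pgd = craft(X, Y, art_classifier, attack=ATTACK.PGD, eps=0.1, max_iter=10)

    return x_adv_jsma, x_adv_pgd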
def test_pytorch_mnist(self):
    x_train_mnist = np.swapaxes(self.x_train_mnist, 1, 3).astype(np.float32)
    x_test_mnist = np.swapaxes(self.x_test_mnist, 1, 3).astype(np.float32)
    x_test_original = x_test_mnist.copy()

    # Create basic PyTorch model
    classifier = get_image_classifier_pt()

    scores = get_labels_np_array(classifier.predict(x_train_mnist))
    accuracy = np.sum(np.argmax(scores, axis=1) == np.argmax(self.y_train_mnist, axis=1)) / self.n_train
    logger.info("[PyTorch, MNIST] Accuracy on training set: %.2f%%", (accuracy * 100))

    scores = get_labels_np_array(classifier.predict(x_test_mnist))
    accuracy = np.sum(np.argmax(scores, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.n_test
    logger.info("\n[PyTorch, MNIST] Accuracy on test set: %.2f%%", (accuracy * 100))

    # targeted
    # Generate random target classes
    nb_classes = np.unique(np.argmax(self.y_test_mnist, axis=1)).shape[0]
    targets = np.random.randint(nb_classes, size=self.n_test)
    while (targets == np.argmax(self.y_test_mnist, axis=1)).any():
        targets = np.random.randint(nb_classes, size=self.n_test)

    # Perform attack
    attack = SaliencyMapMethod(classifier, theta=1, batch_size=100)
    x_test_mnist_adv = attack.generate(x_test_mnist, y=to_categorical(targets, nb_classes))

    self.assertFalse((x_test_mnist == x_test_mnist_adv).all())
    self.assertFalse((0.0 == x_test_mnist_adv).all())

    y_pred = get_labels_np_array(classifier.predict(x_test_mnist_adv))
    self.assertFalse((self.y_test_mnist == y_pred).all())

    accuracy = np.sum(np.argmax(y_pred, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.n_test
    logger.info("Accuracy on adversarial examples: %.2f%%", (accuracy * 100))

    # untargeted
    attack = SaliencyMapMethod(classifier, theta=1, batch_size=100)
    x_test_mnist_adv = attack.generate(x_test_mnist)

    self.assertFalse((x_test_mnist == x_test_mnist_adv).all())
    self.assertFalse((0.0 == x_test_mnist_adv).all())

    y_pred = get_labels_np_array(classifier.predict(x_test_mnist_adv))
    self.assertFalse((self.y_test_mnist == y_pred).all())

    accuracy = np.sum(np.argmax(y_pred, axis=1) == np.argmax(self.y_test_mnist, axis=1)) / self.n_test
    logger.info("Accuracy on adversarial examples: %.2f%%", (accuracy * 100))

    # Check that x_test has not been modified by attack and classifier
    self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test_mnist))), 0.0, delta=0.00001)