def test_keras_mnist(self):
    """Exercise SaliencyMapMethod (targeted, then untargeted) on the Keras MNIST classifier.

    Logs the wrapped Keras model's scores on the train and test splits, runs
    both attack variants on the test split, and finally verifies that the
    test inputs were left untouched.
    """
    (train_x, train_y), (test_x, test_y) = self.mnist
    pristine_test_x = test_x.copy()

    victim = get_classifier_kr()

    # Baseline: the second entry of the Keras evaluation result is logged as accuracy.
    train_scores = victim._model.evaluate(train_x, train_y)
    logger.info('[Keras, MNIST] Accuracy on training set: %.2f%%', (train_scores[1] * 100))
    test_scores = victim._model.evaluate(test_x, test_y)
    logger.info('[Keras, MNIST] Accuracy on test set: %.2f%%', (test_scores[1] * 100))

    def check_adversarial(adv_x):
        # Adversarial inputs must differ from the clean ones and must not be all zeros.
        self.assertFalse((test_x == adv_x).all())
        self.assertFalse((0. == adv_x).all())

        # Predictions on adversarial inputs must not match every true label.
        predicted = get_labels_np_array(victim.predict(adv_x))
        self.assertFalse((test_y == predicted).all())

        hits = np.argmax(predicted, axis=1) == np.argmax(test_y, axis=1)
        ratio = np.sum(hits) / test_y.shape[0]
        logger.info('Accuracy on adversarial examples: %.2f%%', (ratio * 100))

    # --- Targeted attack ---
    # Redraw the whole target vector until no target coincides with a true label.
    true_labels = np.argmax(test_y, axis=1)
    nb_classes = np.unique(true_labels).shape[0]
    targets = np.random.randint(nb_classes, size=NB_TEST)
    while np.any(targets == true_labels):
        targets = np.random.randint(nb_classes, size=NB_TEST)

    targeted_attack = SaliencyMapMethod(victim, theta=1, batch_size=100)
    check_adversarial(targeted_attack.generate(test_x, y=to_categorical(targets, nb_classes)))

    # --- Untargeted attack ---
    untargeted_attack = SaliencyMapMethod(victim, theta=1, batch_size=100)
    check_adversarial(untargeted_attack.generate(test_x))

    # Neither the attack nor the classifier may have modified the test inputs.
    max_drift = float(np.max(np.abs(pristine_test_x - test_x)))
    self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
def atk_JSMA(x_train, x_test, y_train, y_test, classifier):
    """Craft SaliencyMapMethod adversarial copies of both splits and evaluate them.

    The training split is attacked first, then the test split. After printing
    a banner, the clean and adversarial data are handed to ``evaluate``.

    Returns:
        A tuple ``(x_test_adv, x_train_adv)`` -- note the test split comes first.
    """
    jsma = SaliencyMapMethod(classifier, theta=1)

    # Generator unpacking keeps the original order: train first, then test.
    x_train_adv, x_test_adv = (jsma.generate(split) for split in (x_train, x_test))

    print("After JSMA Attack \n")
    evaluate(x_train, x_test, y_train, y_test, x_train_adv, x_test_adv, classifier)

    return x_test_adv, x_train_adv
def test_scikitlearn(self):
    """Run the JSMA attack against scikit-learn models wrapped as ART classifiers on Iris.

    For every (model class, wrapper class) pair the wrapper is fitted on the
    Iris test split, attacked with ``SaliencyMapMethod``, and the adversarial
    samples are checked to differ from the originals, to stay within the
    ``(0, 1)`` clip range, and to change at least one prediction.
    """
    from sklearn.linear_model import LogisticRegression
    from art.classifiers.scikitlearn import ScikitlearnLogisticRegression

    # Only the logistic-regression wrapper is exercised by this test.
    scikitlearn_test_cases = {LogisticRegression: ScikitlearnLogisticRegression}

    (_, _), (x_test, y_test) = self.iris

    for model_class, classifier_class in scikitlearn_test_cases.items():
        model = model_class()
        classifier = classifier_class(model=model, clip_values=(0, 1))
        classifier.fit(x=x_test, y=y_test)

        attack = SaliencyMapMethod(classifier, theta=1, batch_size=128)
        x_test_adv = attack.generate(x_test)

        # Adversarial samples must differ and respect the clip values.
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        # At least one prediction must no longer match its true label.
        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())

        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        # Pass the class name as a logging argument instead of splicing it
        # into the format string, so it can never be parsed as a % directive.
        logger.info(
            'Accuracy of %s on Iris with JSMA adversarial examples: %.2f%%',
            classifier.__class__.__name__,
            (acc * 100),
        )
def test_keras_iris_vector_unbounded(self):
    """Check that SaliencyMapMethod alters Iris samples for a Keras classifier built without clip values."""
    base = get_tabular_classifier_kr()

    # Wrap the same underlying Keras model again, this time omitting clip values.
    unbounded = KerasClassifier(model=base._model, use_logits=False, channel_index=1)

    adv_samples = SaliencyMapMethod(unbounded, theta=1).generate(self.x_test_iris)

    # The attack must have changed at least one value.
    self.assertFalse((self.x_test_iris == adv_samples).all())
def test_iris_pt(self):
    """Attack the PyTorch Iris classifier with SaliencyMapMethod and sanity-check the result."""
    (_, _), (features, labels) = self.iris
    model = get_iris_classifier_pt()

    adv_features = SaliencyMapMethod(model, theta=1).generate(features)

    # Adversarial samples must differ from the originals and lie within [0, 1].
    self.assertFalse((features == adv_features).all())
    self.assertTrue((adv_features <= 1).all())
    self.assertTrue((adv_features >= 0).all())

    # At least one adversarial prediction must disagree with the true label.
    adv_labels = np.argmax(model.predict(adv_features), axis=1)
    true_labels = np.argmax(labels, axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    acc = np.sum(adv_labels == true_labels) / labels.shape[0]
    logger.info('Accuracy on Iris with JSMA adversarial examples: %.2f%%', (acc * 100))
class SaliencyMapAttack(AdversarialAttack):
    """Adversarial attack that delegates sample generation to ``SaliencyMapMethod``."""

    def __init__(self, model, theta=0.1, gamma=1.0, batch_size=16):
        """Store the attack settings and build the underlying ``SaliencyMapMethod``.

        Args:
            model: forwarded to the base-class constructor; the attack is then
                built on ``self.model``.
            theta: forwarded to ``SaliencyMapMethod`` as ``theta``.
            gamma: forwarded to ``SaliencyMapMethod`` as ``gamma``.
            batch_size: forwarded to ``SaliencyMapMethod`` as ``batch_size``.
        """
        super().__init__(model=model)
        self._theta = theta
        self._gamma = gamma
        self._method = SaliencyMapMethod(
            classifier=self.model,
            theta=self._theta,
            gamma=self._gamma,
            batch_size=batch_size,
        )

    def attack_method(self, x, y=None):
        """Generate adversarial samples for ``x``; ``y`` is forwarded only when given."""
        # Omit the ``y`` keyword entirely when no labels/targets were supplied.
        extra = {} if y is None else {'y': y}
        return self._method.generate(x=x, **extra)
def test_pytorch_iris_vector(self):
    """Attack the PyTorch tabular classifier on Iris with SaliencyMapMethod and sanity-check the result."""
    model = get_tabular_classifier_pt()

    adv_iris = SaliencyMapMethod(model, theta=1).generate(self.x_test_iris)

    # Adversarial samples must differ from the originals and lie within [0, 1].
    self.assertFalse((self.x_test_iris == adv_iris).all())
    self.assertTrue((adv_iris <= 1).all())
    self.assertTrue((adv_iris >= 0).all())

    # At least one adversarial prediction must disagree with the true label.
    adv_labels = np.argmax(model.predict(adv_iris), axis=1)
    true_labels = np.argmax(self.y_test_iris, axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    accuracy = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
    logger.info("Accuracy on Iris with JSMA adversarial examples: %.2f%%", (accuracy * 100))
def test_iris_k_unbounded(self):
    """Attack a Keras Iris classifier built without clip values and sanity-check the result."""
    (_, _), (features, labels) = self.iris
    base, _ = get_iris_classifier_kr()

    # Wrap the same underlying Keras model again, this time omitting clip values.
    unbounded = KerasClassifier(model=base._model, use_logits=False, channel_index=1)

    adv_features = SaliencyMapMethod(unbounded, theta=1).generate(features)
    self.assertFalse((features == adv_features).all())

    # At least one adversarial prediction must disagree with the true label.
    adv_labels = np.argmax(unbounded.predict(adv_features), axis=1)
    true_labels = np.argmax(labels, axis=1)
    self.assertFalse((true_labels == adv_labels).all())

    acc = np.sum(adv_labels == true_labels) / labels.shape[0]
    logger.info('Accuracy on Iris with JSMA adversarial examples: %.2f%%', (acc * 100))
def _test_mnist_untargeted(self, classifier):
    """Run an untargeted SaliencyMapMethod attack on the first ``NB_TEST`` MNIST test samples.

    Args:
        classifier: the classifier handed to ``SaliencyMapMethod`` and used
            to predict on the adversarial samples.
    """
    (_, _), (images, labels) = self.mnist
    images = images[:NB_TEST]
    labels = labels[:NB_TEST]

    adv_images = SaliencyMapMethod(classifier, theta=1, batch_size=100).generate(images)

    # Adversarial inputs must differ from the clean ones and must not be all zeros.
    self.assertFalse((images == adv_images).all())
    self.assertFalse((0. == adv_images).all())

    # Predictions on adversarial inputs must not match every true label.
    predicted = get_labels_np_array(classifier.predict(adv_images))
    self.assertFalse((labels == predicted).all())

    hits = np.argmax(predicted, axis=1) == np.argmax(labels, axis=1)
    acc = np.sum(hits) / labels.shape[0]
    logger.info('Accuracy on adversarial examples: %.2f%%', (acc * 100))
def GetAttackers(classifier, x_test, attacker_name):
    """Build the requested attack, craft adversarial samples and time the whole run.

    Args:
        classifier: classifier handed to the attack constructor.
        x_test: samples passed to the attack's ``generate``.
        attacker_name: ``"SMM"`` for ``SaliencyMapMethod`` or ``"PGD"`` for
            ``ProjectedGradientDescent``.

    Returns:
        A tuple ``(adversarial_samples, elapsed_seconds)``; the elapsed time
        covers attack construction as well as generation.

    Raises:
        ValueError: if ``attacker_name`` is neither ``"SMM"`` nor ``"PGD"``.
    """
    started_at = time.time()

    # Reject unknown names up front.
    if attacker_name not in ("SMM", "PGD"):
        raise ValueError("Please get the right attacker's name for the input.")

    if attacker_name == "SMM":
        crafter = SaliencyMapMethod(classifier=classifier, theta=2)
    else:
        crafter = ProjectedGradientDescent(classifier=classifier, norm=2, eps=1, eps_step=0.5)

    adversarial = crafter.generate(x_test)
    elapsed = time.time() - started_at
    return adversarial, elapsed
def test_scikitlearn(self):
    """Run the JSMA attack against several scikit-learn models wrapped in ``SklearnClassifier``.

    Each model is fitted on the Iris test split, attacked with
    ``SaliencyMapMethod``, and the adversarial samples are checked to differ
    from the originals, to stay within the ``(0, 1)`` clip range, and to
    change at least one prediction. Finally the Iris inputs are verified to
    be unmodified.
    """
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC, LinearSVC

    from art.classifiers.scikitlearn import SklearnClassifier

    models_under_test = [
        LogisticRegression(solver="lbfgs", multi_class="auto"),
        SVC(gamma="auto"),
        LinearSVC(),
    ]

    pristine_iris = self.x_test_iris.copy()

    for sk_model in models_under_test:
        wrapped = SklearnClassifier(model=sk_model, clip_values=(0, 1))
        wrapped.fit(x=self.x_test_iris, y=self.y_test_iris)

        jsma = SaliencyMapMethod(wrapped, theta=1, batch_size=128)
        adv_iris = jsma.generate(self.x_test_iris)

        # Adversarial samples must differ and respect the clip values.
        self.assertFalse((self.x_test_iris == adv_iris).all())
        self.assertTrue((adv_iris <= 1).all())
        self.assertTrue((adv_iris >= 0).all())

        # At least one adversarial prediction must disagree with the true label.
        adv_labels = np.argmax(wrapped.predict(adv_iris), axis=1)
        true_labels = np.argmax(self.y_test_iris, axis=1)
        self.assertFalse((true_labels == adv_labels).all())

        accuracy = np.sum(adv_labels == true_labels) / self.y_test_iris.shape[0]
        logger.info(
            "Accuracy of " + wrapped.__class__.__name__
            + " on Iris with JSMA adversarial examples: %.2f%%",
            (accuracy * 100),
        )

    # Neither the attack nor the classifier may have modified the inputs.
    max_drift = float(np.max(np.abs(pristine_iris - self.x_test_iris)))
    self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
def test_scikitlearn(self):
    """Run the JSMA attack against scikit-learn models wrapped as ART classifiers on Iris.

    Each (model class, wrapper class) pair is fitted on the Iris test split,
    attacked with ``SaliencyMapMethod``, and the adversarial samples are
    checked to differ from the originals, to stay within the ``(0, 1)`` clip
    range, and to change at least one prediction. Finally the test inputs are
    verified to be unmodified.
    """
    from sklearn.linear_model import LogisticRegression

    from art.classifiers.scikitlearn import ScikitlearnLogisticRegression

    # SVC and LinearSVC pairings are currently disabled.
    wrappers_by_model = {LogisticRegression: ScikitlearnLogisticRegression}

    (_, _), (features, labels) = self.iris
    pristine_features = features.copy()

    for sk_class, wrapper_class in wrappers_by_model.items():
        wrapped = wrapper_class(model=sk_class(), clip_values=(0, 1))
        wrapped.fit(x=features, y=labels)

        jsma = SaliencyMapMethod(wrapped, theta=1, batch_size=128)
        adv_features = jsma.generate(features)

        # Adversarial samples must differ and respect the clip values.
        self.assertFalse((features == adv_features).all())
        self.assertTrue((adv_features <= 1).all())
        self.assertTrue((adv_features >= 0).all())

        # At least one adversarial prediction must disagree with the true label.
        adv_labels = np.argmax(wrapped.predict(adv_features), axis=1)
        true_labels = np.argmax(labels, axis=1)
        self.assertFalse((true_labels == adv_labels).all())

        accuracy = np.sum(adv_labels == true_labels) / labels.shape[0]
        logger.info(
            'Accuracy of ' + wrapped.__class__.__name__
            + ' on Iris with JSMA adversarial examples: %.2f%%',
            (accuracy * 100),
        )

    # Neither the attack nor the classifier may have modified the inputs.
    max_drift = float(np.max(np.abs(pristine_features - features)))
    self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
def GetAttackers(classifier, x_test, attacker_name):
    """Build the requested attack, craft adversarial samples and time the whole run.

    Args:
        classifier: classifier handed to the attack constructor.
        x_test: samples passed to the attack's ``generate``.
        attacker_name: ``"SMM"`` for ``SaliencyMapMethod`` or ``"PGD"`` for
            ``ProjectedGradientDescent``.

    Returns:
        A tuple ``(adversarial_samples, elapsed_seconds)``; the elapsed time
        covers attack construction as well as generation.

    Raises:
        ValueError: if ``attacker_name`` is neither ``"SMM"`` nor ``"PGD"``.
    """
    clock_start = time.time()

    if attacker_name == "PGD":
        pgd_settings = dict(
            norm=1,
            eps=1,
            eps_step=0.5,
            max_iter=100,
            targeted=False,
            num_random_init=0,
            batch_size=1,
        )
        crafter = ProjectedGradientDescent(classifier=classifier, **pgd_settings)
    elif attacker_name == "SMM":
        crafter = SaliencyMapMethod(classifier=classifier, theta=.5, gamma=1.)
    else:
        raise ValueError("Please get the right attacker's name for the input.")

    adversarial = crafter.generate(x_test)
    return adversarial, time.time() - clock_start
def _test_mnist_targeted(self, classifier):
    """Run a targeted SaliencyMapMethod attack on the first ``NB_TEST`` MNIST test samples.

    Args:
        classifier: the classifier handed to ``SaliencyMapMethod`` and used
            to predict on the adversarial samples.
    """
    (_, _), (images, labels) = self.mnist
    images = images[:NB_TEST]
    labels = labels[:NB_TEST]

    # Redraw the whole target vector until no target coincides with a true label.
    true_labels = np.argmax(labels, axis=1)
    nb_classes = np.unique(true_labels).shape[0]
    targets = np.random.randint(nb_classes, size=NB_TEST)
    while np.any(targets == true_labels):
        targets = np.random.randint(nb_classes, size=NB_TEST)

    jsma = SaliencyMapMethod(classifier, theta=1, batch_size=100)
    adv_images = jsma.generate(images, y=to_categorical(targets, nb_classes))

    # Adversarial inputs must differ from the clean ones and must not be all zeros.
    self.assertFalse((images == adv_images).all())
    self.assertFalse((0. == adv_images).all())

    # Predictions on adversarial inputs must not match every true label.
    predicted = get_labels_np_array(classifier.predict(adv_images))
    self.assertFalse((labels == predicted).all())

    hits = np.argmax(predicted, axis=1) == np.argmax(labels, axis=1)
    acc = np.sum(hits) / labels.shape[0]
    logger.info('Accuracy on adversarial examples: %.2f%%', (acc * 100))
def test_pytorch_mnist(self):
    """Exercise SaliencyMapMethod (targeted, then untargeted) on the PyTorch MNIST classifier.

    Logs the classifier's accuracy on the train and test splits, runs both
    attack variants on the test split, and finally verifies that the test
    inputs were left untouched.
    """
    # Swap axes 1 and 3 of the stored MNIST arrays and cast to float32.
    train_x = np.swapaxes(self.x_train_mnist, 1, 3).astype(np.float32)
    test_x = np.swapaxes(self.x_test_mnist, 1, 3).astype(np.float32)
    pristine_test_x = test_x.copy()

    victim = get_image_classifier_pt()

    def matching_fraction(one_hot_pred, one_hot_true, count):
        # Share of samples whose predicted class equals the true class.
        return np.sum(np.argmax(one_hot_pred, axis=1) == np.argmax(one_hot_true, axis=1)) / count

    # Baseline accuracy on clean data.
    train_pred = get_labels_np_array(victim.predict(train_x))
    accuracy = matching_fraction(train_pred, self.y_train_mnist, self.n_train)
    logger.info("[PyTorch, MNIST] Accuracy on training set: %.2f%%", (accuracy * 100))

    test_pred = get_labels_np_array(victim.predict(test_x))
    accuracy = matching_fraction(test_pred, self.y_test_mnist, self.n_test)
    logger.info("\n[PyTorch, MNIST] Accuracy on test set: %.2f%%", (accuracy * 100))

    def check_adversarial(adv_x):
        # Adversarial inputs must differ from the clean ones and must not be all zeros.
        self.assertFalse((test_x == adv_x).all())
        self.assertFalse((0.0 == adv_x).all())

        # Predictions on adversarial inputs must not match every true label.
        predicted = get_labels_np_array(victim.predict(adv_x))
        self.assertFalse((self.y_test_mnist == predicted).all())

        adv_accuracy = matching_fraction(predicted, self.y_test_mnist, self.n_test)
        logger.info("Accuracy on adversarial examples: %.2f%%", (adv_accuracy * 100))

    # --- Targeted attack ---
    # Redraw the whole target vector until no target coincides with a true label.
    true_labels = np.argmax(self.y_test_mnist, axis=1)
    nb_classes = np.unique(true_labels).shape[0]
    targets = np.random.randint(nb_classes, size=self.n_test)
    while np.any(targets == true_labels):
        targets = np.random.randint(nb_classes, size=self.n_test)

    targeted_attack = SaliencyMapMethod(victim, theta=1, batch_size=100)
    check_adversarial(targeted_attack.generate(test_x, y=to_categorical(targets, nb_classes)))

    # --- Untargeted attack ---
    untargeted_attack = SaliencyMapMethod(victim, theta=1, batch_size=100)
    check_adversarial(untargeted_attack.generate(test_x))

    # Neither the attack nor the classifier may have modified the test inputs.
    max_drift = float(np.max(np.abs(pristine_test_x - test_x)))
    self.assertAlmostEqual(max_drift, 0.0, delta=0.00001)
def JSMA(victims):
    """Return SaliencyMapMethod adversarial samples for ``victims``.

    The attack is built on the module-level ``model`` with default settings.
    """
    return SaliencyMapMethod(model).generate(x=victims)
def saliency_map(classifier, inputs, true_targets, epsilon):
    """Craft SaliencyMapMethod adversarial samples for ``inputs``.

    Args:
        classifier: classifier handed to ``SaliencyMapMethod``.
        inputs: samples passed to the attack's ``generate``.
        true_targets: accepted but not used by this function.
        epsilon: forwarded to the attack as ``theta``.

    Returns:
        Whatever ``SaliencyMapMethod.generate`` returns for ``inputs``.
    """
    # gamma is fixed at 0.11 here; only theta is caller-controlled.
    jsma = SaliencyMapMethod(classifier, theta=epsilon, gamma=0.11)
    return jsma.generate(x=inputs)