def test_subsetscan_detector(self):
    """End-to-end check of the subset-scanning detector on MNIST.

    Clean-vs-clean scans should yield chance-level detection power (0.5),
    while scans against adversarial data should exceed it.
    """
    (x_train, y_train), (x_test, y_test), _, _ = load_dataset('mnist')
    x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
    x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

    # Keras classifier under attack.
    classifier, _ = get_classifier_kr()

    # Craft adversarial counterparts of the train and test sets.
    fgm = FastGradientMethod(classifier, eps=0.5)
    adv_train = fgm.generate(x_train)
    adv_test = fgm.generate(x_test)

    # Mixed clean/adversarial pool used in the final scan.
    mixed_pool = np.concatenate((x_train, adv_train), axis=0)

    # Background data for the detector is the clean training set; it scans
    # activations of layer 1.
    detector = SubsetScanningDetector(classifier, x_train, layer=1)

    # Clean against clean: detection power should be exactly chance level.
    _, _, dpwr = detector.scan(x_test, x_test)
    self.assertAlmostEqual(dpwr, 0.5)

    # Clean against adversarial: detection power must rise above chance.
    _, _, dpwr = detector.scan(x_test, adv_test)
    self.assertGreater(dpwr, 0.5)

    # Clean against the mixed pool with explicit subset sizes.
    _, _, dpwr = detector.scan(x_test, mixed_pool, 85, 15)
    self.assertGreater(dpwr, 0.5)
def fgsm(clf, x_train, x_test, epsilon=0.1):
    """Craft FGSM adversarial examples for the train and test sets.

    :param clf: ART-compatible classifier to attack.
    :param x_train: training inputs to perturb.
    :param x_test: test inputs to perturb.
    :param epsilon: maximum perturbation budget for the attack.
    :return: tuple ``(x_train_adv, x_test_adv)`` of adversarial arrays.
    """
    from art.attacks.fast_gradient import FastGradientMethod

    # Bug fix: the previous version unconditionally reassigned
    # ``epsilon = .1`` here, silently ignoring the caller-supplied budget.
    fgsm_adv_crafter = FastGradientMethod(clf, eps=epsilon)
    x_test_fgsm_adv = fgsm_adv_crafter.generate(x=x_test)
    x_train_fgsm_adv = fgsm_adv_crafter.generate(x=x_train)
    return x_train_fgsm_adv, x_test_fgsm_adv
def test_with_defences(self):
    """FGSM against a Keras classifier wrapped with the feature-squeezing defence."""
    # Get MNIST and truncate to the test sizes.
    (x_train, y_train), (x_test, y_test) = self.mnist
    x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
    x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

    # Rewrap the ready-trained Keras model, attaching the defence.
    model = self.classifier_k._model
    classifier = KerasClassifier((0, 1), model, defences='featsqueeze1')

    attack = FastGradientMethod(classifier, eps=1)
    x_train_adv = attack.generate(x_train)
    x_test_adv = attack.generate(x_test)

    # The attack must have perturbed at least one input per split...
    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    # ...and flipped at least one prediction.
    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_train == train_y_pred).all())
    self.assertFalse((y_test == test_y_pred).all())

    def _accuracy(x_adv, labels):
        # Top-1 accuracy of the defended classifier on adversarial inputs.
        probs = classifier.predict(x_adv)
        return np.sum(np.argmax(probs, axis=1) == np.argmax(labels, axis=1)) / labels.shape[0]

    print('\nAccuracy on adversarial train examples with feature squeezing: %.2f%%'
          % (_accuracy(x_train_adv, y_train) * 100))
    print('\naccuracy on adversarial test examples: %.2f%%' % (_accuracy(x_test_adv, y_test) * 100))
def test_with_defences(self):
    """FGSM via query-efficient gradient estimation on a feature-squeezed Keras model."""
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Attach a 1-bit feature-squeezing defence to the ready-trained model...
    model = self.classifier_k._model
    fs = FeatureSqueezing(bit_depth=1, clip_values=(0, 1))
    defended = KerasClassifier(model=model, clip_values=(0, 1), defences=fs)

    # ...then restrict the attacker to query-based gradient estimates.
    classifier = QueryEfficientBBGradientEstimation(defended, 20, 1 / 64., round_samples=1 / 255.)

    attack = FastGradientMethod(classifier, eps=1)
    x_train_adv = attack.generate(x_train)
    x_test_adv = attack.generate(x_test)

    # Inputs must have been perturbed...
    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    # ...and some labels flipped.
    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_train == train_y_pred).all())
    self.assertFalse((y_test == test_y_pred).all())

    def _accuracy(x_adv, labels):
        # Top-1 accuracy of the wrapped classifier on adversarial inputs.
        probs = classifier.predict(x_adv)
        return np.sum(np.argmax(probs, axis=1) == np.argmax(labels, axis=1)) / labels.shape[0]

    logger.info('Accuracy on adversarial train examples with feature squeezing and limited query info: %.2f%%',
                (_accuracy(x_train_adv, y_train) * 100))
    logger.info('Accuracy on adversarial test examples with feature squeezing and limited query info: %.2f%%',
                (_accuracy(x_test_adv, y_test) * 100))
def main(argv):
    """Craft FGSM adversarial samples against a PyTorch CNN and report metrics.

    Expects flag/value pairs in ``argv``: ``--datax``, ``--datay``,
    ``--weights`` and ``--epsilon``. Input paths are resolved relative to
    ``$DATA_DIR``; the metrics report and adversarial samples are written
    under ``$RESULT_DIR``.

    :param argv: command-line arguments (``argv[0]`` is the program name).
    """
    if len(argv) < 2:
        sys.exit("Not enough arguments provided.")

    global network_definition_filename, weights_filename, dataset_filename

    # Walk the flag/value pairs. Bounding by len(argv) (instead of the old
    # hard-coded ``while i <= 8``) avoids IndexError when fewer than four
    # flag/value pairs are supplied.
    i = 1
    while i < len(argv) - 1:
        arg = str(argv[i])
        print(arg)
        if arg == "--datax":
            dataset_filenamex = os.path.join(os.environ["DATA_DIR"], str(argv[i + 1]))
        if arg == "--datay":
            dataset_filenamey = os.path.join(os.environ["DATA_DIR"], str(argv[i + 1]))
        if arg == "--weights":
            weights_filename = os.path.join(os.environ["DATA_DIR"], str(argv[i + 1]))
        if arg == "--epsilon":
            epsilon = float(argv[i + 1])
        i += 2

    print("dataset_x:", dataset_filenamex)
    print("dataset_y:", dataset_filenamey)
    print("weights:", weights_filename)

    # Load & compile model on GPU when available.
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = ThreeLayerCNN().to(device)
    model.load_state_dict(torch.load(weights_filename))
    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Wrap in an ART classifier: clip range (0, 1), 3x64x64 inputs, 2 classes.
    classifier = PyTorchClassifier((0, 1), model, loss_fn, optimizer, (1, 3, 64, 64), 2)

    # Load data set.
    x = np.load(dataset_filenamex)
    y = np.loadtxt(dataset_filenamey)

    # Craft adversarial samples using FGSM.
    crafter = FastGradientMethod(classifier, eps=epsilon)
    x_samples = crafter.generate(x)

    # Obtain all metrics (robustness score, perturbation metric, reduction in confidence).
    metrics, y_pred_orig, y_pred_adv = get_metrics(model, x, x_samples, y)
    print("metrics:", metrics)

    report_file = os.path.join(os.environ["RESULT_DIR"], "report.txt")
    with open(report_file, "w") as report:
        report.write(json.dumps(metrics))

    adv_samples_file = os.path.join(os.environ["RESULT_DIR"], "adv_samples")
    print("adversarial samples saved to: ", adv_samples_file)
    np.savez(adv_samples_file, x_original=x, x_adversarial=x_samples, y=y)
def test_without_defences(self):
    """FGSM through the query-efficient gradient-estimation wrapper, no defences."""
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Wrap the ready-trained Keras model so the attacker only gets
    # query-based gradient estimates.
    classifier = QueryEfficientBBGradientEstimation(self.classifier_k, 20, 1 / 64., round_samples=1 / 255.)

    attack = FastGradientMethod(classifier, eps=1)
    x_train_adv = attack.generate(x_train)
    x_test_adv = attack.generate(x_test)

    # Inputs must have been perturbed...
    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    # ...and some labels flipped.
    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_train == train_y_pred).all())
    self.assertFalse((y_test == test_y_pred).all())

    def _accuracy(x_adv, labels):
        # Top-1 accuracy of the wrapped classifier on adversarial inputs.
        probs = classifier.predict(x_adv)
        return np.sum(np.argmax(probs, axis=1) == np.argmax(labels, axis=1)) / labels.shape[0]

    logger.info('Accuracy on adversarial train examples with limited query info: %.2f%%',
                (_accuracy(x_train_adv, y_train) * 100))
    logger.info('Accuracy on adversarial test examples with limited query info: %.2f%%',
                (_accuracy(x_test_adv, y_test) * 100))
def _test_with_defences(self, custom_activation=False):
    """FGSM against a feature-squeezed Keras classifier (string-named defence).

    :param custom_activation: forwarded to ``KerasClassifier``.
    """
    # Get MNIST
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Rewrap the ready-trained Keras model, attaching the defence.
    model = self.classifier_k._model
    classifier = KerasClassifier((0, 1), model, defences='featsqueeze1',
                                 custom_activation=custom_activation)

    attack = FastGradientMethod(classifier, eps=1)
    x_train_adv = attack.generate(x_train)
    x_test_adv = attack.generate(x_test)

    # Inputs must have been perturbed...
    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    # ...and some labels flipped.
    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_train == train_y_pred).all())
    self.assertFalse((y_test == test_y_pred).all())

    def _accuracy(x_adv, labels):
        # Top-1 accuracy of the defended classifier on adversarial inputs.
        probs = classifier.predict(x_adv)
        return np.sum(np.argmax(probs, axis=1) == np.argmax(labels, axis=1)) / labels.shape[0]

    logger.info('Accuracy on adversarial train examples with feature squeezing: %.2f%%',
                (_accuracy(x_train_adv, y_train) * 100))
    logger.info('Accuracy on adversarial test examples: %.2f%%',
                (_accuracy(x_test_adv, y_test) * 100))
def test_with_preprocessing(self):
    """Train a small CNN with feature squeezing and attack it with FGSM."""
    session = tf.Session()
    k.set_session(session)

    comp_params = {
        "loss": 'categorical_crossentropy',
        "optimizer": 'adam',
        "metrics": ['accuracy']
    }

    # Get a small MNIST subset.
    batch_size, nb_train, nb_test = 100, 1000, 100
    (X_train, Y_train), (X_test, Y_test), _, _ = load_mnist()
    X_train, Y_train = X_train[:nb_train], Y_train[:nb_train]
    X_test, Y_test = X_test[:nb_test], Y_test[:nb_test]

    # Build, compile and briefly train the defended classifier.
    classifier = CNN(X_train[0].shape, act="relu", defences=["featsqueeze1"])
    classifier.compile(comp_params)
    classifier.fit(X_train, Y_train, epochs=1, batch_size=batch_size, verbose=0)

    # Baseline accuracies on clean data.
    scores = classifier.evaluate(X_train, Y_train)
    print("\naccuracy on training set: %.2f%%" % (scores[1] * 100))
    scores = classifier.evaluate(X_test, Y_test)
    print("\naccuracy on test set: %.2f%%" % (scores[1] * 100))

    # Craft FGSM samples with a full-scale budget clipped to [0, 1].
    attack_params = {
        "verbose": 0,
        "clip_min": 0.,
        "clip_max": 1.,
        "eps": 1.
    }
    attack = FastGradientMethod(classifier, session)
    X_train_adv = attack.generate(X_train, **attack_params)
    X_test_adv = attack.generate(X_test, **attack_params)

    # Inputs must have been perturbed...
    self.assertFalse((X_train == X_train_adv).all())
    self.assertFalse((X_test == X_test_adv).all())

    # ...and some labels flipped.
    train_y_pred = get_labels_np_array(classifier.predict(X_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(X_test_adv))
    self.assertFalse((Y_train == train_y_pred).all())
    self.assertFalse((Y_test == test_y_pred).all())

    # Report accuracies on the adversarial data.
    scores = classifier.evaluate(X_train_adv, Y_train)
    print('\naccuracy on adversarial train examples: %.2f%%' % (scores[1] * 100))
    scores = classifier.evaluate(X_test_adv, Y_test)
    print('\naccuracy on adversarial test examples: %.2f%%' % (scores[1] * 100))
def test_multi_attack_mnist(self):
    """
    Test the adversarial trainer using two attackers: FGSM and DeepFool. The source and target models of the
    attack are two CNNs on MNIST trained for 5 epochs. FGSM and DeepFool both generate the attack images on the
    same source classifier. The test checks if accuracy on adversarial samples increases after adversarially
    training the model.
    :return: None
    """
    session = tf.Session()
    k.set_session(session)

    # Load MNIST and truncate to the test sizes.
    (x_train, y_train), (x_test, y_test), _, _ = load_dataset('mnist')
    x_train, y_train, x_test, y_test = x_train[:NB_TRAIN], y_train[:NB_TRAIN], x_test[:NB_TEST], y_test[:NB_TEST]
    im_shape = x_train[0].shape

    comp_params = {
        'loss': 'categorical_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy']
    }
    params = {'epochs': 5, 'batch_size': BATCH_SIZE}

    # Create and fit target classifier.
    classifier_tgt = CNN(im_shape, dataset='mnist')
    classifier_tgt.compile(comp_params)
    classifier_tgt.fit(x_train, y_train, **params)

    # Create and fit source classifier.
    # Bug fix: this previously called `classifier_tgt.fit(...)` a second time,
    # leaving the source model untrained so the attacks ran on random weights.
    classifier_src = CNN(im_shape, dataset='mnist')
    classifier_src.compile(comp_params)
    classifier_src.fit(x_train, y_train, **params)

    # Create FGSM and DeepFool attackers on the source model.
    adv1 = FastGradientMethod(classifier_src, session)
    adv2 = DeepFool(classifier_src, session)
    x_adv = np.vstack((adv1.generate(x_test), adv2.generate(x_test)))
    y_adv = np.vstack((y_test, y_test))
    print(y_adv.shape)

    # Baseline accuracy of the target model on the transfer attack.
    acc = classifier_tgt.evaluate(x_adv, y_adv)

    # Perform adversarial training.
    adv_trainer = AdversarialTrainer(classifier_tgt, [adv1, adv2])
    adv_trainer.fit(x_train, y_train, **params)

    # Evaluate that accuracy on adversarial samples has improved.
    acc_adv_trained = adv_trainer.classifier.evaluate(x_adv, y_adv)
    self.assertTrue(acc_adv_trained >= acc)
def test_binary_activation_detector(self):
    """
    Test the binary activation detector end-to-end.
    :return:
    """
    # Get MNIST and truncate to the test sizes.
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
    x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]

    # Keras classifier under attack.
    classifier, _ = get_classifier_kr()

    # Generate adversarial samples (slices kept as in the original test).
    attacker = FastGradientMethod(classifier, eps=0.1)
    x_train_adv = attacker.generate(x_train[:NB_TRAIN])
    x_test_adv = attacker.generate(x_test[:NB_TRAIN])

    # Detector training data: first half clean (label [1, 0]),
    # second half adversarial (label [0, 1]).
    x_train_detector = np.concatenate((x_train[:NB_TRAIN], x_train_adv), axis=0)
    y_train_detector = np.concatenate((np.array([[1, 0]] * NB_TRAIN),
                                       np.array([[0, 1]] * NB_TRAIN)), axis=0)

    # Small CNN that classifies layer-0 activations into clean/adversarial.
    activation_shape = classifier.get_activations(x_test[:1], 0).shape[1:]
    number_outputs = 2
    model = Sequential()
    model.add(MaxPooling2D(pool_size=(2, 2), input_shape=activation_shape))
    model.add(Flatten())
    model.add(Dense(number_outputs, activation='softmax'))
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(lr=0.01),
                  metrics=['accuracy'])

    # Create the detector over activations at layer=0 and train it.
    detector = BinaryActivationDetector(
        classifier=classifier,
        detector=KerasClassifier(model=model, clip_values=(0, 1), use_logits=False),
        layer=0)
    detector.fit(x_train_detector, y_train_detector, nb_epochs=2, batch_size=128)

    # Apply detector on clean and adversarial test data.
    test_detection = np.argmax(detector.predict(x_test), axis=1)
    test_adv_detection = np.argmax(detector.predict(x_test_adv), axis=1)

    # There must be at least one true positive and one true negative.
    nb_true_positives = len(np.where(test_adv_detection == 1)[0])
    nb_true_negatives = len(np.where(test_detection == 0)[0])
    logger.debug('Number of true positives detected: %i', nb_true_positives)
    logger.debug('Number of true negatives detected: %i', nb_true_negatives)
    self.assertGreater(nb_true_positives, 0)
    self.assertGreater(nb_true_negatives, 0)
def test_iris_clipped(self):
    """Untargeted FGM on Iris through the query-efficient wrapper; outputs must stay in [0, 1]."""
    (_, _), (x_test, y_test) = self.iris
    base, _ = get_iris_classifier_kr()
    classifier = QueryEfficientBBGradientEstimation(base, 20, 1 / 64., round_samples=1 / 255.)

    # Untargeted attack with a small budget.
    attack = FastGradientMethod(classifier, eps=.1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())

    # Clip values on the classifier bound the adversarial samples.
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    true_labels = np.argmax(y_test, axis=1)
    self.assertFalse((true_labels == preds_adv).all())
    acc = np.sum(preds_adv == true_labels) / y_test.shape[0]
    logger.info('Accuracy on Iris with limited query info: %.2f%%', (acc * 100))
def test_iris_unbounded(self):
    """FGM on an unclipped Iris classifier through the query-efficient wrapper; outputs may leave [0, 1]."""
    (_, _), (x_test, y_test) = self.iris
    base, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values, then wrap it.
    unclipped = KerasClassifier(model=base._model, use_logits=False, channel_index=1)
    classifier = QueryEfficientBBGradientEstimation(unclipped, 20, 1 / 64., round_samples=1 / 255.)

    attack = FastGradientMethod(classifier, eps=1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())

    # With no clipping, the large eps must push some features out of range.
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    true_labels = np.argmax(y_test, axis=1)
    self.assertFalse((true_labels == preds_adv).all())
    acc = np.sum(preds_adv == true_labels) / y_test.shape[0]
    logger.info('Accuracy on Iris with limited query info: %.2f%%', (acc * 100))
def test_iris_k_unbounded(self):
    """FGM on an unclipped Keras Iris classifier; outputs may leave [0, 1]."""
    (_, _), (x_test, y_test) = self.iris
    base, _ = get_iris_classifier_kr()

    # Recreate a classifier without clip values.
    classifier = KerasClassifier(model=base._model, use_logits=False, channel_index=1)

    attack = FastGradientMethod(classifier, eps=1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())

    # With no clipping, the large eps must push some features out of range.
    self.assertTrue((x_test_adv > 1).any())
    self.assertTrue((x_test_adv < 0).any())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    true_labels = np.argmax(y_test, axis=1)
    self.assertFalse((true_labels == preds_adv).all())
    acc = np.sum(preds_adv == true_labels) / y_test.shape[0]
    logger.info('Accuracy on Iris with FGM adversarial examples: %.2f%%', (acc * 100))
def _test_mnist_targeted(self, classifier):
    """Targeted minimal-perturbation FGSM: steer each sample to its runner-up class.

    :param classifier: ART-compatible classifier to attack.
    """
    # Get MNIST
    (_, _), (x_test, y_test) = self.mnist

    # FGSM with the default np.inf norm, in targeted mode.
    attack = FastGradientMethod(classifier, eps=1.0, targeted=True)

    # Build one-hot targets: the second-most-likely class of each clean
    # prediction (vectorized form of the original per-sample loop).
    pred_sort = classifier.predict(x_test).argsort(axis=1)
    n_samples = x_test.shape[0]
    y_test_adv = np.zeros((n_samples, 10))
    y_test_adv[np.arange(n_samples), pred_sort[:, -2]] = 1.0

    x_test_adv = attack.generate(x_test, minimal=True, eps_step=0.01, eps=1.0, y=y_test_adv)
    self.assertFalse((x_test == x_test_adv).all())

    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertEqual(y_test_adv.shape, test_y_pred.shape)
    # At least half of the samples must reach their target class.
    self.assertTrue((y_test_adv == test_y_pred).sum() >= x_test.shape[0] // 2)
def test_shared_model_mnist(self):
    """
    Test the adversarial trainer using one FGSM attacker. The source and target models of the attack are the
    same CNN on MNIST trained for 5 epochs. The test checks if accuracy on adversarial samples increases after
    adversarially training the model.
    :return: None
    """
    session = tf.Session()
    k.set_session(session)

    # Load MNIST and truncate to the test sizes.
    (x_train, y_train), (x_test, y_test), _, _ = load_dataset('mnist')
    x_train, y_train, x_test, y_test = x_train[:NB_TRAIN], y_train[:NB_TRAIN], x_test[:NB_TEST], y_test[:NB_TEST]

    # Create and fit the classifier.
    fit_params = {'epochs': 5, 'batch_size': BATCH_SIZE}
    classifier = CNN(x_train[0].shape, dataset='mnist')
    classifier.compile({
        'loss': 'categorical_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy']
    })
    classifier.fit(x_train, y_train, **fit_params)

    # Attack the model with FGSM before hardening.
    adv = FastGradientMethod(classifier, session)
    x_adv = adv.generate(x_test)
    acc = classifier.evaluate(x_adv, y_test)

    # Perform adversarial training on the same model.
    adv_trainer = AdversarialTrainer(classifier, adv)
    adv_trainer.fit(x_train, y_train, **fit_params)

    # Accuracy on the fixed adversarial set must not degrade.
    acc_adv_trained = adv_trainer.classifier.evaluate(x_adv, y_test)
    self.assertTrue(acc_adv_trained >= acc)
def _test_with_defences(self, custom_activation=False):
    """FGM against a Keras MNIST classifier defended with 1-bit feature squeezing.

    :param custom_activation: forwarded to ``KerasClassifier``.
    """
    from art.defences import FeatureSqueezing

    # Get MNIST
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Rewrap the ready-trained Keras model with the defence attached.
    model = self.classifier_k._model
    fs = FeatureSqueezing(bit_depth=1, clip_values=(0, 1))
    classifier = KerasClassifier(model=model, clip_values=(0, 1), defences=fs,
                                 custom_activation=custom_activation)

    attack = FastGradientMethod(classifier, eps=1, batch_size=128)
    x_train_adv = attack.generate(x_train)
    x_test_adv = attack.generate(x_test)

    # Inputs must have been perturbed...
    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    # ...and some labels flipped.
    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_train == train_y_pred).all())
    self.assertFalse((y_test == test_y_pred).all())

    def _accuracy(x_adv, labels):
        # Top-1 accuracy of the defended classifier on adversarial inputs.
        probs = classifier.predict(x_adv)
        return np.sum(np.argmax(probs, axis=1) == np.argmax(labels, axis=1)) / labels.shape[0]

    logger.info('Accuracy on MNIST with FGM adversarial train examples with feature squeezing: %.2f%%',
                (_accuracy(x_train_adv, y_train) * 100))
    logger.info('Accuracy on MNIST with FGM adversarial test examples: %.2f%%',
                (_accuracy(x_test_adv, y_test) * 100))
def test_iris_pt(self):
    """FGM on the Iris PyTorch classifier: untargeted, then targeted."""
    (_, _), (x_test, y_test) = self.iris
    classifier = get_iris_classifier_pt()
    true_labels = np.argmax(y_test, axis=1)

    # --- Untargeted attack ---
    attack = FastGradientMethod(classifier, eps=.1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    # Clip values keep the samples in the valid range.
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    self.assertFalse((true_labels == preds_adv).all())
    acc = np.sum(preds_adv == true_labels) / y_test.shape[0]
    logger.info('Accuracy on Iris with FGM adversarial examples: %.2f%%', (acc * 100))

    # --- Targeted attack towards random labels ---
    targets = random_targets(y_test, nb_classes=3)
    attack = FastGradientMethod(classifier, targeted=True, eps=.1, batch_size=128)
    x_test_adv = attack.generate(x_test, y=targets)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1).all())
    self.assertTrue((x_test_adv >= 0).all())

    preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
    target_labels = np.argmax(targets, axis=1)
    # At least one sample must reach its target class.
    self.assertTrue((target_labels == preds_adv).any())
    acc = np.sum(preds_adv == target_labels) / y_test.shape[0]
    logger.info('Success rate of targeted FGM on Iris: %.2f%%', (acc * 100))
def _test_backend_mnist(self, classifier):
    """Run FGM against ``classifier`` on MNIST: inf-norm, minimal perturbation, then L1 and L2 norms.

    :param classifier: ART-compatible classifier to attack.
    """
    # Get MNIST
    (x_train, y_train), (x_test, y_test) = self.mnist

    def _accuracy(y_pred, labels):
        # Top-1 agreement between one-hot predictions and labels.
        return np.sum(np.argmax(y_pred, axis=1) == np.argmax(labels, axis=1)) / labels.shape[0]

    # --- FGSM with np.inf norm ---
    attack = FastGradientMethod(classifier, eps=1)
    x_test_adv = attack.generate(x_test, batch_size=2)
    x_train_adv = attack.generate(x_train, batch_size=4)

    self.assertFalse((x_train == x_train_adv).all())
    self.assertFalse((x_test == x_test_adv).all())

    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_train == train_y_pred).all())
    self.assertFalse((y_test == test_y_pred).all())

    logger.info('Accuracy on adversarial train examples: %.2f%%', (_accuracy(train_y_pred, y_train) * 100))
    logger.info('Accuracy on adversarial test examples: %.2f%%', (_accuracy(test_y_pred, y_test) * 100))

    # --- Minimal perturbations ---
    attack_params = {"minimal": True, "eps_step": .1, "eps_max": 1.}
    x_train_adv_min = attack.generate(x_train, **attack_params)
    x_test_adv_min = attack.generate(x_test, **attack_params)

    # Minimal-perturbation samples must differ from both the full-eps
    # adversarial samples and the clean originals.
    self.assertFalse((x_train_adv_min == x_train_adv).all())
    self.assertFalse((x_test_adv_min == x_test_adv).all())
    self.assertFalse((x_train == x_train_adv_min).all())
    self.assertFalse((x_test == x_test_adv_min).all())

    train_y_pred = get_labels_np_array(classifier.predict(x_train_adv_min))
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv_min))
    self.assertFalse((y_train == train_y_pred).all())
    self.assertFalse((y_test == test_y_pred).all())

    logger.info('Accuracy on adversarial train examples with minimal perturbation: %.2f%%',
                (_accuracy(train_y_pred, y_train) * 100))
    logger.info('Accuracy on adversarial test examples with minimal perturbation: %.2f%%',
                (_accuracy(test_y_pred, y_test) * 100))

    # --- L_1 norm ---
    attack = FastGradientMethod(classifier, eps=1, norm=1)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_test == test_y_pred).all())
    logger.info('Accuracy on adversarial test examples with L1 norm: %.2f%%',
                (_accuracy(test_y_pred, y_test) * 100))

    # --- L_2 norm ---
    attack = FastGradientMethod(classifier, eps=1, norm=2)
    x_test_adv = attack.generate(x_test)
    self.assertFalse((x_test == x_test_adv).all())
    test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((y_test == test_y_pred).all())
    logger.info('Accuracy on adversarial test examples with L2 norm: %.2f%%',
                (_accuracy(test_y_pred, y_test) * 100))
def test_binary_input_detector(self):
    """
    Test the binary input detector end-to-end.
    :return:
    """
    # Initialize a tf session
    session = tf.Session()
    k.set_session(session)

    # Get MNIST
    batch_size, nb_train, nb_test = 100, 1000, 10
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:NB_TRAIN], y_train[:NB_TRAIN]
    x_test, y_test = x_test[:NB_TEST], y_test[:NB_TEST]
    input_shape = x_train.shape[1:]
    nb_classes = 10

    # Create simple CNN
    model = Sequential()
    model.add(Conv2D(4, kernel_size=(5, 5), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(nb_classes, activation='softmax'))
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(lr=0.01),
                  metrics=['accuracy'])

    # Create classifier and train it:
    classifier = KerasClassifier((0, 1), model, use_logits=False)
    classifier.fit(x_train, y_train, nb_epochs=5, batch_size=128)

    # Generate adversarial samples:
    attacker = FastGradientMethod(classifier, eps=0.1)
    x_train_adv = attacker.generate(x_train[:nb_train])
    x_test_adv = attacker.generate(x_test[:nb_test])

    # Compile training data for detector: first half clean ([1, 0]),
    # second half adversarial ([0, 1]).
    x_train_detector = np.concatenate((x_train[:nb_train], x_train_adv), axis=0)
    y_train_detector = np.concatenate((np.array([[1, 0]] * nb_train),
                                       np.array([[0, 1]] * nb_train)), axis=0)

    # Create a simple CNN for the detector.
    # Note: we use the same architecture as for the classifier, except for the number of outputs (=2)
    model = Sequential()
    model.add(Conv2D(4, kernel_size=(5, 5), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(2, activation='softmax'))
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(lr=0.01),
                  metrics=['accuracy'])

    # Create detector and train it:
    detector = BinaryInputDetector(KerasClassifier((0, 1), model, use_logits=False))
    detector.fit(x_train_detector, y_train_detector, nb_epochs=2, batch_size=128)

    # Apply detector on clean and adversarial test data.
    # Bug fix: the detector was previously invoked as `detector(x_test)`;
    # detectors expose `predict`, as in test_binary_activation_detector.
    test_detection = np.argmax(detector.predict(x_test), axis=1)
    test_adv_detection = np.argmax(detector.predict(x_test_adv), axis=1)

    # Assert there is at least one true positive and negative:
    nb_true_positives = len(np.where(test_adv_detection == 1)[0])
    nb_true_negatives = len(np.where(test_detection == 0)[0])
    self.assertTrue(nb_true_positives > 0)
    self.assertTrue(nb_true_negatives > 0)
eps_step=0.01, max_iter=100, batch_size=batch_size) x_test_adv = adv_crafter_pgd_100.generate(x=test_dataset_array) # Test the classifier on adversarial exmaples predictions = cifar_classifier.predict(x_test_adv) accuracy = np.sum( np.argmax(predictions, axis=1) == test_label_dataset_array) / len( test_label_dataset_array) print('Accuracy after PGD-100 attack: {}%'.format(accuracy * 100)) # FGSM adv_crafter_fgsm = FastGradientMethod(cifar_classifier, eps=epsilon, eps_step=0.01, batch_size=batch_size) x_test_adv = adv_crafter_fgsm.generate(x=test_dataset_array) # Test the classifier on adversarial exmaples predictions = cifar_classifier.predict(x_test_adv) accuracy = np.sum( np.argmax(predictions, axis=1) == test_label_dataset_array) / len( test_label_dataset_array) print('Accuracy after FGSM attack: {}%'.format(accuracy * 100)) # Deepfool adv_crafter_deepfool = DeepFool(cifar_classifier, batch_size=batch_size) x_test_adv = adv_crafter_deepfool.generate(x=test_dataset_array) predictions = cifar_classifier.predict(x_test_adv)
def main(argv):
    """Craft FGSM adversarial samples against a Keras model and report metrics.

    Expects flag/value pairs in ``argv``: ``--data``, ``--networkdefinition``,
    ``--weights`` and ``--epsilon``. Input paths are resolved relative to
    ``$DATA_DIR``; the metrics report and adversarial samples are written
    under ``$RESULT_DIR``.

    :param argv: command-line arguments (``argv[0]`` is the program name).
    """
    if len(argv) < 2:
        sys.exit("Not enough arguments provided.")

    global network_definition_filename, weights_filename, dataset_filename

    # Walk the flag/value pairs. Bounding by len(argv) (instead of the old
    # hard-coded ``while i <= 8``) avoids IndexError when fewer than four
    # flag/value pairs are supplied.
    i = 1
    while i < len(argv) - 1:
        arg = str(argv[i])
        print(arg)
        if arg == "--data":
            dataset_filename = os.path.join(os.environ["DATA_DIR"], str(argv[i + 1]))
        if arg == "--networkdefinition":
            network_definition_filename = os.path.join(os.environ["DATA_DIR"], str(argv[i + 1]))
        if arg == "--weights":
            weights_filename = os.path.join(os.environ["DATA_DIR"], str(argv[i + 1]))
        if arg == "--epsilon":
            epsilon = float(argv[i + 1])
        i += 2

    print("dataset : ", dataset_filename)
    print("network definition : ", network_definition_filename)
    print("weights : ", weights_filename)

    # Load & compile model from its JSON architecture plus weights file.
    json_file = open(network_definition_filename, 'r')
    model_json = json_file.read()
    json_file.close()
    model = model_from_json(model_json)
    model.load_weights(weights_filename)
    comp_params = {
        'loss': 'categorical_crossentropy',
        'optimizer': 'adam',
        'metrics': ['accuracy']
    }
    model.compile(**comp_params)

    # Create keras classifier with clip range (0, 1).
    classifier = KerasClassifier((0, 1), model)

    # Load data set.
    pf = np.load(dataset_filename)
    x = pf['x_test']
    y = pf['y_test']

    # Pre-process: add channel axis, scale to [0, 1], one-hot encode labels.
    x = np.expand_dims(x, axis=3)
    x = x.astype('float32') / 255
    y = np_utils.to_categorical(y, 10)

    # Craft adversarial samples using FGSM.
    crafter = FastGradientMethod(classifier, eps=epsilon)
    x_samples = crafter.generate(x)

    # Obtain all metrics (robustness score, perturbation metric, reduction in confidence).
    metrics = get_metrics(model, x, x_samples, y)
    print("metrics : ", metrics)

    report_file = os.path.join(os.environ["RESULT_DIR"], "report.txt")
    with open(report_file, "w") as report:
        report.write(json.dumps(metrics))

    adv_samples_file = os.path.join(os.environ["RESULT_DIR"], 'adv_samples')
    print("adversarial samples saved to : ", adv_samples_file)
    np.savez(adv_samples_file, x_original=x, x_adversarial=x_samples, y=y)
optimizer='adam', metrics=['accuracy']) classifier = KerasClassifier((min_, max_), model=model) classifier.fit(x_train, y_train, nb_epochs=5, batch_size=128) # Evaluate the classifier on the test set preds = np.argmax(classifier.predict(x_test), axis=1) acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0] print("\nTest accuracy: %.2f%%" % (acc * 100)) # Craft adversarial samples ################ FGSM; epsilon = .1 # Maximum perturbation adv_crafter = FastGradientMethod(classifier) x_test_adv = adv_crafter.generate(x=x_test, eps=epsilon) # Evaluate the classifier on the adversarial examples preds = np.argmax(classifier.predict(x_test_adv), axis=1) acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0] print("\nTest accuracy on adversarial sample: %.2f%%" % (acc * 100)) # grab a particular example to play wit a = (preds != np.argmax(y_test, axis=1)) nat_img = x_test[a] adv_img = x_test_adv[a] adv_nse = adv_img - nat_img adv_prd = preds[a] # compute variance and plot (some) example(s) adv_var = np.sqrt(np.var(adv_nse) / np.var(nat_img)) adv_plt = np.column_stack([ nat_img[0].reshape(28, 28), adv_nse[0].reshape(28, 28),
# predictions = mnist_classifier.predict(x_test_adv) # accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array) # print('Accuracy after PGD-20 attack: {}%'.format(accuracy * 100)) # PGD-100 adv_crafter_pgd_100 = ProjectedGradientDescent(mnist_classifier, max_iter=100, batch_size=batch_size) x_test_adv = adv_crafter_pgd_100.generate(x=test_dataset_array) # Test the classifier on adversarial exmaples predictions = mnist_classifier.predict(x_test_adv) accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array) print('Accuracy after PGD-100 attack: {}%'.format(accuracy * 100)) # FGSM adv_crafter_fgsm = FastGradientMethod(mnist_classifier, eps=epsilon, batch_size=batch_size) x_test_adv = adv_crafter_fgsm.generate(x=test_dataset_array) # Test the classifier on adversarial exmaples predictions = mnist_classifier.predict(x_test_adv) accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array) print('Accuracy after FGSM attack: {}%'.format(accuracy * 100)) # DeepFool adv_crafter_deepfool = CarliniLInfMethod(mnist_classifier, batch_size=batch_size) x_test_adv = adv_crafter_deepfool.generate(x=test_dataset_array) predictions = mnist_classifier.predict(x_test_adv) accuracy = np.sum(np.argmax(predictions, axis=1) == test_label_dataset_array) / len(test_label_dataset_array) print('Accuracy after DeepFool attack: {}%'.format(accuracy * 100))
def run_ssc(test_object, outs):
    """Run SS-cover (sign-sign MC/DC-style) test generation on test_object.dnn.

    Repeatedly picks a decision feature via get_ssc_next, searches for a
    test-input pair with ssc_search (backed by an FGSM attacker), logs one
    result row per attempt to the results file produced by ssc_setup, and
    terminates the process (sys.exit) once every decision feature of the
    current layer has been covered.

    :param test_object: project test descriptor (dnn, layer_indices,
        feature_indices, cond_ratio, labels, top_classes, inp_ub, ...)
    :param outs: output directory/handle forwarded to ssc_setup
    """
    print('To run ssc\n')
    f_results, layer_functions, cover_layers, _ = ssc_setup(test_object, outs)
    d_advs = []  # L-inf distances of the adversarial examples found so far
    # Write the CSV-ish header of the per-attempt results log.
    f = open(f_results, "a")
    f.write('#ssc runs; #test cases; #adversarial examples; is feasible; is top-1 adversarial example; is top-x adversarial example; condition feature size; L infinity distance; L0 distance; decision layer index; dec feature; #condition layer neurons; new labels; original labels; coverage; local coverage\n')
    f.close()
    # --- Count the total number of decision features (tot_decs). ---
    # Dense layers (or conv layers whose input/output spatial dims differ)
    # contribute their full ssc_map size; otherwise only positions where a
    # full kernel fits are counted.
    tot_decs = 0
    if test_object.layer_indices == None:
        # No layer restriction: count over all cover layers.
        for i in range(1, len(cover_layers)):
            if i == 1 and is_input_layer(test_object.dnn.layers[0]):
                continue
            else:
                csp = cover_layers[i].layer.input.shape
                dsp = cover_layers[i].ssc_map.shape
                if is_dense_layer(cover_layers[i].layer) or not (csp[1] == dsp[1] and csp[2] == dsp[2]):
                    tot_decs += cover_layers[i].ssc_map.size
                else:
                    ks = cover_layers[i].layer.kernel_size
                    sp = cover_layers[i].ssc_map.shape
                    tot_decs += ((sp[1] - ks[0] + 1) * (sp[2] - ks[1] + 1) * sp[3])
    else:
        # Restricted to user-selected layers (and optionally conv features).
        print(test_object.layer_indices, test_object.feature_indices)
        for i in range(1, len(cover_layers)):
            if cover_layers[i].layer_index in test_object.layer_indices:
                print('****', i)
                csp = cover_layers[i].layer.input.shape
                dsp = cover_layers[i].ssc_map.shape
                if is_dense_layer(cover_layers[i].layer) or not (csp[1] == dsp[1] and csp[2] == dsp[2]):
                    tmp_decs = cover_layers[i].ssc_map.size
                else:
                    ks = cover_layers[i].layer.kernel_size
                    dsp = cover_layers[i].ssc_map.shape
                    tmp_decs = ((dsp[1] - ks[0] + 1) * (dsp[2] - ks[1] + 1) * dsp[3])
                if is_conv_layer(cover_layers[i].layer):
                    if not test_object.feature_indices == None:
                        # Scale by the fraction of selected feature maps.
                        print('**', tmp_decs)
                        tmp_decs = tmp_decs * (len(test_object.feature_indices) * 1.0 / dsp[3])
                        print('**', tmp_decs)
                tot_decs += tmp_decs
    print('tot_decs', tot_decs)
    tot_coverage = 0.0
    ## define a global attacker
    classifier = KerasClassifier((MIN, -MIN), model=test_object.dnn)
    adv_crafter = FastGradientMethod(classifier)
    test_cases = []      # (new_image, old_image) pairs for feasible searches
    adversarials = []    # subset of test_cases that changed the label set
    count = 0            # total number of ssc_search attempts
    while True:
        # Pick the next uncovered decision feature and mark it visited.
        dec_layer_index, dec_pos = get_ssc_next(cover_layers, test_object.layer_indices, test_object.feature_indices)
        cover_layers[dec_layer_index].ssc_map.itemset(dec_pos, False)
        if dec_layer_index == 1 and is_input_layer(test_object.dnn.layers[0]):
            continue
        ###
        cond_layer = cover_layers[dec_layer_index - 1]
        dec_layer = cover_layers[dec_layer_index]
        # Per-decision map of which condition neurons have been covered.
        cond_cover = np.zeros(cond_layer.ssc_map.shape, dtype=bool)
        ###
        if is_padding(dec_pos, dec_layer, cond_layer):
            continue
        print('dec_layer_index', cover_layers[dec_layer_index].layer_index)
        tot_conds = cond_cover.size
        if is_conv_layer(cond_layer.layer):
            csp = cond_layer.layer.input.shape
            dsp = cond_layer.ssc_map.shape
            if (csp[1] == dsp[1] and csp[2] == dsp[2]):
                ks = cond_layer.layer.kernel_size
                dsp = cond_layer.ssc_map.shape
                # NOTE(review): this overwrites the global tot_decs computed
                # above; given the surrounding code it looks like tot_conds
                # was intended here -- confirm before relying on the logged
                # coverage numbers.
                tot_decs = ((dsp[1] - ks[0] + 1) * (dsp[2] - ks[1] + 1) * dsp[3])
        non_increasing = 0   # consecutive searches with no coverage gain
        step_coverage = 0    # coverage fraction for this decision feature
        # Keep searching this decision until fully covered or 10 stalls.
        while not (step_coverage >= 1.0 or non_increasing >= 10):
            count += 1
            d_min, d_norm, new_image, old_image, old_labels, cond_diff_map = ssc_search(test_object, layer_functions, cond_layer, None, dec_layer, dec_pos, adv_crafter)
            print('d_min is', d_min, 'd_norm is', d_norm)
            # A search is feasible when few enough conditions changed.
            feasible = (d_min <= test_object.cond_ratio * cond_layer.ssc_map.size or d_min == 1)
            top1_adv_flag = False
            top5_adv_flag = False
            y1s = []
            y2s = []
            y1_flag = False
            y2_flag = False
            labels = test_object.labels
            l0_d = None
            top_classes = test_object.top_classes
            inp_ub = test_object.inp_ub
            found_new = True
            if feasible:
                # Accumulate the newly-covered condition neurons.
                cond_cover = np.logical_or(cond_cover, cond_diff_map)
                covered = np.count_nonzero(cond_cover)
                new_step_coverage = covered * 1.0 / tot_conds
                if new_step_coverage == step_coverage:
                    non_increasing += 1
                    found_new = False
                else:
                    non_increasing = 0
                    step_coverage = new_step_coverage
            if feasible and found_new:
                test_cases.append((new_image, old_image))
                # Normalise both images to uint8 for the L0 distance/saving.
                if inp_ub == 255:
                    new_image = new_image.astype('uint8')
                    old_image = old_image.astype('uint8')
                    diff_image = np.abs(new_image - old_image)
                else:
                    new_image_ = new_image * 255.0 / inp_ub
                    old_image_ = old_image * 255.0 / inp_ub
                    new_image_ = new_image_.astype('uint8')
                    old_image_ = old_image_.astype('uint8')
                    diff_image = np.abs(new_image_ - old_image_)
                l0_d = np.count_nonzero(diff_image) / (new_image.size * 1.0)
                # Top-k predicted classes for the new and original images.
                y1s = (np.argsort(test_object.dnn.predict(np.array([new_image]))))[0][-top_classes:]
                y2s = (np.argsort(test_object.dnn.predict(np.array([old_image]))))[0][-top_classes:]
                if y1s[top_classes - 1] != y2s[top_classes - 1]:
                    top1_adv_flag = True
                if labels == None:
                    labels = old_labels
                # top-x adversarial: membership of the watched labels in the
                # top-k sets differs between the two images.
                for label in labels:
                    if label in y1s:
                        y1_flag = True
                    if label in y2s:
                        y2_flag = True
                if y1_flag != y2_flag:
                    top5_adv_flag = True
                if top5_adv_flag:
                    print('found an adversarial example')
                    adversarials.append((new_image, old_image))
                    save_adversarial_examples([new_image / (inp_ub * 1.0), '{0}-adv-{1}'.format(len(adversarials), y1s[top_classes - 1])], [old_image / (inp_ub * 1.0), '{0}-original-{1}'.format(len(adversarials), y2s[top_classes - 1])], [diff_image / (255 * 1.0), '{0}-diff'.format(len(adversarials))], f_results.split('/')[0])
                    # NOTE(review): adv_flag is assigned but never read in
                    # this function.
                    adv_flag = True
                    d_advs.append(d_norm)
                    if len(d_advs) % 100 == 0:
                        print_adversarial_distribution(d_advs, f_results.replace('.txt', '') + '-adversarial-distribution.txt')
            else:
                print("not feasible")
            # Append one result row for this attempt.
            print('f_results: ', f_results)
            f = open(f_results, "a")
            f.write('{0} {1} {2} {3} {4} {5} {6} {7} {8} {9} {10} {11} {12} {13} {14} {15}\n'.format(count, len(test_cases), len(adversarials), feasible, top1_adv_flag, top5_adv_flag, d_min, d_norm, l0_d, dec_layer.layer_index, dec_pos, cond_layer.ssc_map.size, y1s, y2s, tot_coverage + step_coverage / tot_decs, step_coverage))
            f.close()
            #######
            if not feasible:
                break
            #######
        tot_coverage += step_coverage / tot_decs
        ## todo: this is a shortcut
        # Stop the whole process once this layer has no uncovered decisions.
        if not np.any(cover_layers[dec_layer_index].ssc_map):
            print('all decision features at layer {0} have been covered'.format(dec_layer.layer_index))
            sys.exit(0)
def mcdc(x, dnn, aveImg_binary, mcdc_cond_ratio=0.2, max_v=255, lb_v=-125.5, ub_v=125.5, opt=True, num=None, tot_iters=1000):
    """Search for adversarial examples for a single input using SSC/MC-DC
    style coverage search, bounded by tot_iters attempts.

    :param x: a single input sample (wrapped into a batch of one)
    :param dnn: Keras-style model exposing .predict / .layers
    :param aveImg_binary: NOTE(review) -- unused in this function body
    :param mcdc_cond_ratio: feasibility threshold ratio for condition changes
    :param max_v, lb_v, ub_v: bounds forwarded into adv_objectt
    :param opt: if True, greedily minimise the adversarial example by copying
        new_image rows into a copy of x one prefix at a time
    :param num: if None return on the first adversarial example found;
        otherwise collect up to `num` of them
    :param tot_iters: maximum number of search iterations
    :returns: (True, image-or-array) on success; (False, None) when num is
        None and nothing was found; (False, array of found images) otherwise
    """
    x_test = np.array([x])
    raw_data = raw_datat(x_test, None)
    test_object = test_objectt(dnn, raw_data, 'ssc', 'linf')
    test_object.cond_ratio = mcdc_cond_ratio
    adv_object = adv_objectt(max_v, lb_v, ub_v)
    # Reference prediction for the clean input; an adversarial example must
    # change this argmax.
    predictResults = dnn.predict(np.array([x]), verbose=1)
    res = np.argmax(predictResults)
    f_results, layer_functions, cover_layers, _ = ssc_setup(test_object, '/tmp/')
    d_advs = []
    # Write the results-log header.
    f = open(f_results, "a")
    f.write('#ssc runs; #test cases; #adversarial examples; is feasible; is top-1 adversarial example; is top-x adversarial example; condition feature size; L infinity distance; L0 distance; decision layer index; dec feature; #condition layer neurons; new labels; original labels; coverage; local coverage\n')
    f.close()
    if not (num is None):
        new_images = []  # collected adversarial examples when num is set
    # --- Count total decision features (same scheme as run_ssc). ---
    tot_decs = 0
    if test_object.layer_indices == None:
        for i in range(1, len(cover_layers)):
            if i == 1 and is_input_layer(test_object.dnn.layers[0]):
                continue
            else:
                csp = cover_layers[i].layer.input.shape
                dsp = cover_layers[i].ssc_map.shape
                if is_dense_layer(cover_layers[i].layer) or not (csp[1] == dsp[1] and csp[2] == dsp[2]):
                    tot_decs += cover_layers[i].ssc_map.size
                else:
                    ks = cover_layers[i].layer.kernel_size
                    sp = cover_layers[i].ssc_map.shape
                    tot_decs += ((sp[1] - ks[0] + 1) * (sp[2] - ks[1] + 1) * sp[3])
    else:
        print(test_object.layer_indices, test_object.feature_indices)
        for i in range(1, len(cover_layers)):
            if cover_layers[i].layer_index in test_object.layer_indices:
                print('****', i)
                csp = cover_layers[i].layer.input.shape
                dsp = cover_layers[i].ssc_map.shape
                if is_dense_layer(cover_layers[i].layer) or not (csp[1] == dsp[1] and csp[2] == dsp[2]):
                    tmp_decs = cover_layers[i].ssc_map.size
                else:
                    ks = cover_layers[i].layer.kernel_size
                    dsp = cover_layers[i].ssc_map.shape
                    tmp_decs = ((dsp[1] - ks[0] + 1) * (dsp[2] - ks[1] + 1) * dsp[3])
                if is_conv_layer(cover_layers[i].layer):
                    if not test_object.feature_indices == None:
                        tmp_decs = tmp_decs * (len(test_object.feature_indices) * 1.0 / dsp[3])
                tot_decs += tmp_decs
    tot_coverage = 0.0
    ## define a global attacker
    #classifier=KerasClassifier((MIN, -MIN), model=test_object.dnn)
    classifier = KerasClassifier(test_object.dnn)
    adv_crafter = FastGradientMethod(classifier)
    test_cases = []
    adversarials = []
    count = 0
    while count < tot_iters:
        dec_layer_index, dec_pos = get_ssc_next(cover_layers)
        cover_layers[dec_layer_index].ssc_map.itemset(dec_pos, False)
        if dec_layer_index == 1 and is_input_layer(test_object.dnn.layers[0]):
            continue
        #print (dec_layer_index, dec_pos)
        ###
        cond_layer = cover_layers[dec_layer_index - 1]
        dec_layer = cover_layers[dec_layer_index]
        cond_cover = np.zeros(cond_layer.ssc_map.shape, dtype=bool)
        ###
        if is_padding(dec_pos, dec_layer, cond_layer, False):
            continue
        tot_conds = cond_cover.size
        if is_conv_layer(cond_layer.layer):
            csp = cond_layer.layer.input.shape
            dsp = cond_layer.ssc_map.shape
            if (csp[1] == dsp[1] and csp[2] == dsp[2]):
                ks = cond_layer.layer.kernel_size
                dsp = cond_layer.ssc_map.shape
                # NOTE(review): overwrites tot_decs; possibly meant to set
                # tot_conds -- same pattern appears in run_ssc; confirm.
                tot_decs = ((dsp[1] - ks[0] + 1) * (dsp[2] - ks[1] + 1) * dsp[3])
        non_increasing = 0
        step_coverage = 0
        count += 1
        d_min, d_norm, new_image, old_image, old_labels, cond_diff_map = ssc_search(test_object, layer_functions, cond_layer, None, dec_layer, dec_pos, adv_crafter, adv_object)
        #print ('d_min is', d_min, 'd_norm is', d_norm)
        cond_ratio = test_object.cond_ratio
        feasible = (d_min <= cond_ratio * cond_layer.ssc_map.size or d_min == 1)
        if feasible:
            # Only keep candidates that actually flip the model's decision.
            new_predictResults = dnn.predict(np.array([new_image]), verbose=1)
            new_res = np.argmax(new_predictResults)
            #print ('####', res, new_res, x.shape)
            if res == new_res:
                continue
            ## to optimise the adversarial example
            if opt:
                # Copy ever-longer prefixes of new_image onto x; return the
                # first prefix-copy that still changes the prediction.
                for i in range(0, len(x)):
                    simple_x = x.copy()
                    for ii in range(0, i + 1):
                        simple_x[ii] = new_image[ii]
                    simple_predictResults = dnn.predict(np.array([simple_x]), batch_size=5000, verbose=1)
                    simple_res = np.argmax(simple_predictResults)
                    if simple_res == res:
                        continue
                    #for ii in range(0, i+1):
                    #    plt.imshow(simple_x[ii],cmap='gray')
                    #    #plt.imsave('new_{0}.png'.format(ii),simple_x[ii],cmap='gray')
                    #    plt.show()
                    #    plt.imshow(x[ii],cmap='gray')
                    #    plt.show()
                    ##    #plt.imsave('origin_{0}.png'.format(ii),x[ii],cmap='gray')
                    if num is None:
                        return True, simple_x
                    else:
                        new_images.append(simple_x)
            else:
                if num is None:
                    return True, new_image
                else:
                    new_images.append(new_image)
            # When collecting, stop as soon as enough examples were found.
            if not (num is None):
                if len(new_images) >= num:
                    return True, np.array(new_images)
    if (num is None):
        return False, None
    else:
        return False, np.array(new_images)
def attack(self, model=None, attack_str=""):
    """Craft (or load cached) adversarial examples for the configured attack.

    Dispatches on self.__attack: 'FGSM', 'CW*', 'BIM' or 'DEEPFOOL'. If a
    pickle of previously-crafted examples exists it is returned directly.
    Otherwise a KerasClassifier wrapper is built (training the surrogate
    model first when no model is supplied), the attack is run on the first
    self._length samples selected by self.idx_adv, and the result is saved
    to disk and returned.

    :param model: optional pre-trained Keras model to wrap; when None the
        surrogate model is fitted on the training data first
    :param attack_str: forwarded to self._load_images for cache lookup
    :returns: numpy array of adversarial images
    """
    imgs = self._load_images(attack_str, self._test_or_val_dataset)
    # Choose test or validation split as the attack source.
    if self._test_or_val_dataset == "_x_test_set_":
        X = self.__data.x_test
        Y = self.__data.y_test
    else:
        X = self.__data.x_val
        Y = self.__data.y_val
    # Cached adversarial examples found on disk -- return them as-is.
    if type(imgs) != type(None):
        print('\n{0} adversarial examples using {1} attack loaded...\n'.format(self.__dataset, self.__attack))
        return imgs
    if type(model) == type(None):
        # NOTE(review): Keras fit() returns a History object, so `model`
        # here is not a model; it is unused afterwards -- the wrapper is
        # built from self.surrogate_model either way.
        model = self.surrogate_model.fit(self.__data.x_train, self.__data.y_train, verbose=1, epochs=self.__epochs, batch_size=128)
        wrap = KerasClassifier((0., 1.), model=self.surrogate_model)
    else:
        wrap = KerasClassifier((0., 1.), model=model)
    if self.__attack == 'FGSM':
        print('\nCrafting adversarial examples using FGSM attack...\n')
        fgsm = FastGradientMethod(wrap)
        # NOTE(review): this branch tests self.__data.dataset_name while the
        # BIM branch tests self.__dataset -- confirm both hold the same value.
        if self.__data.dataset_name == 'MNIST':
            x_adv_images = fgsm.generate(x=X[self.idx_adv][:self._length], eps=0.2)
        else:
            x_adv_images = fgsm.generate(x=X[self.idx_adv][:self._length], eps=0.025)
        path = os.path.join(self._attack_dir, self.__dataset.lower() + self._test_or_val_dataset + "fgsm.pkl")
        helpers.save_pkl(x_adv_images, path)
    elif self.__attack.startswith("CW"):
        print('\nCrafting adversarial examples using CW attack...\n')
        cw = CarliniL2Method(wrap, confidence=0.0, targeted=False, binary_search_steps=1, learning_rate=0.2, initial_const=10, max_iter=100)
        x_adv_images = cw.generate(X[self.idx_adv][:self._length])
        path = os.path.join(self._attack_dir, self.__dataset.lower() + self._test_or_val_dataset + "cw.pkl")
        helpers.save_pkl(x_adv_images, path)
    elif self.__attack == 'BIM':
        print('\nCrafting adversarial examples using BIM attack...\n')
        # NOTE(review): if self.__dataset is neither 'MNIST' nor 'CIFAR',
        # `bim` is never bound and generate() below raises NameError.
        if self.__dataset == 'MNIST':
            bim = BasicIterativeMethod(wrap, eps=0.25, eps_step=0.2, max_iter=100, norm=np.inf)
        if self.__dataset == 'CIFAR':
            bim = BasicIterativeMethod(wrap, eps=0.025, eps_step=0.01, max_iter=1000, norm=np.inf)
        x_adv_images = bim.generate(x=X[self.idx_adv][:self._length])
        path = os.path.join(self._attack_dir, self.__dataset.lower() + self._test_or_val_dataset + "bim.pkl")
        helpers.save_pkl(x_adv_images, path)
    elif self.__attack == 'DEEPFOOL':
        print('\nCrafting adversarial examples using DeepFool attack...\n')
        deepfool = DeepFool(wrap)
        x_adv_images = deepfool.generate(x=X[self.idx_adv][:self._length])
        path = os.path.join(self._attack_dir, self.__dataset.lower() + self._test_or_val_dataset + "deepfool.pkl")
        helpers.save_pkl(x_adv_images, path)
    return x_adv_images
wfile.write("Model used for crafting the adversarial examples is in " + MODEL_PATH) v_print("Adversarials crafted with", args.adv_method, "on", MODEL_PATH, "will be saved in", SAVE_ADV) if args.adv_method in ['fgsm', "vat", "rnd_fgsm"]: eps_ranges = { 'fgsm': [e / 10 for e in range(1, 11)], 'rnd_fgsm': [e / 10 for e in range(1, 11)], 'vat': [1.5, 2.1, 5, 7, 10] } if args.adv_method in ["fgsm", "rnd_fgsm"]: adv_crafter = FastGradientMethod(classifier, sess=session) else: adv_crafter = VirtualAdversarialMethod(classifier, sess=session) for eps in eps_ranges[args.adv_method]: if args.adv_method == "rnd_fgsm": x_train = np.clip( X_train + alpha * np.sign(np.random.randn(*X_train.shape)), min_, max_) x_test = np.clip( X_test + alpha * np.sign(np.random.randn(*X_test.shape)), min_, max_) e = eps - alpha else: x_train = X_train
def run_svc(test_object, outs):
    """Run SV-cover (sign-value) test generation on test_object.dnn.

    In an endless loop: picks the next decision feature via get_ssc_next,
    draws a random condition position, searches for a test-input pair with
    svc_search (backed by an FGSM attacker), classifies the outcome
    (top-1 / top-x adversarial) and appends one result row per attempt to
    the results file produced by ssc_setup.

    :param test_object: project test descriptor (dnn, cond_ratio, labels,
        top_classes, inp_ub, ...)
    :param outs: output directory/handle forwarded to ssc_setup
    """
    print('To run svc\n')
    f_results, layer_functions, cover_layers, activations = ssc_setup(test_object, outs)
    ## define a global attacker
    classifier = KerasClassifier((MIN, -MIN), model=test_object.dnn)
    adv_crafter = FastGradientMethod(classifier)
    test_cases = []      # (new_image, old_image) pairs for feasible searches
    adversarials = []    # subset of test_cases that changed the label set
    count = 0            # total number of svc_search attempts
    while True:
        dec_layer_index, dec_pos = get_ssc_next(cover_layers)
        if dec_layer_index == 1 and is_input_layer(test_object.dnn.layers[0]):
            continue
        # BUG FIX: the original referenced an undefined name `clayers`
        # (NameError at runtime); `cover_layers` is the list used everywhere
        # else in this function.
        print('dec_layer_index', cover_layers[dec_layer_index].layer_index)
        ###
        cond_layer = cover_layers[dec_layer_index - 1]
        dec_layer = cover_layers[dec_layer_index]
        cond_cover = np.ones(cond_layer.ssc_map.shape, dtype=bool)
        ###
        ## to check if dec_pos is a padding
        dec_pos_unravel = None
        osp = dec_layer.ssc_map.shape
        dec_pos_unravel = np.unravel_index(dec_pos, osp)
        if is_conv_layer(dec_layer.layer):
            Weights = dec_layer.layer.get_weights()
            weights = Weights[0]
            biases = Weights[1]
            I = 0
            J = dec_pos_unravel[1]
            K = dec_pos_unravel[2]
            L = dec_pos_unravel[3]
            kernel_size = dec_layer.layer.kernel_size
            # Probe every condition neuron under the kernel; an out-of-range
            # access means dec_pos lies in the padding region -- skip it.
            try:
                for II in range(0, kernel_size[0]):
                    for JJ in range(0, kernel_size[1]):
                        for KK in range(0, weights.shape[2]):
                            try_tmp = cond_layer.ssc_map[0][J + II][K + JJ][KK]
            except IndexError:
                # (narrowed from a bare `except:` -- only the padding probe's
                # out-of-range indexing should be swallowed here)
                #print ('dec neuron is a padding')
                continue
        # Pick a random condition neuron to pair with the decision neuron.
        cond_pos = np.random.randint(0, cond_cover.size)
        print('cond, dec layer index: ', cond_layer.layer_index, dec_layer.layer_index)
        print('dec_layer_index: ', cover_layers[dec_layer_index].layer_index)
        count += 1
        # Upper bound for the decision-neuron value, with a small slack.
        dec_ub = dec_layer.ubs.item(dec_pos) + 0.001
        #for act in activations[dec_layer.layer_index]:
        #    v=act.item(dec_pos)
        #    if v>dec_ub: dec_ub=v
        print('dec_ub: ', dec_ub)
        d_min, d_norm, new_image, old_image = svc_search(test_object, layer_functions, cond_layer, cond_pos, dec_layer, dec_pos, adv_crafter, dec_ub)
        print('d_min is', d_min, 'd_norm is', d_norm)
        # A search is feasible when few enough conditions changed.
        feasible = (d_min <= test_object.cond_ratio * cond_layer.ssc_map.size or d_min == 1)
        top1_adv_flag = False
        top5_adv_flag = False
        top5b_adv_flag = False
        y1s = []
        y2s = []
        y1_flag = False
        y2_flag = False
        labels = test_object.labels  #[555, 920]
        l0_d = None
        top_classes = test_object.top_classes
        inp_ub = test_object.inp_ub
        if feasible:
            test_cases.append((new_image, old_image))
            if inp_ub == 255:
                new_image = new_image.astype('uint8')
                old_image = old_image.astype('uint8')
            diff_image = np.abs(new_image - old_image)
            l0_d = np.count_nonzero(diff_image) / (new_image.size * 1.0)
            # Top-k predicted classes for the new and original images.
            y1s = (np.argsort(test_object.dnn.predict(np.array([new_image]))))[0][-top_classes:]
            y2s = (np.argsort(test_object.dnn.predict(np.array([old_image]))))[0][-top_classes:]
            if y1s[top_classes - 1] != y2s[top_classes - 1]:
                top1_adv_flag = True
            if not y1s[top_classes - 1] in y2s:
                top5b_adv_flag = True
            # top-x adversarial: membership of the watched labels in the
            # top-k sets differs between the two images.
            for label in labels:
                if label in y1s:
                    y1_flag = True
                if label in y2s:
                    y2_flag = True
            if y1_flag != y2_flag:
                top5_adv_flag = True
            if top5_adv_flag:
                print('found an adversarial example')
                adversarials.append((new_image, old_image))
                save_an_image(new_image / (inp_ub * 1.0), '{0}-adv-{1}.png'.format(len(adversarials), y1s[top_classes - 1]), f_results.split('/')[0])
                save_an_image(old_image / (inp_ub * 1.0), '{0}-original-{1}.png'.format(len(adversarials), y2s[top_classes - 1]), f_results.split('/')[0])
                save_an_image(diff_image / (inp_ub * 1.0), '{0}-diff.png'.format(len(adversarials)), f_results.split('/')[0])
                # NOTE(review): adv_flag is assigned but never read here.
                adv_flag = True
        else:
            print("not feasible")
        # Append one result row for this attempt.
        print('f_results: ', f_results)
        f = open(f_results, "a")
        f.write('{0} {1} {2} {3} {4} {5} {6} {7} {8} {9} {10} {11} {12} {13}\n'.format(count, len(test_cases), len(adversarials), feasible, top1_adv_flag, top5_adv_flag, top5b_adv_flag, d_min, d_norm, l0_d, dec_layer.layer_index, cond_layer.ssc_map.size, y1s, y2s))
        f.close()
input_shape=x_train.shape[1:])) model.add(Conv2D(64, (3, 3), activation='relu')) model.add(MaxPooling2D(pool_size=(2, 2))) model.add(Dropout(0.25)) model.add(Flatten()) model.add(Dense(128, activation='relu')) model.add(Dropout(0.5)) model.add(Dense(10, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) classifier = KerasClassifier(model=model, clip_values=(min_, max_)) classifier.fit(x_train, y_train, nb_epochs=5, batch_size=128) # Evaluate the classifier on the test set preds = np.argmax(classifier.predict(x_test), axis=1) acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0] print("\nTest accuracy: %.2f%%" % (acc * 100)) # Craft adversarial samples with FGSM epsilon = .1 # Maximum perturbation adv_crafter = FastGradientMethod(classifier, eps=epsilon) x_test_adv = adv_crafter.generate(x=x_test) # Evaluate the classifier on the adversarial examples preds = np.argmax(classifier.predict(x_test_adv), axis=1) acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0] print("\nTest accuracy on adversarial sample: %.2f%%" % (acc * 100))
def robustness_check(object_storage_url, object_storage_username, object_storage_password, data_bucket_name, result_bucket_name, model_id, feature_testset_path='processed_data/X_test.npy', label_testset_path='processed_data/y_test.npy', clip_values=(0, 1), nb_classes=2, input_shape=(1, 3, 64, 64), model_class_file='model.py', model_class_name='model', LossFn='', Optimizer='', epsilon=0.2):
    """Download a trained PyTorch model and test set from object storage,
    craft FGSM adversarial samples, and return robustness metrics.

    :param object_storage_url: Minio endpoint URL (scheme is stripped)
    :param object_storage_username: Minio access key
    :param object_storage_password: Minio secret key
    :param data_bucket_name: bucket holding the test-set .npy files
    :param result_bucket_name: bucket holding model weights and source zip
    :param model_id: prefix under which the model artifacts are stored
    :param feature_testset_path: object key of X_test.npy
    :param label_testset_path: object key of y_test.npy
    :param clip_values: classifier input range passed to PyTorchClassifier
    :param nb_classes: number of output classes
    :param input_shape: model input shape
    :param model_class_file: filename (inside the zip) defining the model class
    :param model_class_name: name of the model class in that module
    :param LossFn: optional Python expression for the loss function
    :param Optimizer: optional Python expression for the optimizer
    :param epsilon: FGSM maximum perturbation
    :returns: metrics dict/tuple from get_metrics
    """
    url = re.compile(r"https?://")
    cos = Minio(url.sub('', object_storage_url), access_key=object_storage_username, secret_key=object_storage_password)
    dataset_filenamex = "X_test.npy"
    dataset_filenamey = "y_test.npy"
    weights_filename = "model.pt"
    model_files = model_id + '/_submitted_code/model.zip'
    # Fetch the test set, trained weights and submitted model source.
    cos.fget_object(data_bucket_name, feature_testset_path, dataset_filenamex)
    cos.fget_object(data_bucket_name, label_testset_path, dataset_filenamey)
    cos.fget_object(result_bucket_name, model_id + '/' + weights_filename, weights_filename)
    cos.fget_object(result_bucket_name, model_files, 'model.zip')
    # Load PyTorch model definition from the source code.
    # (context manager guarantees the archive is closed even on error)
    with zipfile.ZipFile('model.zip', 'r') as zip_ref:
        zip_ref.extractall('model_files')
    modulename = 'model_files.' + model_class_file.split('.')[0].replace('-', '_')
    '''
    We required users to define where the model class is located or follow
    some naming convention we have provided.
    '''
    model_class = getattr(importlib.import_module(modulename), model_class_name)
    # load & compile model
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    model = model_class().to(device)
    model.load_state_dict(torch.load(weights_filename, map_location=device))
    # Define Loss and optimizer function for the PyTorch model.
    # SECURITY NOTE: eval() on LossFn/Optimizer executes arbitrary code from
    # configuration -- only accept these values from trusted callers.
    if LossFn:
        loss_fn = eval(LossFn)
    else:
        loss_fn = torch.nn.CrossEntropyLoss()
    if Optimizer:
        optimizer = eval(Optimizer)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    # create pytorch classifier
    classifier = PyTorchClassifier(clip_values, model, loss_fn, optimizer, input_shape, nb_classes)
    # load test dataset
    x = np.load(dataset_filenamex)
    y = np.load(dataset_filenamey)
    # craft adversarial samples using FGSM
    crafter = FastGradientMethod(classifier, eps=epsilon)
    x_samples = crafter.generate(x)
    # obtain all metrics (robustness score, perturbation metric, reduction in confidence)
    metrics, y_pred_orig, y_pred_adv = get_metrics(model, x, x_samples, y)
    print("metrics:", metrics)
    return metrics