def get_DeepFool_adversarial(targeted, xs, classifier, batch_size):
    """Craft DeepFool adversarial examples for ``xs``, one batch at a time.

    :param targeted: if truthy, a message is printed and the process exits,
        because this helper only supports the untargeted attack.
    :param xs: array of inputs; it is sliced along its first axis.
    :param classifier: model handed to ``KerasModelWrapper``.
    :param batch_size: number of samples passed to each ``generate_np`` call.
    :return: the per-batch adversarial outputs concatenated along axis 0.
        Every sample of ``xs`` is attacked, including a trailing batch that
        is smaller than ``batch_size``.
    """
    # Targeted DeepFool attack not possible
    if targeted:
        print('DeepFool attack cannot be targeted.')
        exit()

    wrap = KerasModelWrapper(classifier)
    attack = DeepFool(wrap, sess=K.get_session())
    deepfool_params = {
        'overshoot': 0.02,
        'max_iter': 50,
        'nb_candidate': 2,
        'clip_min': -5,
        'clip_max': 5,
    }

    # The first batch is always attacked, even when xs holds fewer than
    # batch_size samples (the slice simply returns what is available).
    adv_batches = [attack.generate_np(xs[:batch_size], **deepfool_params)]

    # Step through the rest of xs by start offset so that the final, possibly
    # shorter, batch is not dropped when len(xs) is not a multiple of
    # batch_size.
    remaining_starts = range(batch_size, xs.shape[0], batch_size)
    for ii, start in enumerate(remaining_starts, start=1):
        print('ITER', ii)
        adv_batches.append(
            attack.generate_np(xs[start:start + batch_size],
                               **deepfool_params))

    # Concatenate once instead of re-copying the growing result every batch.
    return np.concatenate(adv_batches, axis=0)
class TestDeepFool(CleverHansTest):
    """Tests for the DeepFool attack run against ``SimpleModel``."""

    def setUp(self):
        """Create a fresh session, model and attack for each test."""
        super(TestDeepFool, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = DeepFool(self.model, sess=self.sess)

    def test_generate_np_gives_adversarial_example(self):
        """``generate_np`` must change the label of more than 90% of inputs."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        # NOTE: the keyword is `overshoot`; `over_shoot` is not a parameter
        # DeepFool declares, so the value was never actually applied.
        x_adv = self.attack.generate_np(x_val, overshoot=0.02, max_iter=50,
                                        nb_candidate=2, clip_min=-5,
                                        clip_max=5)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_gives_adversarial_example(self):
        """The symbolic ``generate`` path keeps the shape and flips labels."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        x = tf.placeholder(tf.float32, x_val.shape)

        x_adv_p = self.attack.generate(x, overshoot=0.02, max_iter=50,
                                       nb_candidate=2, clip_min=-5,
                                       clip_max=5)
        self.assertEqual(x_val.shape, x_adv_p.shape)
        x_adv = self.sess.run(x_adv_p, {x: x_val})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        """Outputs must stay inside [clip_min, clip_max] (small tolerance)."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, overshoot=0.02, max_iter=50,
                                        nb_candidate=2, clip_min=-0.2,
                                        clip_max=0.3)

        self.assertTrue(-0.201 < np.min(x_adv))
        self.assertTrue(np.max(x_adv) < .301)
class TestDeepFool(CleverHansTest):
    """Tests for the DeepFool attack run against a tiny fixed-weight network."""

    def setUp(self):
        """Build the two-layer constant-weight model, a session and the attack."""
        super(TestDeepFool, self).setUp()
        import tensorflow as tf

        # The world's simplest neural network
        def my_model(x):
            W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
            h1 = tf.nn.sigmoid(tf.matmul(x, W1))
            W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
            res = tf.matmul(h1, W2)
            return res

        self.sess = tf.Session()
        self.model = my_model
        self.attack = DeepFool(self.model, sess=self.sess)

    def test_generate_np_gives_adversarial_example(self):
        """``generate_np`` must change the label of more than 90% of inputs."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        # NOTE: the keyword is `overshoot`; `over_shoot` is not a parameter
        # DeepFool declares, so the value was never actually applied.
        x_adv = self.attack.generate_np(x_val, overshoot=0.02, max_iter=50,
                                        nb_candidate=2, clip_min=-5,
                                        clip_max=5)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_gives_adversarial_example(self):
        """The symbolic ``generate`` path must also flip more than 90% of labels."""
        import tensorflow as tf

        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        x = tf.placeholder(tf.float32, x_val.shape)

        x_adv_p = self.attack.generate(x, overshoot=0.02, max_iter=50,
                                       nb_candidate=2, clip_min=-5,
                                       clip_max=5)
        x_adv = self.sess.run(x_adv_p, {x: x_val})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        """Outputs must stay inside [clip_min, clip_max] (small tolerance)."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, overshoot=0.02, max_iter=50,
                                        nb_candidate=2, clip_min=-0.2,
                                        clip_max=0.3)

        self.assertTrue(-0.201 < np.min(x_adv))
        self.assertTrue(np.max(x_adv) < .301)
def untargeted_attack(model, images):
    """Run DeepFool with its default parameters on ``images``.

    :param model: model handed to ``KerasModelWrapper``.
    :param images: array passed unchanged to ``DeepFool.generate_np``.
    :return: whatever ``generate_np`` returns for ``images``.
    """
    session = backend.get_session()
    attack = DeepFool(KerasModelWrapper(model), back='tf', sess=session)
    return attack.generate_np(images)
class TestDeepFool(CleverHansTest):
    """Tests for the DeepFool attack run against ``SimpleModel``."""

    def setUp(self):
        """Create a fresh session, model and attack for each test."""
        super(TestDeepFool, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = DeepFool(self.model, sess=self.sess)

    def test_generate_np_gives_adversarial_example(self):
        """``generate_np`` must change the label of more than 90% of inputs."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        # NOTE: the keyword is `overshoot`; `over_shoot` is not a parameter
        # DeepFool declares, so the value was never actually applied.
        x_adv = self.attack.generate_np(x_val, overshoot=0.02, max_iter=50,
                                        nb_candidate=2, clip_min=-5,
                                        clip_max=5)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_gives_adversarial_example(self):
        """The symbolic ``generate`` path must also flip more than 90% of labels."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        x = tf.placeholder(tf.float32, x_val.shape)

        x_adv_p = self.attack.generate(x, overshoot=0.02, max_iter=50,
                                       nb_candidate=2, clip_min=-5,
                                       clip_max=5)
        x_adv = self.sess.run(x_adv_p, {x: x_val})

        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_np_gives_clipped_adversarial_examples(self):
        """Outputs must stay inside [clip_min, clip_max] (small tolerance)."""
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, overshoot=0.02, max_iter=50,
                                        nb_candidate=2, clip_min=-0.2,
                                        clip_max=0.3)

        self.assertTrue(-0.201 < np.min(x_adv))
        self.assertTrue(np.max(x_adv) < .301)
def query(self, X_train, Y_train, labeled_idx, amount):
    """Pick the unlabeled samples with the smallest DeepFool perturbation.

    The unlabeled pool is attacked with DeepFool in chunks of 100. Samples
    whose predicted label did not change get an infinite norm so they are
    never preferred. While fewer than ``amount`` predictions have flipped,
    ``max_iter`` is doubled and the whole pool is attacked again.

    NOTE(review): the retry loop has no upper bound of its own; if the attack
    can never flip ``amount`` predictions it will not terminate.

    :param X_train: array indexed with the unlabeled indices.
    :param Y_train: only ``Y_train.shape[1]`` is read; it is passed as
        ``nb_candidate`` (presumably the number of classes - confirm).
    :param labeled_idx: indices already labeled; returned first in the result.
    :param amount: number of new indices to select. Values larger than the
        unlabeled pool are clamped to the pool size; non-positive values
        select nothing.
    :return: ``labeled_idx`` followed by the newly selected indices, stacked
        with ``np.hstack``.
    """
    unlabeled_idx = get_unlabeled_idx(X_train, labeled_idx)
    unlabeled = X_train[unlabeled_idx]
    n_unlabeled = unlabeled.shape[0]

    # Asking for more samples than exist could never be satisfied by the
    # retry loop below, so clamp instead of spinning forever.
    amount = min(amount, n_unlabeled)
    if amount <= 0:
        # Nothing to select; skip the attack (norms would be undefined).
        return np.hstack((labeled_idx, unlabeled_idx[:0]))

    keras_wrapper = KerasModelWrapper(self.model)
    sess = K.get_session()
    deep_fool = DeepFool(keras_wrapper, sess=sess)
    # NOTE: the keyword is `overshoot`; `over_shoot` is not a parameter
    # DeepFool declares.
    deep_fool_params = {
        'overshoot': 0.02,
        'clip_min': 0.,
        'clip_max': 1.,
        'nb_candidate': Y_train.shape[1],
        'max_iter': 10,
    }

    true_predictions = np.argmax(
        self.model.predict(unlabeled, batch_size=256), axis=1)
    adversarial_predictions = np.copy(true_predictions)

    while np.sum(true_predictions != adversarial_predictions) < amount:
        adversarial_images = np.zeros(unlabeled.shape)
        for i in range(0, n_unlabeled, 100):
            print("At {i} out of {n}".format(i=i, n=n_unlabeled))
            # Slicing past the end is safe, so the last (shorter) chunk
            # needs no special case.
            adversarial_images[i:i + 100] = deep_fool.generate_np(
                unlabeled[i:i + 100], **deep_fool_params)

        pertubations = adversarial_images - unlabeled
        norms = np.linalg.norm(
            np.reshape(pertubations, (n_unlabeled, -1)), axis=1)
        adversarial_predictions = np.argmax(
            self.model.predict(adversarial_images, batch_size=256), axis=1)
        # Samples the attack failed to flip must never be selected.
        norms[true_predictions == adversarial_predictions] = np.inf
        deep_fool_params['max_iter'] *= 2

    if amount >= n_unlabeled:
        # argpartition requires kth < len(norms); here everything is taken.
        selected_indices = np.arange(n_unlabeled)
    else:
        selected_indices = np.argpartition(norms, amount)[:amount]

    del keras_wrapper
    del deep_fool
    gc.collect()

    return np.hstack((labeled_idx, unlabeled_idx[selected_indices]))
def get_DeepFool_samples(loaded_model, samples, max_iter):
    """Generate DeepFool adversarial versions of ``samples``.

    The attack is run with ``clip_min=0.``, ``clip_max=1.`` and
    ``nb_candidate=10``.

    :param loaded_model: model handed to ``KerasModelWrapper``.
    :param samples: array passed to ``DeepFool.generate_np``.
    :param max_iter: forwarded as the attack's ``max_iter``.
    :return: the array returned by ``generate_np``.
    """
    session = backend.get_session()
    attack = DeepFool(KerasModelWrapper(loaded_model), sess=session)
    return attack.generate_np(
        samples,
        max_iter=max_iter,
        clip_min=0.,
        clip_max=1.,
        nb_candidate=10,
    )
def mnist_deepfool_attack(sample, target, model, sess, targeted=True,
                          attack_iterations=100):
    """Run DeepFool on ``sample`` and return the adversarial result.

    :param sample: array passed to ``DeepFool.generate_np``.
    :param target: not read by this function.
    :param model: model passed directly to ``DeepFool`` (no wrapper is
        applied here).
    :param sess: session passed to ``DeepFool``.
    :param targeted: not read by this function.
    :param attack_iterations: not read by this function; ``max_iter`` is
        fixed at 300 below.
    :return: the array returned by ``generate_np``.
    """
    print('deepfool attack start')
    deepfool = DeepFool(model, sess=sess)
    # NOTE: the keyword is `overshoot`; `over_shoot` is not a parameter
    # DeepFool declares.
    deepfool_params = {
        'overshoot': 0.02,
        'clip_min': 0.,
        'clip_max': 1.,
        'max_iter': 300,
        'nb_candidate': 2,
    }
    adv_x = deepfool.generate_np(sample, **deepfool_params)
    return adv_x
X_train[st:st + batch_size].reshape(-1, 32 * 32 * 3) / 255, dtype=np.float) # sample = np.array([sample]) sess = keras.backend.get_session() model = KerasModelWrapper(model_keras) attack = DeepFool(model, sess=sess) # print(model.predict(panda.reshape(1, *panda.shape))) param = dict( nb_candidate=10, overshoot=0.01, #overshoot=0.0, max_iter=20, clip_min=0., clip_max=1.) advs = attack.generate_np(sample, **param) # plt.imsave("sample.png", advs[0]) adv_train.append(advs) preb = model_keras.predict(advs).argmax(axis=1).reshape( (sample.shape[0], )) y_sample = model_keras.predict(sample).argmax(axis=1).reshape( (sample.shape[0], )) success += (preb != y_sample).sum() print((preb != y_sample).sum()) print(success / data_size) new_train = np.concatenate(adv_train) np.save('new_train_deepfool', new_train) # plt.imshow(adv[0]) # plt.show()
if np.argmax(model.predict(x_test[i:i+1])) == np.argmax(y_test[i]): adv_inputs[j] = x_test[i] adv_labels[j] = y_test[i] # csvFile1.append([[i,j]]) j += 1 adv_inputs = adv_inputs[:100] adv_labels = adv_labels[:100] print("Legitimate test accuracy = %0.3f" % (j/y_test.shape[0])) print("Dataset of %d to be attacked." % adv_inputs.shape[0]) print(adv_inputs.shape, adv_labels.shape) # Attack wrap = KerasModelWrapper(model) deepfool = DeepFool(wrap, sess=sess) params = {} x_adv_1 = deepfool.generate_np(adv_inputs[:20], **params) x_adv_2 = deepfool.generate_np(adv_inputs[20:40], **params) x_adv_3 = deepfool.generate_np(adv_inputs[40:60], **params) x_adv_4 = deepfool.generate_np(adv_inputs[60:80], **params) x_adv_5 = deepfool.generate_np(adv_inputs[80:], **params) x_adv = np.concatenate((x_adv_1, x_adv_2, x_adv_3, x_adv_4, x_adv_5), axis=0) score = model.evaluate(x_adv, adv_labels, verbose=0) print('Adv. Test accuracy: %0.3f' % score[1]) # Initialize random choosing of adversarial images num_examples = 100 index_list = list(range(x_adv.shape[0])) import random random.seed(9123) random.shuffle(index_list)
def get_adversarial_version(self, x, y=None, eps=0.3, iterations=100,
                            attack='FGSM', targeted=False, y_tar=None,
                            clip_min=0.0, clip_max=1.0, nb_candidate=10,
                            num_params=100):
    """Compute adversarial versions of the points in ``x``.

    :param x: matrix of n x input_shape samples.
    :param y: matrix of n x input_label samples; not read by this method.
    :param eps: not read by this method (FGSM uses a fixed 0.15 and BIM a
        fixed 0.015).
    :param iterations: ``max_iterations`` for the two CW attacks only.
    :param attack: one of 'CW-l2', 'CW-l0', 'DF', 'JSMA', 'FGSM', 'BIM'.
    :param targeted: not read by this method.
    :param y_tar: forwarded as ``y_target`` to JSMA, FGSM and BIM.
    :param clip_min: lower clip bound for JSMA, FGSM and BIM (the CW attacks
        use a fixed 0.0).
    :param clip_max: upper clip bound for JSMA, FGSM and BIM (the CW attacks
        use a fixed 1.0).
    :param nb_candidate: forwarded to DeepFool ('DF') only.
    :param num_params: not read by this method.
    :return: the adversarial array produced by the selected attack.
    :raises ValueError: if ``attack`` is not one of the supported names
        (previously this fell through to an unbound ``x_adv``).
    """
    supported = ('CW-l2', 'CW-l0', 'DF', 'JSMA', 'FGSM', 'BIM')
    if attack not in supported:
        raise ValueError(
            "Unknown attack {!r}; expected one of {}".format(attack, supported))

    if self.dataset == 'cifar10':
        model = KerasModelWrapper(self.model)
    else:
        model = KerasModelWrapper(self.model.model)

    # Every supported attack runs with the learning phase set to 0.
    K.set_learning_phase(0)

    if attack == 'CW-l2':
        # Instantiate a CW attack object
        cw = CarliniWagnerL2(model, sess=self.sess)
        cw_params = {
            'batch_size': 10,
            'confidence': 0,
            'learning_rate': 1e-2,
            'binary_search_steps': 5,
            'max_iterations': iterations,
            'abort_early': True,
            'initial_const': 1e-4,
            'clip_min': 0.0,
            'clip_max': 1.0,
        }
        x_adv = cw.generate_np(x, **cw_params)
    elif attack == 'CW-l0':
        # Instantiate a CW attack object
        cw = CarliniWagnerL0(model, sess=self.sess)
        cw_params = {
            'batch_size': 1,
            'confidence': 0.,
            'learning_rate': 1e-2,
            'binary_search_steps': 5,
            'max_iterations': iterations,
            'abort_early': True,
            'initial_const': 1e-4,
            'clip_min': 0.0,
            'clip_max': 1.0,
        }
        x_adv = cw.generate_np(x, **cw_params)
    elif attack == 'DF':
        df = DeepFool(model, sess=self.sess)
        df_params = {'nb_candidate': nb_candidate}
        x_adv = df.generate_np(x, **df_params)
    elif attack == 'JSMA':
        jsma = SaliencyMapMethod(model, sess=self.sess)
        jsma_params = {
            'theta': 1.,
            'gamma': 0.03,
            'clip_min': clip_min,
            'clip_max': clip_max,
            'y_target': y_tar,
        }
        x_adv = jsma.generate_np(x, **jsma_params)
    elif attack == 'FGSM':
        fgsm = FastGradientMethod(model, sess=self.sess)
        fgsm_params = {
            'eps': 0.15,
            'clip_min': clip_min,
            'clip_max': clip_max,
            'y_target': y_tar,
        }
        x_adv = fgsm.generate_np(x, **fgsm_params)
    else:  # attack == 'BIM'
        bim = BasicIterativeMethod(model, sess=self.sess)
        bim_params = {
            'eps': 0.015,
            'eps_iter': 0.005,
            'nb_iter': 100,
            'clip_min': clip_min,
            'clip_max': clip_max,
            'y_target': y_tar,
        }
        x_adv = bim.generate_np(x, **bim_params)

    return x_adv
def mnist_tutorial_deepfool(train_start=0, train_end=60000,  # 60000 training examples
                            test_start=0, test_end=10000,  # 10000 test examples
                            viz_enabled=True, nb_epochs=6, batch_size=128,
                            nb_classes=2, source_samples=10,
                            learning_rate=0.001, attack_iterations=100,
                            model_path=os.path.join("models", "mnist")):
    """
    MNIST tutorial for the DeepFool attack

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model (one epoch is one full
                      forward and backward pass over all the training data)
    :param batch_size: size of training batches
    :param nb_classes: number of output classes; only used in the progress
                       message printed before the attack
    :param source_samples: number of test inputs to attack; only used in the
                           progress message printed before the attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param attack_iterations: passed to DeepFool as ``max_iter``
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions: images are 28 x 28 x 1
    img_rows = 28
    img_cols = 28
    channels = 1

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_picklable_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2018, 8, 9])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                    save=os.path.exists("models"), rng=rng)
        print("save success")

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the DeepFool attack
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a DeepFool attack object
    deepfool = DeepFool(model, back='tf', sess=sess)

    # For each of the 10 digit labels, take the index of the second test
    # example carrying that label.
    idxs = [np.where(np.argmax(Y_test, axis=1) == i)[0][1] for i in range(10)]
    print("idxs:", idxs)

    # construct adv_inputs
    adv_inputs = X_test[idxs].reshape([-1, img_rows, img_cols, channels])
    nb_attacked = adv_inputs.shape[0]

    # One grid row per attacked sample (original, adversarial). Sizing the
    # grid by nb_classes (default 2) overflowed when the 10 rows were filled.
    grid_shape = (nb_attacked, 2, img_rows, img_cols, channels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')
    print("grid_viz_data", grid_viz_data.shape)

    deepfool_params = {'nb_candidate': 10,
                       'overshoot': 0.02,
                       'max_iter': attack_iterations,
                       'nb_classes': 10,
                       'clip_min': 0.,
                       'clip_max': 1.}

    adv = deepfool.generate_np(adv_inputs, **deepfool_params)
    print("adv success")

    # 1 - accuracy on the adversarial inputs, i.e. the attack success rate
    adv_accuracy = 1 - model_eval(sess, x, y, preds, adv, Y_test[idxs],
                                  args={'batch_size': 10})

    for j in range(nb_attacked):
        grid_viz_data[j, 0] = adv_inputs[j]
        grid_viz_data[j, 1] = adv[j]
    print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                       axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
# Clean accuracy of the Keras model on the test set.
pred = np.argmax(keras_model.predict(x_test), axis=1)
acc = np.mean(np.equal(pred, y_test))
print("The Test accuracy is: {}".format(acc))

# ----------------------------- Adversarial attack -----------------------------
# DeepFool is run on x_test here.
# NOTE(review): the earlier banner mentioned X_train (30000 samples) - confirm
# which dataset was intended.
wrap = KerasModelWrapper(keras_model)
df = DeepFool(wrap, back='tf', sess=sess)
# NOTE: the keyword is `overshoot`; with `over_shoot` the 0.09 below was never
# applied, because that is not a parameter DeepFool declares.
df_params = {
    'overshoot': 0.09,
    'max_iter': 10,
    'clip_max': 1,
    'clip_min': 0,
    'nb_candidate': 10,
}
adv_x = df.generate_np(x_test, **df_params)

# Accuracy of the same model on the adversarial inputs.
adv_conf = keras_model.predict(adv_x)
adv_pred = np.argmax(adv_conf, axis=1)
adv_acc = np.mean(np.equal(adv_pred, y_test))
print("The adversarial accuracy is: {}".format(adv_acc))

# ------------------------------- Original image -------------------------------
x_sample = x_test[5].reshape(28, 28)
plt.imshow(x_sample, cmap='Blues')
plt.show()

# ------------------------------ Adversarial image -----------------------------
adv_x_sample = adv_x[5].reshape(28, 28)
plt.imshow(adv_x_sample, cmap='Blues')
plt.show()