def test_krclassifier(self):
    """
    Second test with the KerasClassifier.

    Trains a tiny Keras CNN on an MNIST subset, then runs the Carlini&Wagner
    L2 attack three ways: targeted, untargeted with explicit labels, and
    untargeted with labels inferred from the classifier.
    :return:
    """
    # Initialize a tf session and register it as the Keras backend session
    session = tf.Session()
    k.set_session(session)

    # Get MNIST (small subsets keep the test fast)
    batch_size, nb_train, nb_test = 100, 1000, 10
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:nb_train], y_train[:nb_train]
    x_test, y_test = x_test[:nb_test], y_test[:nb_test]

    # Create simple CNN
    model = Sequential()
    model.add(Conv2D(4, kernel_size=(5, 5), activation='relu', input_shape=(28, 28, 1)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(10, activation='softmax'))

    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(lr=0.01),
                  metrics=['accuracy'])

    # Get classifier (model outputs probabilities, hence use_logits=False)
    krc = KerasClassifier((0, 1), model, use_logits=False)
    krc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=2)

    # First attack: targeted CW-L2 towards random target classes
    cl2m = CarliniL2Method(classifier=krc, targeted=True, max_iter=100, binary_search_steps=10,
                           learning_rate=2e-2, initial_const=3, decay=1e-2)
    params = {'y': random_targets(y_test, krc.nb_classes)}
    x_test_adv = cl2m.generate(x_test, **params)
    # The attack must perturb at least one input...
    self.assertFalse((x_test == x_test_adv).all())
    target = np.argmax(params['y'], axis=1)
    y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
    # ...and reach the target class for at least one sample
    self.assertTrue((target == y_pred_adv).any())

    # Second attack: untargeted, but random labels still passed as `y`
    cl2m = CarliniL2Method(classifier=krc, targeted=False, max_iter=100, binary_search_steps=10,
                           learning_rate=2e-2, initial_const=3, decay=1e-2)
    params = {'y': random_targets(y_test, krc.nb_classes)}
    x_test_adv = cl2m.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    target = np.argmax(params['y'], axis=1)
    y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
    # NOTE(review): `.all()` demands that *every* prediction misses the random
    # target; the sibling PyTorch/TF tests only assert `.any()` — confirm intent.
    self.assertTrue((target != y_pred_adv).all())

    # Third attack: untargeted, no labels supplied (classifier predictions used)
    cl2m = CarliniL2Method(classifier=krc, targeted=False, max_iter=100, binary_search_steps=10,
                           learning_rate=2e-2, initial_const=3, decay=1e-2)
    params = {}
    x_test_adv = cl2m.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    y_pred = np.argmax(krc.predict(x_test), axis=1)
    y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
    # Success criterion: at least one prediction flips
    self.assertTrue((y_pred != y_pred_adv).any())
def test_mnist(self):
    """
    End-to-end CW-L2 attack against a small CNN trained on an MNIST subset.

    Uses the older ART-style API (`sess`, `max_iterations`, `y_val`).
    """
    session = tf.Session()
    k.set_session(session)
    comp_params = {"loss": 'categorical_crossentropy', "optimizer": 'adam', "metrics": ['accuracy']}

    # get MNIST (small subsets keep the test fast)
    batch_size, nb_train, nb_test = 100, 1000, 10
    (X_train, Y_train), (X_test, Y_test), _, _ = load_mnist()
    X_train, Y_train = X_train[:nb_train], Y_train[:nb_train]
    X_test, Y_test = X_test[:nb_test], Y_test[:nb_test]
    im_shape = X_train[0].shape

    # get classifier
    classifier = CNN(im_shape, act="relu")
    classifier.compile(comp_params)
    classifier.fit(X_train, Y_train, epochs=1, batch_size=batch_size, verbose=0)
    scores = classifier.evaluate(X_test, Y_test)
    print("\naccuracy on test set: %.2f%%" % (scores[1] * 100))

    # Untargeted CW-L2; the number of classes is read off the model's output layer.
    # NOTE(review): the attack object is named `df` (DeepFool leftover?) — harmless
    # but misleading; confirm against the original test.
    df = CarliniL2Method(classifier, sess=session, targeted=False, max_iterations=100,
                         binary_search_steps=2, learning_rate=1e-2, initial_const=1)
    params = {'y_val': random_targets(Y_test, classifier.model.get_output_shape_at(-1)[-1])}
    x_test_adv = df.generate(X_test, **params)
    # The attack must perturb at least one input
    self.assertFalse((X_test == x_test_adv).all())

    # At least one adversarial prediction must differ from the true labels
    y_pred = get_labels_np_array(classifier.predict(x_test_adv))
    self.assertFalse((Y_test == y_pred).all())

    scores = classifier.evaluate(x_test_adv, Y_test)
    print('\naccuracy on adversarial examples: %.2f%%' % (scores[1] * 100))
def GetAttackers(classifier, x_test, attacker_name):
    """
    Function: Load classifier and generate adversarial samples

    Builds the attack selected by ``attacker_name``, crafts adversarial
    examples for ``x_test`` and returns them together with the elapsed
    wall-clock time in seconds.

    :param classifier: ART classifier wrapper the attack operates on.
    :param x_test: input samples to perturb.
    :param attacker_name: key selecting one of the supported attacks.
    :return: tuple ``(test_adv, dt)`` of adversarial samples and duration.
    :raises ValueError: if ``attacker_name`` is not a supported attack.
    """
    t_start = time.time()

    # Dispatch table: each entry lazily constructs the configured attack.
    attack_builders = {
        "FGSM": lambda: FastGradientMethod(classifier=classifier, eps=0.3),
        "Elastic": lambda: ElasticNet(classifier=classifier, confidence=0.5),
        "BasicIterativeMethod": lambda: BasicIterativeMethod(classifier=classifier, eps=0.3),
        "NewtonFool": lambda: NewtonFool(classifier=classifier, max_iter=20),
        "HopSkipJump": lambda: HopSkipJump(classifier=classifier, max_iter=20),
        "ZooAttack": lambda: ZooAttack(classifier=classifier, max_iter=20),
        "VirtualAdversarialMethod": lambda: VirtualAdversarialMethod(classifier=classifier, max_iter=20),
        "UniversalPerturbation": lambda: UniversalPerturbation(classifier=classifier, max_iter=20),
        "AdversarialPatch": lambda: AdversarialPatch(classifier=classifier, max_iter=20),
        "Attack": lambda: Attack(classifier=classifier),
        "BoundaryAttack": lambda: BoundaryAttack(classifier=classifier, targeted=False,
                                                 epsilon=0.05, max_iter=20),
        "CarliniL2": lambda: CarliniL2Method(classifier=classifier, confidence=0.5,
                                             learning_rate=0.001, max_iter=15),
        "CarliniLinf": lambda: CarliniLInfMethod(classifier=classifier, confidence=0.5,
                                                 learning_rate=0.001, max_iter=15),
        "DeepFool": lambda: DeepFool(classifier),
        "SMM": lambda: SaliencyMapMethod(classifier=classifier, theta=2),
        "PGD": lambda: ProjectedGradientDescent(classifier=classifier, norm=2, eps=1, eps_step=0.5),
    }

    if attacker_name not in attack_builders:
        raise ValueError("Please get the right attacker's name for the input.")

    attacker = attack_builders[attacker_name]()
    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
def test_failure_attack(self): """ Test the corner case when attack is failed. :return: """ # Build a TFClassifier # Define input and output placeholders self._input_ph = tf.placeholder(tf.float32, shape=[None, 28, 28, 1]) self._output_ph = tf.placeholder(tf.int32, shape=[None, 10]) # Define the tensorflow graph conv = tf.layers.conv2d(self._input_ph, 4, 5, activation=tf.nn.relu) conv = tf.layers.max_pooling2d(conv, 2, 2) fc = tf.contrib.layers.flatten(conv) # Logits layer self._logits = tf.layers.dense(fc, 10) # Train operator self._loss = tf.reduce_mean( tf.losses.softmax_cross_entropy(logits=self._logits, onehot_labels=self._output_ph)) optimizer = tf.train.AdamOptimizer(learning_rate=0.01) self._train = optimizer.minimize(self._loss) # Tensorflow session and initialization self._sess = tf.Session() self._sess.run(tf.global_variables_initializer()) # Get MNIST batch_size, nb_train, nb_test = 100, 5000, 10 (x_train, y_train), (x_test, y_test), _, _ = load_mnist() x_train, y_train = x_train[:nb_train], y_train[:nb_train] x_test, y_test = x_test[:nb_test], y_test[:nb_test] # Train the classifier tfc = TFClassifier((0, 1), self._input_ph, self._logits, self._output_ph, self._train, self._loss, None, self._sess) tfc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=10) # Failure attack cl2m = CarliniL2Method(classifier=tfc, targeted=True, max_iter=0, binary_search_steps=0, learning_rate=0, initial_const=1, decay=0) params = {'y': random_targets(y_test, tfc.nb_classes)} x_test_adv = cl2m.generate(x_test, **params) self.assertTrue((x_test_adv <= 1.0001).all()) self.assertTrue((x_test_adv >= -0.0001).all()) np.testing.assert_almost_equal(x_test, x_test_adv, 3)
else: if args.adv_method == 'deepfool': adv_crafter = DeepFool(classifier, session, clip_min=min_, clip_max=max_) elif args.adv_method == 'jsma': adv_crafter = SaliencyMapMethod(classifier, sess=session, clip_min=min_, clip_max=max_, gamma=1, theta=max_) elif args.adv_method == 'carlini': adv_crafter = CarliniL2Method(classifier, sess=session, targeted=False, confidence=10) else: adv_crafter = UniversalPerturbation(classifier, session, p=np.inf, attacker_params={ 'clip_min': min_, 'clip_max': max_ }) X_train_adv = adv_crafter.generate(x_val=X_train) X_test_adv = adv_crafter.generate(x_val=X_test) if args.save: np.save(os.path.join(SAVE_ADV, "train.npy"), X_train_adv)
l1_reg_levels = [0.0001, 0.0002, 0.0003, 0.0004, 0.0005] for l1_level in range(0, 5): classifier = neural_networks.two_layer_dnn_art(x_train.shape[1:], 0, l1_reg_levels[l1_level], 0) classifier.fit(x_train, y_train, nb_epochs=10, batch_size=128) # Evaluate the classifier on the test set preds = np.argmax(classifier.predict(x_test), axis=1) acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0] print("\nTest accuracy on L1 regularization level %.5f%%: %.2f%%" % (l1_reg_levels[l1_level], acc * 100)) # Craft adversarial samples with CW attack adv_crafter = CarliniL2Method(classifier, targeted=False) x_test_adv = adv_crafter.generate(x=x_test[:1000]) # Evaluate the classifier on the adversarial examples preds = np.argmax(classifier.predict(x_test_adv), axis=1) acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0] print( "\nTest accuracy on adversarial sample for L1 regularization level %.5f%%: %.2f%%" % (l1_reg_levels[l1_level], acc * 100)) # Calculate the average perturbation in L1 and L2 norms. Note that I don't de-normalize the values. perturbations = np.absolute((x_test_adv - x_test[:1000])) l1_perturbations = [ LA.norm(perturbation, 1) for perturbation in perturbations ] l2_perturbations = [
adv_plt = np.column_stack([ nat_img[0].reshape(28, 28), adv_nse[0].reshape(28, 28), adv_img[0].reshape(28, 28) ]) fig = plt.figure() plt.imshow(adv_plt) plt.title("IFGSM:y_test:{},y_adv:{} -- Var: {}".format(np.argmax(y_test[0]), adv_prd[0], adv_var)) fig.set_size_inches(24, 12) fo = ddir + "IFGSM-O{}A{}_varx{}_examp.png".format(np.argmax(y_test[0]), adv_prd[0], adv_var) fig.savefig(fo, dpi=100) ################ CW; adv_crafter = CarliniL2Method(classifier) #targets y_test_tar = np.random.randint(0, 10, y_test.__len__()) y_test_tarm = np.zeros(y_test.shape) y_test_tarm[np.arange(0, y_test.shape[0]), y_test_tar] = 1 x_test_adv = adv_crafter.generate(x=x_test, y=y_test_tarm) # Evaluate the classifier on the adversarial examples preds = np.argmax(classifier.predict(x_test_adv), axis=1) acc = np.sum(preds == np.argmax(y_test, axis=1)) / y_test.shape[0] print("\nTest accuracy on adversarial sample: %.2f%%" % (acc * 100)) # grab a particular example to play with a = (preds != np.argmax(y_test, axis=1)) nat_img = x_test[a] adv_img = x_test_adv[a] adv_nse = adv_img - nat_img adv_prd = preds[a]
def test_ptclassifier(self):
    """
    Third test with the PyTorchClassifier.

    Trains a small PyTorch CNN on an MNIST subset and exercises the CW-L2
    attack in targeted, untargeted-with-labels, and untargeted-without-labels
    modes, checking clip-range compliance each time.
    :return:
    """
    # Get MNIST
    batch_size, nb_train, nb_test = 100, 5000, 10
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:nb_train], y_train[:nb_train]
    x_test, y_test = x_test[:nb_test], y_test[:nb_test]
    # Convert NHWC -> NCHW, the layout PyTorch expects
    x_train = np.swapaxes(x_train, 1, 3)
    x_test = np.swapaxes(x_test, 1, 3)

    # Create simple CNN
    # Define the network
    model = Model()

    # Define a loss function and optimizer
    loss_fn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)

    # Get classifier (input shape (1, 28, 28), 10 output classes)
    ptc = PyTorchClassifier((0, 1), model, loss_fn, optimizer, (1, 28, 28), 10)
    ptc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=10)

    # First attack: targeted towards random classes
    cl2m = CarliniL2Method(classifier=ptc, targeted=True, max_iter=100, binary_search_steps=1,
                           learning_rate=1, initial_const=10, decay=0)
    params = {'y': random_targets(y_test, ptc.nb_classes)}
    x_test_adv = cl2m.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    # Adversarial samples must respect the (0, 1) clip range (small tolerance)
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    target = np.argmax(params['y'], axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    # At least one sample must reach its target class
    self.assertTrue((target == y_pred_adv).any())

    # Second attack: untargeted, random labels still supplied as `y`
    cl2m = CarliniL2Method(classifier=ptc, targeted=False, max_iter=100, binary_search_steps=1,
                           learning_rate=1, initial_const=10, decay=0)
    params = {'y': random_targets(y_test, ptc.nb_classes)}
    x_test_adv = cl2m.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    target = np.argmax(params['y'], axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    # At least one prediction must differ from the (random) labels
    self.assertTrue((target != y_pred_adv).any())

    # Third attack: untargeted, labels inferred from the classifier
    cl2m = CarliniL2Method(classifier=ptc, targeted=False, max_iter=100, binary_search_steps=1,
                           learning_rate=1, initial_const=10, decay=0)
    params = {}
    x_test_adv = cl2m.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    y_pred = np.argmax(ptc.predict(x_test), axis=1)
    y_pred_adv = np.argmax(ptc.predict(x_test_adv), axis=1)
    # Success criterion: at least one prediction flips
    self.assertTrue((y_pred != y_pred_adv).any())
def test_tfclassifier(self):
    """
    First test with the TFClassifier.

    Builds a TF graph classifier, trains it on an MNIST subset, and runs the
    CW-L2 attack targeted, untargeted-with-labels, and untargeted-without-labels,
    printing per-attack success rates.
    :return:
    """
    # Build a TFClassifier
    # Define input and output placeholders
    self._input_ph = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    self._output_ph = tf.placeholder(tf.int32, shape=[None, 10])

    # Define the tensorflow graph
    conv = tf.layers.conv2d(self._input_ph, 4, 5, activation=tf.nn.relu)
    conv = tf.layers.max_pooling2d(conv, 2, 2)
    fc = tf.contrib.layers.flatten(conv)

    # Logits layer
    self._logits = tf.layers.dense(fc, 10)

    # Train operator
    self._loss = tf.reduce_mean(
        tf.losses.softmax_cross_entropy(logits=self._logits, onehot_labels=self._output_ph))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.01)
    self._train = optimizer.minimize(self._loss)

    # Tensorflow session and initialization
    self._sess = tf.Session()
    self._sess.run(tf.global_variables_initializer())

    # Get MNIST
    batch_size, nb_train, nb_test = 100, 5000, 10
    (x_train, y_train), (x_test, y_test), _, _ = load_mnist()
    x_train, y_train = x_train[:nb_train], y_train[:nb_train]
    x_test, y_test = x_test[:nb_test], y_test[:nb_test]

    # Train the classifier
    tfc = TFClassifier((0, 1), self._input_ph, self._logits, self._output_ph, self._train,
                       self._loss, None, self._sess)
    tfc.fit(x_train, y_train, batch_size=batch_size, nb_epochs=10)

    # First attack: targeted towards random classes
    cl2m = CarliniL2Method(classifier=tfc, targeted=True, max_iter=100, binary_search_steps=1,
                           learning_rate=1, initial_const=10, decay=0)
    params = {'y': random_targets(y_test, tfc.nb_classes)}
    x_test_adv = cl2m.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    #print(x_test_adv)
    # Adversarial samples must respect the (0, 1) clip range (small tolerance)
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    target = np.argmax(params['y'], axis=1)
    y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
    print("CW2 Target: %s" % target)
    print("CW2 Actual: %s" % y_pred_adv)
    print("CW2 Success Rate: %f" % (sum(target == y_pred_adv) / float(len(target))))
    self.assertTrue((target == y_pred_adv).any())

    # Second attack: untargeted, random labels still supplied as `y`
    cl2m = CarliniL2Method(classifier=tfc, targeted=False, max_iter=100, binary_search_steps=1,
                           learning_rate=1, initial_const=10, decay=0)
    params = {'y': random_targets(y_test, tfc.nb_classes)}
    x_test_adv = cl2m.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    target = np.argmax(params['y'], axis=1)
    y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
    print("CW2 Target: %s" % target)
    print("CW2 Actual: %s" % y_pred_adv)
    print("CW2 Success Rate: %f" % (sum(target != y_pred_adv) / float(len(target))))
    self.assertTrue((target != y_pred_adv).any())

    # Third attack: untargeted, labels inferred from the classifier
    cl2m = CarliniL2Method(classifier=tfc, targeted=False, max_iter=100, binary_search_steps=1,
                           learning_rate=1, initial_const=10, decay=0)
    params = {}
    x_test_adv = cl2m.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    y_pred = np.argmax(tfc.predict(x_test), axis=1)
    y_pred_adv = np.argmax(tfc.predict(x_test_adv), axis=1)
    print("CW2 Target: %s" % y_pred)
    print("CW2 Actual: %s" % y_pred_adv)
    print("CW2 Success Rate: %f" % (sum(y_pred != y_pred_adv) / float(len(y_pred))))
    self.assertTrue((y_pred != y_pred_adv).any())
float(confidence * 5)) dropout_classifier = neural_networks.three_layer_dnn( x_train.shape[1:], 300, 100, dropout, 0, 0) baseline_classifier = neural_networks.three_layer_dnn( x_train.shape[1:], 300, 100, 0, 0, 0) dropout_classifier.fit(x_train, y_train, nb_epochs=10, batch_size=128) baseline_classifier.fit(x_train, y_train, nb_epochs=10, batch_size=128) true_labels = np.argmax(y_test[:1000], axis=1) target_labels = np.array([to_one_hot((c + 1) % 10) for c in true_labels]) # Craft adversarial samples with CW attack # We direct the attacks to find an adversarial sample with class (true label + 1) mod 10. baseline_attacker = CarliniL2Method(baseline_classifier, targeted=True, binary_search_steps=30, confidence=float(confidence * 5)) dropout_attacker = CarliniL2Method(dropout_classifier, targeted=True, binary_search_steps=30, confidence=float(confidence * 5)) x_adv_baseline = baseline_attacker.generate(x=x_test[:1000], y=target_labels) x_adv_dropout = dropout_attacker.generate(x=x_test[:1000], y=target_labels) # Verify transferability percentage on dropout adversarial examples to baseline model baseline_preds = np.argmax(baseline_classifier.predict(x_adv_dropout), axis=1) baseline_transfer = (np.sum(baseline_preds == true_labels) / 1000) * 100 print( "\nAccuracy on adversarial samples generated on the dropout model evaluated by baseline model:"
0.7, 0.75 ] for dropout in range(0, 16): classifier = neural_networks.three_layer_dnn(x_train.shape[1:], 300, 100, dropout_levels[dropout], 0, 0) classifier.fit(x_train, y_train, nb_epochs=10, batch_size=128) # Evaluate the classifier on the test set preds = np.argmax(classifier.predict(x_test), axis=1) acc = (np.sum(preds == np.argmax(y_test, axis=1)) / len(y_test)) * 100 print("\nTest accuracy on dropout level %.2f%%: %.3f%%" % (dropout_levels[dropout], acc)) # Craft adversarial samples with CW attack attacker = CarliniL2Method(classifier, targeted=False) x_real = x_test[:1000] y_real = np.argmax(y_test[:1000], axis=1) x_test_adv = attacker.generate(x_real) # Evaluate the classifier on the adversarial examples adversarial_preds = np.argmax(classifier.predict(x_test_adv), axis=1) adversarial_acc = (np.sum(adversarial_preds == y_real) / len(y_real)) * 100 print("\nTest accuracy on adversarial sample for dropout %.2f%%: %.3f%%" % (dropout_levels[dropout], adversarial_acc)) # Calculate the average perturbation in L1 and L2 norms. Note that I don't de-normalize the values. perturbations = np.absolute((x_test_adv - x_real)) l1_perturbations = [ LA.norm(perturbation, 1) for perturbation in perturbations ]
def attack(self, model=None, attack_str=""):
    """
    Generate (or load cached) adversarial examples for the configured attack.

    If a pickle of previously crafted examples exists for this dataset/split/
    attack, it is returned directly. Otherwise the selected ART attack (FGSM,
    CW*, BIM or DEEPFOOL) is run, its output saved to the attack directory,
    and the adversarial images returned.

    :param model: optional pre-trained Keras model to wrap; when None, the
                  surrogate model is trained here first.
    :param attack_str: attack identifier forwarded to the image-cache lookup.
    :return: array of adversarial images.
    """
    imgs = self._load_images(attack_str, self._test_or_val_dataset)
    # Select the data split the attack operates on
    if self._test_or_val_dataset == "_x_test_set_":
        X = self.__data.x_test
        Y = self.__data.y_test
    else:
        X = self.__data.x_val
        Y = self.__data.y_val
    # Cache hit: return the stored adversarial examples without re-attacking
    if type(imgs) != type(None):
        print('\n{0} adversarial examples using {1} attack loaded...\n'.
              format(self.__dataset, self.__attack))
        return imgs
    if type(model) == type(None):
        # NOTE(review): Keras fit() returns a History object, which is bound to
        # `model` here but never used — the wrapped model in this branch is
        # self.surrogate_model. Confirm this rebinding is intentional.
        model = self.surrogate_model.fit(self.__data.x_train,
                                         self.__data.y_train,
                                         verbose=1,
                                         epochs=self.__epochs,
                                         batch_size=128)
        wrap = KerasClassifier((0., 1.), model=self.surrogate_model)
    else:
        wrap = KerasClassifier((0., 1.), model=model)
    if self.__attack == 'FGSM':
        print('\nCrafting adversarial examples using FGSM attack...\n')
        fgsm = FastGradientMethod(wrap)
        # Larger eps for MNIST than for natural-image datasets
        if self.__data.dataset_name == 'MNIST':
            x_adv_images = fgsm.generate(x=X[self.idx_adv][:self._length], eps=0.2)
        else:
            x_adv_images = fgsm.generate(x=X[self.idx_adv][:self._length], eps=0.025)
        path = os.path.join(
            self._attack_dir,
            self.__dataset.lower() + self._test_or_val_dataset + "fgsm.pkl")
        helpers.save_pkl(x_adv_images, path)
    elif self.__attack.startswith("CW"):
        print('\nCrafting adversarial examples using CW attack...\n')
        cw = CarliniL2Method(wrap,
                             confidence=0.0,
                             targeted=False,
                             binary_search_steps=1,
                             learning_rate=0.2,
                             initial_const=10,
                             max_iter=100)
        x_adv_images = cw.generate(X[self.idx_adv][:self._length])
        path = os.path.join(
            self._attack_dir,
            self.__dataset.lower() + self._test_or_val_dataset + "cw.pkl")
        helpers.save_pkl(x_adv_images, path)
    elif self.__attack == 'BIM':
        print('\nCrafting adversarial examples using BIM attack...\n')
        # NOTE(review): this branch checks self.__dataset while the FGSM branch
        # checks self.__data.dataset_name; if the value is neither 'MNIST' nor
        # 'CIFAR', `bim` stays unbound and generate() raises NameError — verify.
        if self.__dataset == 'MNIST':
            bim = BasicIterativeMethod(wrap, eps=0.25, eps_step=0.2, max_iter=100, norm=np.inf)
        if self.__dataset == 'CIFAR':
            bim = BasicIterativeMethod(wrap, eps=0.025, eps_step=0.01, max_iter=1000, norm=np.inf)
        x_adv_images = bim.generate(x=X[self.idx_adv][:self._length])
        path = os.path.join(
            self._attack_dir,
            self.__dataset.lower() + self._test_or_val_dataset + "bim.pkl")
        helpers.save_pkl(x_adv_images, path)
    elif self.__attack == 'DEEPFOOL':
        print('\nCrafting adversarial examples using DeepFool attack...\n')
        deepfool = DeepFool(wrap)
        x_adv_images = deepfool.generate(x=X[self.idx_adv][:self._length])
        path = os.path.join(
            self._attack_dir,
            self.__dataset.lower() + self._test_or_val_dataset + "deepfool.pkl")
        helpers.save_pkl(x_adv_images, path)
    return x_adv_images
# Fix: `import torch.nn.functional as F` binds only the name `F`, so the
# `torch.load` / `torch.utils.data` calls below need an explicit `import torch`.
import torch
import torch.nn.functional as F
import torchfile
import numpy as np  # duplicate `import numpy as np` removed
from vgg_face import VGG_16
import shutil
import re

from art.attacks.carlini import CarliniL2Method

if __name__ == '__main__':
    # Load the saved model and build a one-image-per-batch loader over the
    # test directory.
    model = torch.load('/home/research/tongwu/glass/donemodel/model2.pkl')
    data_dir = '/home/research/tongwu/glass/test'
    # NOTE(review): `datasets` and `transforms` are never imported in this
    # file — this needs `from torchvision import datasets, transforms`
    # (left untouched here; confirm against the original script).
    image_datasets = datasets.ImageFolder(
        data_dir,
        transforms.Compose(
            [transforms.Resize(size=(224, 224)),
             transforms.ToTensor()]))
    dataloaders = torch.utils.data.DataLoader(image_datasets,
                                              batch_size=1,
                                              shuffle=True)
    class_names = image_datasets.classes

    # NOTE(review): CarliniL2Method requires a classifier argument, so this
    # call raises TypeError as written; `x_test`, `classifier` and
    # `FastGradientMethod` are also undefined in this file. The missing setup
    # must come from the original script — flagged, not guessed at.
    attack = CarliniL2Method()
    attack.generate(x_test[:100])
    attacker = FastGradientMethod(classifier, eps=0.5)
def test_krclassifier(self):
    """
    Second test with the KerasClassifier.

    Trains a tiny Keras CNN on the fixture MNIST split and runs CW-L2
    targeted, untargeted-with-labels, and untargeted-without-labels, printing
    per-attack success rates.
    :return:
    """
    # Initialize a tf session and register it as the Keras backend session
    session = tf.Session()
    k.set_session(session)

    # Get MNIST from the test fixture
    (x_train, y_train), (x_test, y_test) = self.mnist

    # Create simple CNN
    model = Sequential()
    model.add(
        Conv2D(4,
               kernel_size=(5, 5),
               activation='relu',
               input_shape=(28, 28, 1)))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(10, activation='softmax'))
    model.compile(loss=keras.losses.categorical_crossentropy,
                  optimizer=keras.optimizers.Adam(lr=0.01),
                  metrics=['accuracy'])

    # Get classifier (model outputs probabilities, hence use_logits=False)
    krc = KerasClassifier((0, 1), model, use_logits=False)
    krc.fit(x_train, y_train, batch_size=BATCH_SIZE, nb_epochs=10)

    # First attack: targeted towards random classes
    cl2m = CarliniL2Method(classifier=krc, targeted=True, max_iter=100, binary_search_steps=1,
                           learning_rate=1, initial_const=10, decay=0)
    params = {'y': random_targets(y_test, krc.nb_classes)}
    x_test_adv = cl2m.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    # Adversarial samples must respect the (0, 1) clip range (small tolerance)
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    target = np.argmax(params['y'], axis=1)
    y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
    print("CW2 Target: %s" % target)
    print("CW2 Actual: %s" % y_pred_adv)
    print("CW2 Success Rate: %f" % (sum(target == y_pred_adv) / float(len(target))))
    self.assertTrue((target == y_pred_adv).any())

    # Second attack: untargeted, random labels still supplied as `y`
    cl2m = CarliniL2Method(classifier=krc, targeted=False, max_iter=100, binary_search_steps=1,
                           learning_rate=1, initial_const=10, decay=0)
    params = {'y': random_targets(y_test, krc.nb_classes)}
    x_test_adv = cl2m.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    target = np.argmax(params['y'], axis=1)
    y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
    print("CW2 Target: %s" % target)
    print("CW2 Actual: %s" % y_pred_adv)
    print("CW2 Success Rate: %f" % (sum(target != y_pred_adv) / float(len(target))))
    self.assertTrue((target != y_pred_adv).any())

    # Third attack: untargeted, labels inferred from the classifier
    cl2m = CarliniL2Method(classifier=krc, targeted=False, max_iter=100, binary_search_steps=1,
                           learning_rate=1, initial_const=10, decay=0)
    params = {}
    x_test_adv = cl2m.generate(x_test, **params)
    self.assertFalse((x_test == x_test_adv).all())
    self.assertTrue((x_test_adv <= 1.0001).all())
    self.assertTrue((x_test_adv >= -0.0001).all())
    y_pred = np.argmax(krc.predict(x_test), axis=1)
    y_pred_adv = np.argmax(krc.predict(x_test_adv), axis=1)
    print("CW2 Target: %s" % y_pred)
    print("CW2 Actual: %s" % y_pred_adv)
    print("CW2 Success Rate: %f" % (sum(y_pred != y_pred_adv) / float(len(y_pred))))
    self.assertTrue((y_pred != y_pred_adv).any())
def evaluation(x_test, y_test, classify_idx_lst, model, test_acc, ws, current_line,
               attack_name, flag, column_i):
    """
    Run one attack family over a grid of parameters and log accuracies to a sheet.

    For each parameter combination of the attack selected by ``attack_name``,
    adversarial examples are generated for ``x_test[classify_idx_lst]``, the
    model is evaluated on them, and one spreadsheet row is written:
    attack name (col 0), parameter tuple (col 1), then clean/adversarial
    accuracy in columns chosen by ``flag``/``column_i``.

    :param x_test: test inputs.
    :param y_test: test labels (one-hot, as consumed by model.evaluate).
    :param classify_idx_lst: indices of the samples to attack.
    :param model: Keras model under evaluation.
    :param test_acc: clean-test accuracy to record alongside each row.
    :param ws: xlwt-style worksheet object (only ``write`` is used here).
    :param current_line: first spreadsheet row to write.
    :param attack_name: one of "FGM", "BIM", "JSMA", "DeepFool", "CW-L2", "CW-Linf".
    :param flag: "ori"/"adv" write to fixed columns 3-4; anything else writes
                 to a column block selected by ``column_i``.
    :param column_i: column-block index used when ``flag`` is not "ori"/"adv".
    :return: the worksheet and the next free row number.
    """
    classifier = KerasClassifier((0., 1.), model=model)
    if attack_name == "FGM":
        # ========================= Parameter settings ========================= #
        # Maximum perturbation
        # Order of the norm
        parameter_lst = [[10, 1], [20, 1], [30, 1], [40, 1], [50, 1], [60, 1],
                         [70, 1], [80, 1], [90, 1], [100, 1], [1, 2], [2, 2],
                         [3, 2], [4, 2], [5, 2], [6, 2], [7, 2], [8, 2], [9, 2],
                         [10, 2], [0.05, np.inf], [0.10, np.inf], [0.15, np.inf],
                         [0.20, np.inf], [0.25, np.inf], [0.30, np.inf],
                         [0.35, np.inf], [0.40, np.inf], [0.45, np.inf],
                         [0.50, np.inf]]
        # ========================= Run the attack ========================= #
        for [epsilon, norm_type] in parameter_lst:
            # print("current parameter: " + str(epsilon) + ", " + str(norm_type))
            adv_crafter = FastGradientMethod(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst],
                                              eps=epsilon,
                                              norm=norm_type)
            score = model.evaluate(x_test_adv, y_test[classify_idx_lst], verbose=0)
            acc = score[1]
            ws.write(current_line, 0, attack_name)
            ws.write(
                current_line, 1,
                "(" + str(round(epsilon, 4)) + ", " + str(norm_type) + ")")
            # NOTE(review): the "ori" and "adv" branches are identical here and
            # in every branch below — confirm whether they were meant to differ.
            if flag == "ori":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            elif flag == "adv":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, acc)
            current_line += 1
    elif attack_name == "BIM":
        # ========================= Parameter settings ========================= #
        # Order of the norm
        # Maximum perturbation that the attacker can introduce
        # Attack step size (input variation) at each iteration
        # The maximum number of iterations.
        parameter_lst = [[1, 20.0, 2.0, 10], [1, 20.0, 4.0, 10],
                         [1, 20.0, 6.0, 10], [1, 20.0, 8.0, 10],
                         [1, 20.0, 10.0, 10], [1, 20.0, 2.0, 50],
                         [1, 20.0, 4.0, 50], [1, 20.0, 6.0, 50],
                         [1, 20.0, 8.0, 50], [1, 20.0, 10.0, 50],
                         [2, 2.0, 0.2, 10], [2, 2.0, 0.4, 10], [2, 2.0, 0.6, 10],
                         [2, 2.0, 0.8, 10], [2, 2.0, 1.0, 10], [2, 2.0, 0.2, 50],
                         [2, 2.0, 0.4, 50], [2, 2.0, 0.6, 50], [2, 2.0, 0.8, 50],
                         [2, 2.0, 1.0, 50], [np.inf, 0.1, 0.002, 10],
                         [np.inf, 0.1, 0.004, 10], [np.inf, 0.1, 0.006, 10],
                         [np.inf, 0.1, 0.008, 10], [np.inf, 0.1, 0.010, 10],
                         [np.inf, 0.1, 0.002, 50], [np.inf, 0.1, 0.004, 50],
                         [np.inf, 0.1, 0.006, 50], [np.inf, 0.1, 0.008, 50],
                         [np.inf, 0.1, 0.010, 50]]
        # ========================= Run the attack ========================= #
        for [norm_type, epsilon, epsilon_step, max_iteration] in parameter_lst:
            # print("current parameter: " + str(norm_type) + ", " + str(epsilon) + ", " + str(epsilon_step) + ", " + str(
            #     max_iteration))
            adv_crafter = BasicIterativeMethod(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst],
                                              norm=norm_type,
                                              eps=epsilon,
                                              eps_step=epsilon_step,
                                              max_iter=max_iteration)
            score = model.evaluate(x_test_adv, y_test[classify_idx_lst], verbose=0)
            acc = score[1]
            ws.write(current_line, 0, attack_name)
            ws.write(
                current_line, 1,
                "(" + str(norm_type) + ", " + str(round(epsilon, 4)) + ", " +
                str(round(epsilon_step, 4)) + ", " + str(max_iteration) + ")")
            if flag == "ori":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            elif flag == "adv":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, acc)
            current_line += 1
    elif attack_name == "JSMA":
        # ========================= Parameter settings ========================= #
        # Perturbation introduced to each modified feature per step (can be positive or negative).
        # Maximum percentage of perturbed features (between 0 and 1).
        parameter_lst = [[0.5, 0.5], [0.4, 0.5], [0.3, 0.5], [0.2, 0.5],
                         [0.1, 0.5], [-0.1, 0.5], [-0.2, 0.5], [-0.3, 0.5],
                         [-0.4, 0.5], [-0.5, 0.5]]
        # ========================= Run the attack ========================= #
        for [theta, gamma] in parameter_lst:
            # print("current parameter: " + str(theta) + ", " + str(gamma))
            adv_crafter = SaliencyMapMethod(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst],
                                              theta=theta,
                                              gamma=gamma)
            score = model.evaluate(x_test_adv, y_test[classify_idx_lst], verbose=0)
            acc = score[1]
            ws.write(current_line, 0, attack_name)
            ws.write(
                current_line, 1,
                "(" + str(round(theta, 4)) + ", " + str(round(gamma, 4)) + ")")
            if flag == "ori":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            elif flag == "adv":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, acc)
            current_line += 1
    elif attack_name == "DeepFool":
        # ========================= Parameter settings ========================= #
        # The maximum number of iterations.
        # Overshoot parameter.
        parameter_lst = [[2, 0.10], [4, 0.10], [6, 0.10], [8, 0.10], [10, 0.10],
                         [12, 0.10], [14, 0.10], [16, 0.10], [18, 0.10],
                         [20, 0.10]]
        # ========================= Run the attack ========================= #
        for [max_iteration, epsilon] in parameter_lst:
            # print("current parameter: " + str(max_iteration) + ", " + str(epsilon))
            adv_crafter = DeepFool(classifier)
            x_test_adv = adv_crafter.generate(x=x_test[classify_idx_lst],
                                              max_iter=max_iteration,
                                              epsilon=epsilon)
            score = model.evaluate(x_test_adv, y_test[classify_idx_lst], verbose=0)
            acc = score[1]
            ws.write(current_line, 0, attack_name)
            ws.write(
                current_line, 1,
                "(" + str(max_iteration) + ", " + str(round(epsilon, 4)) + ")")
            if flag == "ori":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            elif flag == "adv":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, acc)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, acc)
            current_line += 1
    elif attack_name == "CW-L2":
        # ========================= Parameter settings ========================= #
        # confidence: Confidence of adversarial examples: a higher value produces examples that are farther away,
        # from the original input, but classified with higher confidence as the target class.
        # The maximum number of iterations.
        parameter_lst = [[0, 1], [0, 2], [0, 3], [0, 4], [0, 5]]
        # ========================= Run the attack ========================= #
        for [confidence_value, max_iter_value] in parameter_lst:
            # print("current parameter: " + str(confidence_value) + ", " + str(max_iter_value))
            adv_crafter = CarliniL2Method(classifier)
            # Average the adversarial accuracy over a targeted attack towards
            # each of the 10 classes.
            sum_adv_acc = 0
            for adv_label in range(0, 10):
                one_hot_label = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
                one_hot_label[adv_label] = 1
                x_test_adv = adv_crafter.generate(
                    x=x_test[classify_idx_lst],
                    confidence=confidence_value,
                    targeted=True,
                    max_iter=max_iter_value,
                    y=np.array([one_hot_label] * x_test[classify_idx_lst].shape[0]))
                score = model.evaluate(x_test_adv, y_test[classify_idx_lst], verbose=0)
                acc = score[1]
                sum_adv_acc += acc
            ws.write(current_line, 0, attack_name)
            ws.write(
                current_line, 1,
                "(" + str(round(confidence_value, 4)) + ", " +
                str(max_iter_value) + ")")
            if flag == "ori":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, sum_adv_acc / 10)
            elif flag == "adv":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, sum_adv_acc / 10)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, sum_adv_acc / 10)
            current_line += 1
    elif attack_name == "CW-Linf":
        # ========================= Parameter settings ========================= #
        # confidence: Confidence of adversarial examples: a higher value produces examples that are farther away,
        # from the original input, but classified with higher confidence as the target class.
        # The maximum number of iterations.
        parameter_lst = [[0, 1], [0, 2], [0, 3], [0, 4], [0, 5]]
        # ========================= Run the attack ========================= #
        for [confidence_value, max_iter_value] in parameter_lst:
            # print("current parameter: " + str(confidence_value) + ", " + str(max_iter_value))
            adv_crafter = CarliniLInfMethod(classifier)
            # Average the adversarial accuracy over a targeted attack towards
            # each of the 10 classes.
            sum_adv_acc = 0
            for adv_label in range(0, 10):
                one_hot_label = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
                one_hot_label[adv_label] = 1
                x_test_adv = adv_crafter.generate(
                    x=x_test[classify_idx_lst],
                    confidence=confidence_value,
                    targeted=True,
                    max_iter=max_iter_value,
                    y=np.array([one_hot_label] * x_test[classify_idx_lst].shape[0]))
                score = model.evaluate(x_test_adv, y_test[classify_idx_lst], verbose=0)
                acc = score[1]
                sum_adv_acc += acc
            ws.write(current_line, 0, attack_name)
            ws.write(
                current_line, 1,
                "(" + str(round(confidence_value, 4)) + ", " +
                str(max_iter_value) + ")")
            if flag == "ori":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, sum_adv_acc / 10)
            elif flag == "adv":
                ws.write(current_line, 3, test_acc)
                ws.write(current_line, 4, sum_adv_acc / 10)
            else:
                ws.write(current_line, 0 + 3 * column_i, test_acc)
                ws.write(current_line, 1 + 3 * column_i, sum_adv_acc / 10)
            current_line += 1
    # Leave one blank row between attack sections
    current_line += 1
    # print("\n------------------------------------------------")
    return ws, current_line
"mnist") elif args.experiment_type == "leNet5": classifier = convolutional.leNet_cnn_single( dropout_levels[dropout]) classifier.fit(x_train, y_train, nb_epochs=50, batch_size=128) # Evaluate the classifier on the test set preds = np.argmax(classifier.predict(x_test), axis=1) acc = (np.sum(preds == np.argmax(y_test, axis=1)) / len(y_test)) * 100 accuracy.append(acc) # Craft adversarial samples with CW attack attacker = CarliniL2Method(classifier, targeted=False, binary_search_steps=args.binary_steps, confidence=args.confidence) x_real = x_test[:1000] y_real = np.argmax(y_test[:1000], axis=1) x_test_adv = attacker.generate(x_real) # Calculate the average perturbation and MMD metric. Note that I don't de-normalize the values. perturbations = np.absolute((x_test_adv - x_real)) l1_perturbations = [ LA.norm(perturbation.flatten(), 1) for perturbation in perturbations ] l2_perturbations = [ LA.norm(perturbation.flatten(), 2) for perturbation in perturbations