def deep_fool_attack():
    counter = 0
    image_iterator = importer.load_images_generator(importer.batch_shape)
    tf.reset_default_graph()
    x_input = tf.placeholder(tf.float32, shape=importer.batch_shape)
    folder_path = os.path.join(config.ADVERSARIAL_FOLDER, "deep_full_base")
    os.makedirs(folder_path, exist_ok=True)
    while True:
        with tf.Session() as sess:
            filenames, images = next(image_iterator, (None, None))
            model = Inception_V3_Model(np.float32(images))
            attack = DeepFool(model=model, sess=sess)
            params = {'max_iter': 5}
            variables = tf.get_collection(tf.GraphKeys.VARIABLES)
            saver = tf.train.Saver(variables)
            saver.restore(sess, importer.checkpoint_path)
            x_adv = attack.generate(x_input, **params)
            # writer = tf.summary.FileWriter("/tmp/log/", sess.graph)
            adversarial_images = sess.run(x_adv, feed_dict={x_input: images})
            utils.image_saver(adversarial_images, filenames, folder_path)
            print("adversarial_images counter:{}".format(counter))
            # writer.close()
            counter += 1
            if counter == 999:
                print("Attack is finished")
                break
def get_DeepFool_adversarial(targeted, xs, classifier, batch_size):
    # DeepFool is an untargeted attack by construction
    if targeted:
        print('DeepFool attack cannot be targeted.')
        exit()
    ATTACK_BATCH = batch_size
    samples_range = int(xs.shape[0] / ATTACK_BATCH)
    wrap = KerasModelWrapper(classifier)
    attack = DeepFool(wrap, sess=K.get_session())
    deepfool_params = {
        'overshoot': 0.02,
        'max_iter': 50,
        'nb_candidate': 2,
        'clip_min': -5,
        'clip_max': 5
    }
    attack_xs = attack.generate_np(xs[:ATTACK_BATCH, :, :, :],
                                   **deepfool_params)
    for ii in range(1, samples_range):
        print('ITER', ii)
        new_attack_batch = attack.generate_np(
            xs[ii * ATTACK_BATCH:(ii + 1) * ATTACK_BATCH, :, :, :],
            **deepfool_params)
        attack_xs = np.concatenate((attack_xs, new_attack_batch), axis=0)
    return attack_xs
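# A minimal usage sketch for get_DeepFool_adversarial; `classifier` is assumed
# to be a compiled Keras model and `x_test` an array already scaled to the
# model's input range. Samples beyond the last full batch are dropped by the
# integer division above.
adv_test = get_DeepFool_adversarial(False, x_test[:256], classifier,
                                    batch_size=64)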
def untargeted_attack(model, images): sess = backend.get_session() wrap = KerasModelWrapper(model) df = DeepFool(wrap, back='tf', sess=sess) adv_x = df.generate_np(images) return adv_x
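# Illustrative call, assuming `keras_model` is a trained Keras classifier and
# `images` a float array in its input range; with no explicit params,
# DeepFool's defaults (nb_candidate=10, overshoot=0.02, max_iter=50) apply.
adv_images = untargeted_attack(keras_model, images)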
def __init__(self, model, n_candidates=10, overshoot=0.02, max_iterations=50, clip_min=-1., clip_max=1.): super().__init__(model=model, clip_min=clip_min, clip_max=clip_max) self._n_candidate = n_candidates self._overshoot = overshoot self._max_iterations = max_iterations with self.graph.as_default(): self._method = DeepFool(self._model, sess=self.session, nb_candidate=self._n_candidate, overshoot=self._overshoot, max_iter=self._max_iterations, nb_classes=self.n_classes, clip_min=self._clip_min, clip_max=self._clip_max)
def init_attack(model, attack_params_dict, sess):
    """
    Initialize the adversarial attack using the cleverhans toolbox

    Parameters
    ----------
    model : Keras Model
        The model to attack

    attack_params_dict : dict
        Self-defined dictionary specifying the attack and its parameters

    sess : Session
        The current tf session

    Returns
    -------
    attack : cleverhans Attack
        The Attack object

    attack_params : dict
        Dictionary with the value of the attack parameters, valid to
        generate adversarial examples with cleverhans.

    batch_size : int or None
        Batch size required by the attack (set to 1 for SPSA, which runs
        one sample at a time); None means any batch size is valid.
    """

    # Wrapper for the Keras model
    model_wrap = KerasModelWrapper(model)

    # Initialize attack
    batch_size = None
    if attack_params_dict['attack'] == 'fgsm':
        attack = FastGradientMethod(model_wrap, sess=sess)
        attack_params = {'eps': attack_params_dict['eps'],
                         'clip_min': 0., 'clip_max': 1.}
    elif attack_params_dict['attack'] == 'spsa':
        attack = SPSA(model_wrap, sess=sess)
        attack_params = {'epsilon': attack_params_dict['eps'],
                         'num_steps': attack_params_dict['n_steps']}
        batch_size = 1
    elif attack_params_dict['attack'] == 'deepfool':
        attack = DeepFool(model_wrap, sess=sess)
        attack_params = {'clip_min': 0., 'clip_max': 1.}
    elif attack_params_dict['attack'] == 'pgd':
        attack = ProjectedGradientDescent(model_wrap, sess=sess)
        attack_params = {'eps': attack_params_dict['eps'],
                         'eps_iter': attack_params_dict['eps_iter'],
                         'nb_iter': attack_params_dict['n_steps'],
                         'clip_min': 0., 'clip_max': 1.}
    elif attack_params_dict['attack'] == 'carlini':
        attack = CarliniWagnerL2(model_wrap, sess=sess)
        attack_params = {'clip_min': 0., 'clip_max': 1.}
    else:
        raise NotImplementedError()

    return attack, attack_params, batch_size
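# Hypothetical usage of init_attack; `model`, `sess` and the placeholder shape
# are assumptions for illustration, not part of the original code.
attack, attack_params, batch_size = init_attack(
    model, {'attack': 'deepfool'}, sess)
x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
x_adv = attack.generate(x, **attack_params)  # symbolic adversarial batch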
def _DeepFool(self): deepfool_attack = DeepFool(self.wrapped_model, sess=self.sess) params = { 'nb_candidate': 10, 'max_iter': 100, 'clip_min': 0., 'clip_max': 1., 'verbose': False } adv_x = deepfool_attack.generate(self.x, **params) self.save_images(adv_x, self.save_loc)
def eval_cleverhans():
    # Set test phase
    learning_phase = K.learning_phase()
    K.set_learning_phase(0)

    # Pre-process images
    images_tf = images.astype(K.floatx())
    images_tf /= 255.

    # Wrapper for the Keras model
    model_wrap = KerasModelWrapper(model)

    # Initialize attack
    if attack_params_dict['attack'] == 'fgsm':
        attack = FastGradientMethod(model_wrap, sess=K.get_session())
        attack_params = {'eps': attack_params_dict['eps'],
                         'clip_min': 0., 'clip_max': 1.}
    elif attack_params_dict['attack'] == 'deepfool':
        attack = DeepFool(model_wrap, sess=K.get_session())
        attack_params = {'clip_min': 0., 'clip_max': 1.}
    elif attack_params_dict['attack'] == 'madry':
        attack = ProjectedGradientDescent(model_wrap, sess=K.get_session())
        attack_params = {'clip_min': 0., 'clip_max': 1.}
    elif attack_params_dict['attack'] == 'carlini':
        attack = CarliniWagnerL2(model_wrap, sess=K.get_session())
        attack_params = {'clip_min': 0., 'clip_max': 1.}
    else:
        raise NotImplementedError()

    # Define input TF placeholders
    x = tf.placeholder(K.floatx(), shape=(None,) + images.shape[1:])
    y = tf.placeholder(K.floatx(), shape=(None,) + (labels.shape[-1],))

    # Define adversarial predictions symbolically
    x_adv = attack.generate(x, **attack_params)
    x_adv = tf.stop_gradient(x_adv)
    predictions_adv = model(x_adv)

    # Evaluate the accuracy of the model on adversarial examples
    eval_par = {'batch_size': batch_size}
    # feed_dict = {K.learning_phase(): attack_params_dict['learning_phase']}
    # acc_adv = model_eval(K.get_session(), x, y, predictions_adv, images,
    #                      labels, feed=feed_dict, args=eval_par)
    acc_adv = model_eval(K.get_session(), x, y, predictions_adv, images_tf,
                         labels, args=eval_par)
    print('Adversarial accuracy against %s: %.4f\n' %
          (attack_params_dict['attack'], acc_adv))

    # Restore original phase
    K.set_learning_phase(learning_phase)

    return acc_adv
def df(model): wrap = KerasModelWrapper(model) att = DeepFool(wrap, sess=session) def attack(X, eps): for i in tqdm(range(0, len(X), CHILD_BATCH_SIZE), desc=f'DF: ', file=sys.stdout, leave=False): # print(X[i:i+CHILD_BATCH_SIZE].shape) tensor = tf.convert_to_tensor(X[i:i + CHILD_BATCH_SIZE]) tensor = att.generate(tensor, nb_candidate=int(eps + 0.5)) X[i:i + CHILD_BATCH_SIZE] = session.run(tensor) # import matplotlib.pyplot as plt # plt.imshow(X[i]) # plt.show() return attack
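# Illustrative use of the closure returned by df(); `keras_model` and the
# float image array `X` are assumptions. Note that the eps argument is
# rounded and reused as DeepFool's nb_candidate, and that X is perturbed
# in place, one CHILD_BATCH_SIZE slice at a time.
attack_fn = df(keras_model)
attack_fn(X, eps=10)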
def setUp(self): super(TestDeepFool, self).setUp() import tensorflow as tf # The world's simplest neural network def my_model(x): W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32) h1 = tf.nn.sigmoid(tf.matmul(x, W1)) W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32) res = tf.matmul(h1, W2) return res self.sess = tf.Session() self.model = my_model self.attack = DeepFool(self.model, sess=self.sess)
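# Note: newer cleverhans releases expect a cleverhans Model rather than a bare
# callable. A minimal sketch of the extra wrapping step, under that assumption
# (my_model as defined in the test above):
from cleverhans.model import CallableModelWrapper

wrapped_model = CallableModelWrapper(my_model, output_layer='logits')
attack = DeepFool(wrapped_model, sess=tf.Session())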
def df(X, which, prob, magn): wrapped = DeepFool(KerasModelWrapper(which.model), sess=session) X = X.copy() idx = np.random.uniform(size=len(X)) idx = np.where(idx < prob)[0] for i in tqdm(range(0, len(idx), CHILD_BATCH_SIZE), desc=f'batch: ', leave=False): tensor = tf.convert_to_tensor(X[idx[i:i + CHILD_BATCH_SIZE]]) init = tf.global_variables_initializer() session.run(init) tensor = wrapped.generate(tensor, clip_min=0., clip_max=magn * 0.3 + 0.3) X[idx[i:i + CHILD_BATCH_SIZE]] = session.run(tensor) return X
def get_DeepFool_samples(loaded_model, samples, max_iter): sess = backend.get_session() wrap = KerasModelWrapper(loaded_model) deepfool = DeepFool(wrap, sess=sess) deepfool_params = { 'max_iter': max_iter, 'clip_min': 0., 'clip_max': 1., 'nb_candidate': 10 } adv_x = deepfool.generate_np(samples, **deepfool_params) return adv_x
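# Illustrative call, assuming `loaded_model` is a trained Keras classifier and
# `x_test` is scaled to [0, 1] to match the clip_min/clip_max above.
adv_test = get_DeepFool_samples(loaded_model, x_test[:100], max_iter=50)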
def query(self, X_train, Y_train, labeled_idx, amount):
    unlabeled_idx = get_unlabeled_idx(X_train, labeled_idx)
    unlabeled = X_train[unlabeled_idx]

    keras_wrapper = KerasModelWrapper(self.model)
    sess = K.get_session()
    deep_fool = DeepFool(keras_wrapper, sess=sess)
    deep_fool_params = {
        'overshoot': 0.02,
        'clip_min': 0.,
        'clip_max': 1.,
        'nb_candidate': Y_train.shape[1],
        'max_iter': 10
    }

    true_predictions = np.argmax(
        self.model.predict(unlabeled, batch_size=256), axis=1)
    adversarial_predictions = np.copy(true_predictions)
    while np.sum(true_predictions != adversarial_predictions) < amount:
        adversarial_images = np.zeros(unlabeled.shape)
        for i in range(0, unlabeled.shape[0], 100):
            print("At {i} out of {n}".format(i=i, n=unlabeled.shape[0]))
            if i + 100 > unlabeled.shape[0]:
                adversarial_images[i:] = deep_fool.generate_np(
                    unlabeled[i:], **deep_fool_params)
            else:
                adversarial_images[i:i + 100] = deep_fool.generate_np(
                    unlabeled[i:i + 100], **deep_fool_params)
        perturbations = adversarial_images - unlabeled
        norms = np.linalg.norm(
            np.reshape(perturbations, (unlabeled.shape[0], -1)), axis=1)
        adversarial_predictions = np.argmax(self.model.predict(
            adversarial_images, batch_size=256), axis=1)
        norms[true_predictions == adversarial_predictions] = np.inf
        deep_fool_params['max_iter'] *= 2

    selected_indices = np.argpartition(norms, amount)[:amount]

    del keras_wrapper
    del deep_fool
    gc.collect()

    return np.hstack((labeled_idx, unlabeled_idx[selected_indices]))
def mnist_deepfool_attack(sample, target, model, sess, targeted=True,
                          attack_iterations=100):
    # DeepFool is untargeted, so target/targeted/attack_iterations are
    # accepted for interface compatibility but ignored.
    print('deepfool attack start')
    deepfool = DeepFool(model, sess=sess)
    deepfool_params = {
        'overshoot': 0.02,
        'clip_min': 0.,
        'clip_max': 1.,
        'max_iter': 300,
        'nb_candidate': 2,
    }
    adv_x = deepfool.generate_np(sample, **deepfool_params)
    return adv_x
def get_adversarial_attack_and_params(attack_name, wrap, sess):
    params = None
    stop_gradient = False

    if attack_name == "fgsm":
        attack = FastGradientMethod(wrap, sess=sess)
        params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
        stop_gradient = True
    elif attack_name == "deepfool":
        attack = DeepFool(wrap, sess=sess)
    elif attack_name == "lbfgs":
        attack = LBFGS(wrap, sess=sess)
    elif attack_name == "saliency":
        attack = SaliencyMapMethod(wrap, sess=sess)
    elif attack_name == "bim":
        attack = BasicIterativeMethod(wrap, sess=sess)
    else:
        # Avoid an UnboundLocalError on the return below
        raise ValueError("Unknown attack: %s" % attack_name)
    return attack, params, stop_gradient
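# A sketch of consuming the returned triple, assuming a KerasModelWrapper
# `wrap`, a placeholder `x`, and a live session; stop_gradient only matters
# for the FGSM branch above.
attack, params, stop_gradient = get_adversarial_attack_and_params(
    "deepfool", wrap, sess)
adv_x = attack.generate(x, **(params or {}))
if stop_gradient:
    adv_x = tf.stop_gradient(adv_x)  # keep the attack out of the training graph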
def next_images():
    tf.logging.set_verbosity(tf.logging.INFO)
    print("{} generator graph is ready!".format(mode))
    tf.reset_default_graph()
    sess = tf.Session()
    x_input = tf.placeholder(tf.float32, shape=importer.batch_shape)
    params = {}
    model = InceptionModelLogits(importer.num_classes, x_input)
    if mode == 'deep_fool':
        graph = DeepFool(model, sess=sess)
        params['max_iter'] = 5
    else:
        raise Exception("Not supported mode")
    print('graph params: {}'.format(params))
    variables = tf.get_collection(tf.GraphKeys.VARIABLES)
    saver = tf.train.Saver(variables)
    saver.restore(sess, importer.checkpoint_path)
    image_iterator = importer.load_images_generator(importer.batch_shape)
    while True:
        filenames, images = next(image_iterator, (None, None))
        if filenames is None:
            break
        true_classes = importer.filename_to_class(filenames)
        target = np.expand_dims(np.zeros(importer.num_classes), 1)
        if mode == 'carlini_wagner':  # unreachable here; kept from a shared template
            assert (len(true_classes) == 1)
            target[true_classes[0]] = 1
            params["y"] = target
        x_adv = graph.generate(x_input, **params)
        adversarial_images = sess.run(x_adv, feed_dict={x_input: images})
        print("Image:{}, diff:{}".format(
            filenames[0],
            np.sum(np.abs(images[0] - adversarial_images[0]))))
        if is_return_orig_images:
            yield filenames, adversarial_images, images
        else:
            yield filenames, adversarial_images
def train_child(t, p, m, load_dict=False):
    # model = nn.DataParallel(TestCNN().cuda(1), device_ids=[1, 2, 3])
    raw_model = TestCNN().cuda(0)
    model = TestCNN().cuda(0)
    tf_model = convert_pytorch_model_to_tf(model)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')
    session = tf.Session()
    x_op = tf.placeholder(tf.float32, shape=(None, 3, 32, 32))
    fgsm = FastGradientMethod(cleverhans_model, sess=session)
    # stm = SpatialTransformationMethod(cleverhans_model, sess=session)
    # cw2 = CarliniWagnerL2(cleverhans_model, sess=session)
    pgd = ProjectedGradientDescent(cleverhans_model, sess=session)
    noise = Noise(cleverhans_model, sess=session)
    mim = MomentumIterativeMethod(cleverhans_model, sess=session)
    df = DeepFool(cleverhans_model, sess=session)
    tf_raw_model = convert_pytorch_model_to_tf(raw_model)
    cleverhans_raw_model = CallableModelWrapper(tf_raw_model,
                                                output_layer='logits')
    # pgd_raw = ProjectedGradientDescent(cleverhans_raw_model, sess=session)
    noise_raw = Noise(cleverhans_raw_model, sess=session)

    def fgsm_op(x, eps):
        att = fgsm.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    # def stm_op(x, eps):
    #     att = stm.generate(x_op, batch_size=len(x), dx_min=-0.1*eps,
    #                        dx_max=0.1*eps, dy_min=-0.1*eps, dy_max=0.1*eps,
    #                        angle_min=-30*eps, angle_max=30*eps)
    #     return session.run(att, feed_dict={x_op: x})
    # def cw2_op(x, eps):
    #     att = cw2.generate(x_op, max_iterations=3)

    def pgd_op(x, eps):
        att = pgd.generate(x_op, eps=eps, eps_iter=eps * 0.2, nb_iter=3)
        return session.run(att, feed_dict={x_op: x})

    # def pgd_raw_op(x, eps):
    #     att = pgd_raw.generate(x_op, eps=eps, eps_iter=eps * 0.2, nb_iter=3)
    #     return session.run(att, feed_dict={x_op: x})

    def noise_op(x, eps):
        att = noise.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    def noise_raw_op(x, eps):
        att = noise_raw.generate(x_op, eps=eps)
        return session.run(att, feed_dict={x_op: x})

    def df_op(x):
        att = df.generate(x_op, nb_candidate=10, max_iter=3)
        return session.run(att, feed_dict={x_op: x})

    def mim_op(x, eps):
        att = mim.generate(x_op, eps=eps, eps_iter=eps * 0.2)
        return session.run(att, feed_dict={x_op: x})

    def attack_train(x):
        attacks = [fgsm_op, pgd_op, mim_op]
        attacks_name = ['FGSM', 'PGD', 'MIM']
        eps = [[0.03, 0.3], [0.03, 0.3], [0.03, 0.3]]
        train_x_adv = x.copy()
        adv_type = np.random.randint(SUBPOLICY_COUNT, size=len(train_x_adv))
        for i, (ti, pi, mi) in enumerate(
                tqdm(zip(t, p, m), total=len(t), desc='Subpolicy: ',
                     leave=False)):
            adv_i = train_x_adv[adv_type == i]
            for j, (tj, pj, mj) in enumerate(
                    tqdm(zip(ti, pi, mi), total=len(ti), desc='Operation: ',
                         leave=False)):
                tj, pj, mj = (*tj, *pj, *mj)
                mask = np.random.randn(len(adv_i)) < pj
                adv_j = adv_i[mask]
                # use a separate loop variable so the subpolicy index i is
                # not clobbered by the batch loop
                for k in tqdm(range(0, len(adv_j), BATCH_SIZE),
                              desc=attacks_name[tj] + ': ', leave=False):
                    adv_j[k:][:BATCH_SIZE] = attacks[tj](
                        adv_j[k:][:BATCH_SIZE],
                        (mj + 1) / MAGN_COUNT * (eps[tj][1] - eps[tj][0])
                        + eps[tj][0])
                # fancy indexing returns copies, so write the perturbed
                # samples back explicitly
                adv_i[mask] = adv_j
            train_x_adv[adv_type == i] = adv_i
        return train_x_adv

    optimizer = optim.SGD(model.parameters(), lr=1e-3)
    raw_optimizer = optim.SGD(raw_model.parameters(), lr=1e-3)
    train_x_adv = attack_train(train_x)
    adv_trainset = torch.utils.data.TensorDataset(
        torch.tensor(train_x_adv, dtype=torch.float),
        torch.tensor(train_y, dtype=torch.long))
    # train on the adversarial dataset, not the clean one
    adv_trainloader = torch.utils.data.DataLoader(adv_trainset,
                                                  batch_size=BATCH_SIZE,
                                                  shuffle=True, num_workers=4)
    if load_dict:
        model.load_state_dict(torch.load('black_eval_runs/model.pt'))
        optimizer.load_state_dict(torch.load('black_eval_runs/optimizer.pt'))
        raw_model.load_state_dict(torch.load('black_eval_runs/raw_model.pt'))
raw_optimizer.load_state_dict( torch.load('black_eval_runs/raw_optimizer.pt')) model.train() batch_tqdm = tqdm(adv_trainloader, leave=False) for x, y in batch_tqdm: optimizer.zero_grad() output = model(x.cuda(0)) loss = criterion(output, y.cuda(0)) loss.backward() optimizer.step() acc = torch.sum(output.cpu().argmax(axis=1) == y) / y.size(0) batch_tqdm.set_description(f'adv {loss:.3f} {acc:.3f}') batch_tqdm = tqdm(trainloader, leave=False) raw_model.train() for x, y in batch_tqdm: raw_optimizer.zero_grad() output = raw_model(x.cuda(0)) loss = criterion(output, y.cuda(0)) loss.backward() raw_optimizer.step() acc = torch.sum(output.cpu().argmax(axis=1) == y) / y.size(0) batch_tqdm.set_description(f'raw {loss:.3f} {acc:.3f}') with torch.no_grad(): model.eval() batch_tqdm = tqdm(valloader, leave=False) tot_acc = 0 for x, y in batch_tqdm: output = model(x.cuda(0)) acc = float(torch.sum(output.cpu().argmax(axis=1) == y)) tot_acc += acc adv_raw_acc = tot_acc / len(val_x) val_x_adv = np.zeros_like(val_x) for i in tqdm(range(0, len(val_x_adv), BATCH_SIZE), desc='Noise: ', leave=False): val_x_adv[i:][:BATCH_SIZE] = noise_op(val_x[i:][:BATCH_SIZE], 0.3) adv_valset = torch.utils.data.TensorDataset( torch.tensor(val_x_adv, dtype=torch.float), torch.tensor(val_y, dtype=torch.long)) adv_valloader = torch.utils.data.DataLoader(adv_valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4) batch_tqdm = tqdm(adv_valloader, leave=False) tot_acc = 0 for x, y in batch_tqdm: output = model(x.cuda(0)) acc = float(torch.sum(output.cpu().argmax(axis=1) == y)) tot_acc += acc adv_adv_acc = tot_acc / len(val_x) raw_model.eval() batch_tqdm = tqdm(valloader, leave=False) tot_acc = 0 for x, y in batch_tqdm: output = raw_model(x.cuda(0)) acc = float(torch.sum(output.cpu().argmax(axis=1) == y)) tot_acc += acc raw_raw_acc = tot_acc / len(val_x) val_x_adv = np.zeros_like(val_x) for i in tqdm(range(0, len(val_x_adv), BATCH_SIZE), desc='Noise: ', leave=False): val_x_adv[i:][:BATCH_SIZE] = noise_raw_op(val_x[i:][:BATCH_SIZE], 0.3) adv_valset = torch.utils.data.TensorDataset( torch.tensor(val_x_adv, dtype=torch.float), torch.tensor(val_y, dtype=torch.long)) adv_valloader = torch.utils.data.DataLoader(adv_valset, batch_size=BATCH_SIZE, shuffle=False, num_workers=4) batch_tqdm = tqdm(adv_valloader, leave=False) tot_acc = 0 for x, y in batch_tqdm: output = raw_model(x.cuda(0)) acc = float(torch.sum(output.cpu().argmax(axis=1) == y)) tot_acc += acc raw_adv_acc = tot_acc / len(val_x) with open('black_eval_runs/acc.csv', 'a') as f: f.write(f'{adv_raw_acc},{adv_adv_acc},{raw_raw_acc},{raw_adv_acc}\n') print( f'adv {adv_raw_acc:.3f} -> {adv_adv_acc:.3f} | raw {raw_raw_acc:.3f} -> {raw_adv_acc:.3f}' ) torch.save(model.state_dict(), 'black_eval_runs/model.pt') torch.save(optimizer.state_dict(), 'black_eval_runs/optimizer.pt') torch.save(raw_model.state_dict(), 'black_eval_runs/raw_model.pt') torch.save(raw_optimizer.state_dict(), 'black_eval_runs/raw_optimizer.pt')
def setUp(self): super(TestDeepFool, self).setUp() self.sess = tf.Session() self.model = SimpleModel() self.attack = DeepFool(self.model, sess=self.sess)
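# SimpleModel is defined elsewhere in the cleverhans test utilities; a minimal
# stand-in consistent with how DeepFool uses it might look like this (an
# assumption for illustration, not the original class):
from cleverhans.model import Model

class SimpleModel(Model):
    """Tiny two-layer network exposing logits and probs."""

    def __init__(self, scope='simple', nb_classes=2, **kwargs):
        del kwargs
        Model.__init__(self, scope, nb_classes, locals())

    def fprop(self, x, **kwargs):
        del kwargs
        w1 = tf.constant([[1.5, .3], [-2., 0.3]], dtype=tf.as_dtype(x.dtype))
        w2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.as_dtype(x.dtype))
        h1 = tf.nn.sigmoid(tf.matmul(x, w1))
        logits = tf.matmul(h1, w2)
        return {self.O_LOGITS: logits,
                self.O_PROBS: tf.nn.softmax(logits)}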
def attack_classifier(sess, x, model, x_test, attack_method="fgsm", target=None, batch_size=128): if attack_method == "fgsm": from cleverhans.attacks import FastGradientMethod params = {'eps': 8/255, 'clip_min': 0., 'clip_max': 1. } if target is not None: params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0)) method = FastGradientMethod(model, sess=sess) elif attack_method == "basic_iterative": from cleverhans.attacks import BasicIterativeMethod params = {'eps': 8./255, 'eps_iter': 1./255, 'nb_iter': 10, 'clip_min': 0., 'clip_max': 1., 'ord': np.inf } if target is not None: params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0)) method = BasicIterativeMethod(model,sess = sess) elif attack_method == "momentum_iterative": from cleverhans.attacks import MomentumIterativeMethod params = {'eps':8/255, 'eps_iter':1/255, 'nb_iter': 10, 'clip_min': 0., 'clip_max': 1. } if target is not None: params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0)) method = MomentumIterativeMethod(model,sess = sess) elif attack_method == "saliency": from cleverhans.attacks import SaliencyMapMethod params = {'theta':8/255, 'gamma':0.1, 'clip_min': 0., 'clip_max': 1. } assert target is None method = SaliencyMapMethod(model,sess = sess) elif attack_method == "virtual": from cleverhans.attacks import VirtualAdversarialMethod params = {'eps':8/255, 'num_iterations':10, 'xi' :1e-6, 'clip_min': 0., 'clip_max': 1. } assert target is None method = VirtualAdversarialMethod(model,sess = sess) elif attack_method == "cw": from cleverhans.attacks import CarliniWagnerL2 params = { "confidence":0, "batch_size":128, "learning_rate":1e-4, "binary_search_steps":10, "max_iterations":1000, "abort_early": True, "initial_const":1e-2, "clip_min":0, "clip_max":1 } if target is not None: params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0)) method = CarliniWagnerL2(model,sess = sess) elif attack_method == "elastic_net": from cleverhans.attacks import ElasticNetMethod params = { "fista": "FISTA", "beta": 0.1, "decision_rule":"EN", "confidence":0, "batch_size":128, "learning_rate":1e-4, "binary_search_steps":10, "max_iterations":1000, "abort_early": True, "initial_const":1e-2, "clip_min":0, "clip_max":1 } if target is not None: params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0)) method = ElasticNetMethod(model,sess = sess) elif attack_method == "deepfool": from cleverhans.attacks import DeepFool params = { "nb_candidate":10, "overshoot":1e-3, "max_iter":100, "nb_classes":10, "clip_min":0, "clip_max":1 } assert target is None method = DeepFool(model,sess = sess) elif attack_method == "lbfgs": from cleverhans.attacks import LBFGS params = { 'batch_size':128, "binary_search_steps":10, "max_iterations":1000, "initial_const":1e-2, 'clip_min': 0., 'clip_max': 1. } assert target is not None params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0)) method = LBFGS(model,sess = sess) elif attack_method == "madry": from cleverhans.attacks import MadryEtAl params = {'eps':8/255, 'eps_iter':1/255, 'nb_iter':10, 'ord':np.inf, 'clip_min': 0., 'clip_max': 1. 
} if target is not None: params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0)) method = MadryEtAl(model, sess = sess) elif attack_method == "SPSA": from cleverhans.attacks import SPSA params = { 'epsilon':1/255, 'num_steps':10, 'is_targeted':False, 'early_stop_loss_threshold':None, 'learning_rate':0.01, 'delta':0.01, 'batch_size':128, 'spsa_iters':1, 'is_debug':False } if target is not None: params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0)) params["is_targeted"] = True method = SPSA(model, sess = sess) else: raise ValueError("Can not recognize this attack method: %s" % attack_method) adv_x = method.generate(x, **params) num_batch = x_test.shape[0] // batch_size adv_imgs = [] for i in range(num_batch): x_feed = x_test[i*batch_size:(i+1)*batch_size] #y_feed = y_test[i*batch_size:(i+1)*batch_size] adv_img = sess.run(adv_x, feed_dict={x: x_feed}) adv_imgs.append(adv_img) adv_imgs = np.concatenate(adv_imgs, axis=0) return adv_imgs
j = 0 for i in range(0,x_test.shape[0]): if np.argmax(model.predict(x_test[i:i+1])) == np.argmax(y_test[i]): adv_inputs[j] = x_test[i] adv_labels[j] = y_test[i] # csvFile1.append([[i,j]]) j += 1 adv_inputs = adv_inputs[:100] adv_labels = adv_labels[:100] print("Legitimate test accuracy = %0.3f" % (j/y_test.shape[0])) print("Dataset of %d to be attacked." % adv_inputs.shape[0]) print(adv_inputs.shape, adv_labels.shape) # Attack wrap = KerasModelWrapper(model) deepfool = DeepFool(wrap, sess=sess) params = {} x_adv_1 = deepfool.generate_np(adv_inputs[:20], **params) x_adv_2 = deepfool.generate_np(adv_inputs[20:40], **params) x_adv_3 = deepfool.generate_np(adv_inputs[40:60], **params) x_adv_4 = deepfool.generate_np(adv_inputs[60:80], **params) x_adv_5 = deepfool.generate_np(adv_inputs[80:], **params) x_adv = np.concatenate((x_adv_1, x_adv_2, x_adv_3, x_adv_4, x_adv_5), axis=0) score = model.evaluate(x_adv, adv_labels, verbose=0) print('Adv. Test accuracy: %0.3f' % score[1]) # Initialize random choosing of adversarial images num_examples = 100 index_list = list(range(x_adv.shape[0])) import random
'clip_min': 0., 'clip_max': 1. } adv_x = attack.generate(x, **params) adv_x = tf.stop_gradient(adv_x) print('Epsilon:', eps) f.write('Epsilon: ' + str(eps) + '\n') save_images(kmodel, adv_x, x_test, y_test, preds1, save_loc + '_e' + str(eps)) # ------------------------------------------------------------------------------- # Attack DeepFool if FLAGS.attack == 'DeepFool': attack = DeepFool(wrap, sess=sess) params = { 'nb_candidate': 10, 'max_iter': 100, 'clip_min': 0., 'clip_max': 1. } adv_x = attack.generate(x, **params) save_images(kmodel, adv_x, x_test, y_test, preds1, save_loc) # ------------------------------------------------------------------------------- # Attack C&W if FLAGS.attack == 'CW': attack = CarliniWagnerL2(wrap, sess=sess) params = { 'batch_size': 1,
def main(args): normalize = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) transform = Compose([Resize(256), CenterCrop(224), ToTensor(), normalize]) dataset = ImageDataset(args.image_folder, transform=transform, return_paths=True) # n_images = len(dataset) dataloader = DataLoader(dataset, shuffle=False, batch_size=args.batch_size, pin_memory=True, num_workers=0) model = models.resnet50(pretrained=True).to(args.device) model.eval() config = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1, allow_soft_placement=True, device_count={'CPU': 1}) sess = tf.Session(config=config) x_op = tf.placeholder(tf.float32, shape=( None, 3, 224, 224, )) tf_model = convert_pytorch_model_to_tf(model, args.device) cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits') # compute clip_min and clip_max suing a full black and a full white image clip_min = normalize(torch.zeros(3, 1, 1)).min().item() clip_max = normalize(torch.ones(3, 1, 1)).max().item() eps = args.eps / 255. eps_iter = 20 nb_iter = 10 args.ord = np.inf if args.ord < 0 else args.ord grad_params = {'eps': eps, 'ord': args.ord} common_params = {'clip_min': clip_min, 'clip_max': clip_max} iter_params = {'eps_iter': eps_iter / 255., 'nb_iter': nb_iter} attack_name = '' if args.attack == 'fgsm': attack_name = '_L{}_eps{}'.format(args.ord, args.eps) attack_op = FastGradientMethod(cleverhans_model, sess=sess) attack_params = {**common_params, **grad_params} elif args.attack == 'iter': attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps, eps_iter, nb_iter) attack_op = BasicIterativeMethod(cleverhans_model, sess=sess) attack_params = {**common_params, **grad_params, **iter_params} elif args.attack == 'm-iter': attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps, eps_iter, nb_iter) attack_op = MomentumIterativeMethod(cleverhans_model, sess=sess) attack_params = {**common_params, **grad_params, **iter_params} elif args.attack == 'pgd': attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps, eps_iter, nb_iter) attack_op = MadryEtAl(cleverhans_model, sess=sess) attack_params = {**common_params, **grad_params, **iter_params} elif args.attack == 'jsma': attack_op = SaliencyMapMethod(cleverhans_model, sess=sess) attack_params = {'theta': eps, 'symbolic_impl': False, **common_params} elif args.attack == 'deepfool': attack_op = DeepFool(cleverhans_model, sess=sess) attack_params = common_params elif args.attack == 'cw': attack_op = CarliniWagnerL2(cleverhans_model, sess=sess) attack_params = common_params elif args.attack == 'lbfgs': attack_op = LBFGS(cleverhans_model, sess=sess) target = np.zeros((1, 1000)) target[0, np.random.randint(1000)] = 1 y = tf.placeholder(tf.float32, target.shape) attack_params = {'y_target': y, **common_params} attack_name = args.attack + attack_name print('Running [{}]. 
Params: {}'.format(args.attack.upper(), attack_params)) adv_x_op = attack_op.generate(x_op, **attack_params) adv_preds_op = tf_model(adv_x_op) preds_op = tf_model(x_op) n_success = 0 n_processed = 0 progress = tqdm(dataloader) for paths, x in progress: progress.set_description('ATTACK') z, adv_x, adv_z = sess.run([preds_op, adv_x_op, adv_preds_op], feed_dict={ x_op: x, y: target }) src, dst = np.argmax(z, axis=1), np.argmax(adv_z, axis=1) success = src != dst success_paths = np.array(paths)[success] success_adv_x = adv_x[success] success_src = src[success] success_dst = dst[success] n_success += success_adv_x.shape[0] n_processed += x.shape[0] progress.set_postfix( {'Success': '{:3.2%}'.format(n_success / n_processed)}) progress.set_description('SAVING') for p, a, s, d in zip(success_paths, success_adv_x, success_src, success_dst): path = '{}_{}_src{}_dst{}.npz'.format(p, attack_name, s, d) path = os.path.join(args.out_folder, path) np.savez_compressed(path, img=a)
def JSMA_FGSM_BIM(train_start=0, train_end=60000, test_start=0, test_end=10000, nb_epochs=6, batch_size=128, learning_rate=0.001, clean_train=True, testing=False, backprop_through_attack=False, nb_filters=64): """ MNIST cleverhans tutorial :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param learning_rate: learning rate for training :param clean_train: perform normal training on clean examples only before performing adversarial training. :param testing: if true, complete an AccuracyReport for unit tests to verify that performance is adequate :param backprop_through_attack: If True, backprop through adversarial example construction process during adversarial training. :param clean_train: if true, train on clean examples :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Set logging level to see debug information set_log_level(logging.DEBUG) # Create TF session sess = tf.Session() # Get MNIST test data X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) source_samples = batch_size # Use label smoothing # Hopefully this doesn't screw up JSMA... assert Y_train.shape[1] == 10 label_smooth = .1 Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth) # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1)) y = tf.placeholder(tf.float32, shape=(None, 10)) model_path = "models/mnist" # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } eval_par = {'batch_size': batch_size} rng = np.random.RandomState([2017, 8, 30]) if clean_train: model = make_basic_cnn(nb_filters=nb_filters) preds = model.get_probs(x) def evaluate(): # Evaluate the accuracy of the MNIST model on legitimate test # examples eval_params = {'batch_size': batch_size} acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params) report.clean_train_clean_eval = acc assert X_test.shape[0] == test_end - test_start, X_test.shape print('Test accuracy on legitimate examples: %0.4f' % acc) model_train(sess, x, y, preds, X_train, Y_train, evaluate=evaluate, args=train_params, rng=rng) print("#####Starting attacks on clean model#####") ################################################################# #Clean test against JSMA jsma_params = { 'theta': 1., 'gamma': 0.1, 'clip_min': 0., 'clip_max': 1., 'y_target': None } jsma = SaliencyMapMethod(model, back='tf', sess=sess) adv_x = jsma.generate(x, **jsma_params) preds_adv = model.get_probs(adv_x) # Evaluate the accuracy of the MNIST model on FGSM adversarial examples acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par) print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc) ################################################################ #Clean test against FGSM fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.} fgsm = FastGradientMethod(model, sess=sess) adv_x = fgsm.generate(x, **fgsm_params) preds_adv = model.get_probs(adv_x) # Evaluate the accuracy of the MNIST model on FGSM adversarial examples acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par) 
print('Clean test accuracy on FGSM adversarial examples: %0.4f' % acc) ################################################################ #Clean test against BIM bim_params = { 'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 100, 'clip_min': 0., 'clip_max': 1. } bim = BasicIterativeMethod(model, sess=sess) adv_x = bim.generate(x, **bim_params) preds_adv = model.get_probs(adv_x) # Evaluate the accuracy of the MNIST model on FGSM adversarial examples acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par) print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc) ################################################################ #Clean test against EN en_params = { 'binary_search_steps': 1, #'y': None, 'max_iterations': 100, 'learning_rate': 0.1, 'batch_size': source_samples, 'initial_const': 10 } en = ElasticNetMethod(model, back='tf', sess=sess) adv_x = en.generate(x, **en_params) preds_adv = model.get_probs(adv_x) # Evaluate the accuracy of the MNIST model on FGSM adversarial examples acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par) print('Clean test accuracy on EN adversarial examples: %0.4f' % acc) ################################################################ #Clean test against DF deepfool_params = { 'nb_candidate': 10, 'overshoot': 0.02, 'max_iter': 50, 'clip_min': 0., 'clip_max': 1. } deepfool = DeepFool(model, sess=sess) adv_x = deepfool.generate(x, **deepfool_params) preds_adv = model.get_probs(adv_x) # Evaluate the accuracy of the MNIST model on FGSM adversarial examples acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par) print('Clean test accuracy on DF adversarial examples: %0.4f' % acc) ################################################################ #Clean test against VAT vat_params = { 'eps': 2.0, 'num_iterations': 1, 'xi': 1e-6, 'clip_min': 0., 'clip_max': 1. } vat = VirtualAdversarialMethod(model, sess=sess) adv_x = vat.generate(x, **vat_params) preds_adv = model.get_probs(adv_x) # Evaluate the accuracy of the MNIST model on FGSM adversarial examples acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par) print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc) ################################################################ print("Repeating the process, using adversarial training\n") # Redefine TF model graph model_2 = make_basic_cnn(nb_filters=nb_filters) preds_2 = model_2(x) ################################################################# #Adversarial test against JSMA jsma_params = { 'theta': 1., 'gamma': 0.1, 'clip_min': 0., 'clip_max': 1., 'y_target': None } jsma = SaliencyMapMethod(model, back='tf', sess=sess) adv_x = jsma.generate(x, **jsma_params) preds_adv_jsma = model.get_probs(adv_x) ################################################################ #Adversarial test against FGSM fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.} fgsm = FastGradientMethod(model, sess=sess) adv_x = fgsm.generate(x, **fgsm_params) preds_adv_fgsm = model.get_probs(adv_x) ################################################################ #Adversarial test against BIM bim_params = { 'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 100, 'clip_min': 0., 'clip_max': 1. 
} bim = BasicIterativeMethod(model, sess=sess) adv_x = bim.generate(x, **bim_params) preds_adv_bim = model.get_probs(adv_x) ################################################################ #Adversarial test against EN en_params = { 'binary_search_steps': 5, #'y': None, 'max_iterations': 100, 'learning_rate': 0.1, 'batch_size': source_samples, 'initial_const': 10 } en = ElasticNetMethod(model, back='tf', sess=sess) adv_x = en.generate(x, **en_params) preds_adv_en = model.get_probs(adv_x) ################################################################ #Adversarial test against DF deepfool_params = { 'nb_candidate': 10, 'overshoot': 0.02, 'max_iter': 200, 'clip_min': 0., 'clip_max': 1. } deepfool = DeepFool(model, sess=sess) adv_x = deepfool.generate(x, **deepfool_params) preds_adv_df = model.get_probs(adv_x) ################################################################ #Adversarial test against VAT vat_params = { 'eps': 2.0, 'num_iterations': 1, 'xi': 1e-6, 'clip_min': 0., 'clip_max': 1. } vat = VirtualAdversarialMethod(model, sess=sess) adv_x = vat.generate(x, **vat_params) preds_adv_vat = model.get_probs(adv_x) ################################################################ print("#####Evaluate trained model#####") def evaluate_2(): # Evaluate the accuracy of the MNIST model on JSMA adversarial examples acc = model_eval(sess, x, y, preds_adv_jsma, X_test, Y_test, args=eval_par) print('Test accuracy on JSMA adversarial examples: %0.4f' % acc) # Evaluate the accuracy of the MNIST model on FGSM adversarial examples acc = model_eval(sess, x, y, preds_adv_fgsm, X_test, Y_test, args=eval_par) print('Test accuracy on FGSM adversarial examples: %0.4f' % acc) # Evaluate the accuracy of the MNIST model on BIM adversarial examples acc = model_eval(sess, x, y, preds_adv_bim, X_test, Y_test, args=eval_par) print('Test accuracy on BIM adversarial examples: %0.4f' % acc) # Evaluate the accuracy of the MNIST model on EN adversarial examples acc = model_eval(sess, x, y, preds_adv_en, X_test, Y_test, args=eval_par) print('Test accuracy on EN adversarial examples: %0.4f' % acc) # Evaluate the accuracy of the MNIST model on DF adversarial examples acc = model_eval(sess, x, y, preds_adv_df, X_test, Y_test, args=eval_par) print('Test accuracy on DF adversarial examples: %0.4f' % acc) # Evaluate the accuracy of the MNIST model on VAT adversarial examples acc = model_eval(sess, x, y, preds_adv_vat, X_test, Y_test, args=eval_par) print('Test accuracy on VAT adversarial examples: %0.4f\n' % acc) preds_2_adv = [ preds_adv_jsma, preds_adv_fgsm, preds_adv_bim # ,preds_adv_en # ,preds_adv_df ] model_train(sess, x, y, preds_2, X_train, Y_train, predictions_adv=preds_2_adv, evaluate=evaluate_2, args=train_params, rng=rng)
def eval(sess, model_name, X_train, Y_train, X_test, Y_test, cnn=False, rbf=False, fgsm=False, jsma=False, df=False, bim=False): """ Load model saved in model_name.json and model_name_weights.h5 and evaluate its accuracy on legitimate test samples and adversarial samples. Use cnn=True if the model is CNN based. """ # open text file and output accuracy results to it text_file = open("cifar_results.txt", "w") # load saved model print("Load model ... ") ''' json = open('models/{}.json'.format(model_name), 'r') model = json.read() json.close() loaded_model = model_from_json(model) loaded_model.load_weights("models/{}_weights.h5".format(model_name)) ''' if rbf: loaded_model = load_model("rbfmodels/{}.h5".format(model_name), custom_objects={'RBFLayer': RBFLayer}) text_file.write('Evaluating on rbfmodels/{}.h5\n\n'.format(model_name)) else: loaded_model = load_model("models/{}.h5".format(model_name)) text_file.write('Evaluating on models/{}.h5\n\n'.format(model_name)) # Set placeholders if cnn: x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3)) else: x = tf.placeholder(tf.float32, shape=(None, 3072)) y = tf.placeholder(tf.float32, shape=(None, 10)) predictions = loaded_model(x) accuracy = model_eval(sess, x, y, predictions, X_test, Y_test, args={"batch_size": 128}) text_file.write('Test accuracy on legitimate test examples: {0}\n'.format( str(accuracy))) #print('Test accuracy on legitimate test examples: ' + str(accuracy)) # Craft adversarial examples depending on the input parameters wrap = KerasModelWrapper(loaded_model) # FGSM if fgsm: fgsm = FastGradientMethod(wrap, sess=sess) fgsm_params = {'eps': 0.3} adv_x = fgsm.generate(x, **fgsm_params) adv_x = tf.stop_gradient(adv_x) preds_adv = loaded_model(adv_x) # Evaluate the accuracy of the CIFAR-10 model on adversarial examples accuracy = model_eval(sess, x, y, preds_adv, X_test, Y_test, args={"batch_size": 128}) text_file.write( 'Test accuracy on fgsm adversarial test examples: {0}\n'.format( str(accuracy))) #print('Test accuracy on fgsm adversarial test examples: ' + str(accuracy)) # JSMA if jsma: jsma = SaliencyMapMethod(wrap, sess=sess) jsma_params = { 'theta': 2., 'gamma': 0.145, 'clip_min': 0., 'clip_max': 1., 'y_target': None } adv_x = jsma.generate(x, **jsma_params) adv_x = tf.stop_gradient(adv_x) preds_adv = loaded_model(adv_x) # Evaluate the accuracy of the CIFAR-10 model on adversarial examples accuracy = model_eval(sess, x, y, preds_adv, X_test, Y_test, args={"batch_size": 128}) text_file.write( 'Test accuracy on jsma adversarial test examples: {0}\n'.format( str(accuracy))) #print('Test accuracy on jsma adversarial test examples: ' + str(accuracy)) # DeepFool if df: df = DeepFool(wrap, sess=sess) df_params = {'nb_candidate': 10, 'max_iter': 50} adv_x = df.generate(x, **df_params) adv_x = tf.stop_gradient(adv_x) preds_adv = loaded_model(adv_x) # Evaluate the accuracy of the CIFAR-10 model on adversarial examples accuracy = model_eval(sess, x, y, preds_adv, X_test, Y_test, args={"batch_size": 128}) text_file.write( 'Test accuracy on df adversarial test examples: {0}\n'.format( str(accuracy))) #print('Test accuracy on df adversarial test examples: ' + str(accuracy)) # Basic Iterative Method # Commented out as it is hanging on batch #0 at the moment ''' if bim: bim = ProjectedGradientDescent(wrap, sess=sess) bim_params = {'eps': 0.3} adv_x = bim.generate(x, **bim_params) adv_x = tf.stop_gradient(adv_x) preds_adv = loaded_model(adv_x) # Evaluate the accuracy of the CIFAR-10 model on adversarial examples accuracy = model_eval(sess, x, 
                              y, preds_adv, X_test, Y_test,
                              args={"batch_size": 128})
        text_file.write('Test accuracy on bim adversarial test examples: {0}\n'.format(str(accuracy)))
        #print('Test accuracy on bim adversarial test examples: ' + str(accuracy))
    '''

    print('Accuracy results outputted to cifar_results.txt')
    text_file.close()

    # Close TF session
    sess.close()
def mnist_tutorial_jsma(train_start=0, train_end=5500, test_start=0, test_end=1000, nb_epochs=8, batch_size=100, nb_classes=10, nb_filters=64, learning_rate=0.001): """ MNIST tutorial for the Jacobian-based saliency map approach (JSMA) :param train_start: index of first training set example :param train_end: index of last training set example :param test_start: index of first test set example :param test_end: index of last test set example :param nb_epochs: number of epochs to train model :param batch_size: size of training batches :param nb_classes: number of output classes :param learning_rate: learning rate for training :return: an AccuracyReport object """ # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) # Create TF session and set as Keras backend session sess = tf.Session() print("Created TensorFlow session.") set_log_level(logging.DEBUG) # Get MNIST test data X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) label_smooth = .1 Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth) # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1)) y = tf.placeholder(tf.float32, shape=(None, 10)) # Define TF model graph model = make_basic_cnn() preds = model(x) print("Defined TensorFlow model graph.") ########################################################################### # Training the model using TensorFlow ########################################################################### # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate } # sess.run(tf.global_variables_initializer()) rng = np.random.RandomState([2017, 8, 30]) print("x_train shape: ", X_train.shape) print("y_train shape: ", Y_train.shape) # do not log model_train(sess, x, y, preds, X_train, Y_train, args=train_params,verbose=False, rng=rng) f_out_clean = open("Clean_jsma_elastic_against5.log", "w") # Evaluate the accuracy of the MNIST model on legitimate test examples eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params) assert X_test.shape[0] == test_end - test_start, X_test.shape print('Test accuracy on legitimate test examples: {0}'.format(accuracy)) f_out_clean.write('Test accuracy on legitimate test examples: ' + str(accuracy) + '\n') # Clean test against JSMA jsma_params = {'theta': 1., 'gamma': 0.1, 'clip_min': 0., 'clip_max': 1., 'y_target': None} jsma = SaliencyMapMethod(model, back='tf', sess=sess) adv_x_jsma = jsma.generate(x, **jsma_params) preds_adv_jsma = model.get_probs(adv_x_jsma) # Evaluate the accuracy of the MNIST model on FGSM adversarial examples acc = model_eval(sess, x, y, preds_adv_jsma, X_test, Y_test, args=eval_params) print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc) f_out_clean.write('Clean test accuracy on JSMA adversarial examples: ' + str(acc) + '\n') ################################################################ # Clean test against FGSM fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.} fgsm = FastGradientMethod(model, sess=sess) adv_x_fgsm = fgsm.generate(x, **fgsm_params) preds_adv_fgsm = model.get_probs(adv_x_fgsm) # Evaluate the accuracy of the MNIST model on FGSM adversarial examples acc = model_eval(sess, x, y, preds_adv_fgsm, X_test, Y_test, args=eval_params) print('Clean test accuracy on FGSM adversarial 
examples: %0.4f' % acc) f_out_clean.write('Clean test accuracy on FGSM adversarial examples: ' + str(acc) + '\n') ################################################################ # Clean test against BIM bim_params = {'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 100, 'clip_min': 0., 'clip_max': 1.} bim = BasicIterativeMethod(model, sess=sess) adv_x_bim = bim.generate(x, **bim_params) preds_adv_bim = model.get_probs(adv_x_bim) # Evaluate the accuracy of the MNIST model on FGSM adversarial examples acc = model_eval(sess, x, y, preds_adv_bim, X_test, Y_test, args=eval_params) print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc) f_out_clean.write('Clean test accuracy on BIM adversarial examples: ' + str(acc) + '\n') ################################################################ # Clean test against EN en_params = {'binary_search_steps': 1, # 'y': None, 'max_iterations': 100, 'learning_rate': 0.1, 'batch_size': batch_size, 'initial_const': 10} en = ElasticNetMethod(model, back='tf', sess=sess) adv_x_en = en.generate(x, **en_params) preds_adv_en = model.get_probs(adv_x_en) # Evaluate the accuracy of the MNIST model on FGSM adversarial examples acc = model_eval(sess, x, y, preds_adv_en, X_test, Y_test, args=eval_params) print('Clean test accuracy on EN adversarial examples: %0.4f' % acc) f_out_clean.write('Clean test accuracy on EN adversarial examples: ' + str(acc) + '\n') ################################################################ # Clean test against DF deepfool_params = {'nb_candidate': 10, 'overshoot': 0.02, 'max_iter': 50, 'clip_min': 0., 'clip_max': 1.} deepfool = DeepFool(model, sess=sess) adv_x_df = deepfool.generate(x, **deepfool_params) preds_adv_df = model.get_probs(adv_x_df) # Evaluate the accuracy of the MNIST model on FGSM adversarial examples acc = model_eval(sess, x, y, preds_adv_df, X_test, Y_test, args=eval_params) print('Clean test accuracy on DF adversarial examples: %0.4f' % acc) f_out_clean.write('Clean test accuracy on DF adversarial examples: ' + str(acc) + '\n') ################################################################ # Clean test against VAT vat_params = {'eps': 2.0, 'num_iterations': 1, 'xi': 1e-6, 'clip_min': 0., 'clip_max': 1.} vat = VirtualAdversarialMethod(model, sess=sess) adv_x_vat = vat.generate(x, **vat_params) preds_adv_vat = model.get_probs(adv_x_vat) # Evaluate the accuracy of the MNIST model on FGSM adversarial examples acc = model_eval(sess, x, y, preds_adv_vat, X_test, Y_test, args=eval_params) print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc) f_out_clean.write('Clean test accuracy on VAT adversarial examples: ' + str(acc) + '\n') f_out_clean.close() ########################################################################### # Craft adversarial examples using the Jacobian-based saliency map approach ########################################################################### print('Crafting ' + str(X_train.shape[0]) + ' * ' + str(nb_classes-1) + ' adversarial examples') model_2 = make_basic_cnn() preds_2 = model(x) # need this for constructing the array sess.run(tf.global_variables_initializer()) # run this again # sess.run(tf.global_variables_initializer()) # 1. Instantiate a SaliencyMapMethod attack object jsma = SaliencyMapMethod(model_2, back='tf', sess=sess) jsma_params = {'theta': 1., 'gamma': 0.1, 'clip_min': 0., 'clip_max': 1., 'y_target': None} adv_random = jsma.generate(x, **jsma_params) preds_adv_random = model_2.get_probs(adv_random) # 2. 
Instantiate FGSM attack fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.} fgsm = FastGradientMethod(model_2, sess=sess) adv_x_fgsm = fgsm.generate(x, **fgsm_params) preds_adv_fgsm = model_2.get_probs(adv_x_fgsm) # 3. Instantiate Elastic net attack en_params = {'binary_search_steps': 5, #'y': None, 'max_iterations': 100, 'learning_rate': 0.1, 'batch_size': batch_size, 'initial_const': 10} enet = ElasticNetMethod(model_2, sess=sess) adv_x_en = enet.generate(x, **en_params) preds_adv_elastic_net = model_2.get_probs(adv_x_en) # 4. Deepfool deepfool_params = {'nb_candidate':10, 'overshoot':0.02, 'max_iter': 50, 'clip_min': 0., 'clip_max': 1.} deepfool = DeepFool(model_2, sess=sess) adv_x_df = deepfool.generate(x, **deepfool_params) preds_adv_deepfool = model_2.get_probs(adv_x_df) # 5. Base Iterative bim_params = {'eps': 0.3, 'eps_iter': 0.01, 'nb_iter': 100, 'clip_min': 0., 'clip_max': 1.} base_iter = BasicIterativeMethod(model_2, sess=sess) adv_x_bi = base_iter.generate(x, **bim_params) preds_adv_base_iter = model_2.get_probs(adv_x_bi) # 6. C & W Attack cw = CarliniWagnerL2(model_2, back='tf', sess=sess) cw_params = {'binary_search_steps': 1, # 'y': None, 'max_iterations': 100, 'learning_rate': 0.1, 'batch_size': batch_size, 'initial_const': 10} adv_x_cw = cw.generate(x, **cw_params) preds_adv_cw = model_2.get_probs(adv_x_cw) #7 vat_params = {'eps': 2.0, 'num_iterations': 1, 'xi': 1e-6, 'clip_min': 0., 'clip_max': 1.} vat = VirtualAdversarialMethod(model_2, sess=sess) adv_x = vat.generate(x, **vat_params) preds_adv_vat = model_2.get_probs(adv_x) # ==> generate 10 targeted classes for every train data regardless # This call runs the Jacobian-based saliency map approach # Loop over the samples we want to perturb into adversarial examples X_train_adv_set = [] Y_train_adv_set = [] for index in range(X_train.shape[0]): print('--------------------------------------') x_val = X_train[index:(index+1)] y_val = Y_train[index] # add normal sample in!!!! X_train_adv_set.append(x_val) Y_train_adv_set.append(y_val) # We want to find an adversarial example for each possible target class # (i.e. all classes that differ from the label given in the dataset) current_class = int(np.argmax(y_val)) target_classes = other_classes(nb_classes, current_class) # Loop over all target classes for target in target_classes: # print('Generating adv. 
example for target class %i' % target) # This call runs the Jacobian-based saliency map approach one_hot_target = np.zeros((1, nb_classes), dtype=np.float32) one_hot_target[0, target] = 1 jsma_params['y_target'] = one_hot_target adv_x = jsma.generate_np(x_val, **jsma_params) # append to X_train_adv_set and Y_train_adv_set X_train_adv_set.append(adv_x) Y_train_adv_set.append(y_val) # shape is: (1, 28, 28, 1) # print("adv_x shape is: ", adv_x.shape) # check for success rate # res = int(model_argmax(sess, x, preds, adv_x) == target) print('-------------Finished Generating Np Adversarial Data-------------------------') X_train_data = np.concatenate(X_train_adv_set, axis=0) Y_train_data = np.stack(Y_train_adv_set, axis=0) print("X_train_data shape is: ", X_train_data.shape) print("Y_train_data shape is: ", Y_train_data.shape) # saves the output so later no need to re-fun file np.savez("jsma_training_data.npz", x_train=X_train_data , y_train=Y_train_data) # >>> data = np.load('/tmp/123.npz') # >>> data['a'] f_out = open("Adversarial_jsma_elastic_against5.log", "w") # evaluate the function against 5 attacks # fgsm, base iterative, jsma, elastic net, and deepfool def evaluate_against_all(): # 1 Clean Data eval_params = {'batch_size': batch_size} accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params) print('Legitimate accuracy: %0.4f' % accuracy) tmp = 'Legitimate accuracy: '+ str(accuracy) + "\n" f_out.write(tmp) # 2 JSMA accuracy = model_eval(sess, x, y, preds_adv_random, X_test, Y_test, args=eval_params) print('JSMA accuracy: %0.4f' % accuracy) tmp = 'JSMA accuracy:'+ str(accuracy) + "\n" f_out.write(tmp) # 3 FGSM accuracy = model_eval(sess, x, y, preds_adv_fgsm, X_test, Y_test, args=eval_params) print('FGSM accuracy: %0.4f' % accuracy) tmp = 'FGSM accuracy:' + str(accuracy) + "\n" f_out.write(tmp) # 4 Base Iterative accuracy = model_eval(sess, x, y, preds_adv_base_iter, X_test, Y_test, args=eval_params) print('Base Iterative accuracy: %0.4f' % accuracy) tmp = 'Base Iterative accuracy:' + str(accuracy) + "\n" f_out.write(tmp) # 5 Elastic Net accuracy = model_eval(sess, x, y, preds_adv_elastic_net, X_test, Y_test, args=eval_params) print('Elastic Net accuracy: %0.4f' % accuracy) tmp = 'Elastic Net accuracy:' + str(accuracy) + "\n" f_out.write(tmp) # 6 DeepFool accuracy = model_eval(sess, x, y, preds_adv_deepfool, X_test, Y_test, args=eval_params) print('DeepFool accuracy: %0.4f' % accuracy) tmp = 'DeepFool accuracy:' + str(accuracy) + "\n" f_out.write(tmp) # 7 C & W Attack accuracy = model_eval(sess, x, y, preds_adv_cw, X_test, Y_test, args=eval_params) print('C & W accuracy: %0.4f' % accuracy) tmp = 'C & W accuracy:' + str(accuracy) + "\n" f_out.write(tmp) f_out.write("*******End of Epoch***********\n\n") # 8 Virtual Adversarial accuracy = model_eval(sess, x, y, preds_adv_vat, X_test, Y_test, args=eval_params) print('VAT accuracy: %0.4f' % accuracy) tmp = 'VAT accuracy:' + str(accuracy) + "\n" f_out.write(tmp) f_out.write("*******End of Epoch***********\n\n") print("*******End of Epoch***********\n\n") # report.adv_train_adv_eval = accuracy print("Now Adversarial Training with Elastic Net + modified X_train and Y_train") # trained_model.out train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, 'train_dir': '/home/stephen/PycharmProjects/jsma-runall-mac/', 'filename': 'trained_model.out' } model_train(sess, x, y, preds_2, X_train_data, Y_train_data, predictions_adv=preds_adv_elastic_net, evaluate=evaluate_against_all, verbose=False, 
args=train_params, rng=rng) # Close TF session sess.close() return report
model = old_models.ShallowConvNet(nb_classes=nb_classes, Chans=channels, Samples=samples) else: raise Exception('No such model:{}'.format(model_used)) model.compile(optimizer='Adam', loss='sparse_categorical_crossentropy', metrics=['acc']) model.load_weights(model_path) y_test = y_test.astype('int32').flatten() y_test_pre = np.argmax(model.predict(x_test), axis=1) ch_model = KerasModelWrapper(model) deepfool = DeepFool(ch_model, back='tf', sess=K.get_session()) raw_acc = np.sum(y_test_pre == y_test) / len(y_test_pre) # np.random.seed(2009) shape = x_test.shape # random_v = a * np.random.rand(1, 1, channels, samples) random_v = a * np.random.uniform(-1, 1, (1, 1, channels, samples)) random_x = x_test + random_v y_rand_pre = np.argmax(model.predict(random_x), axis=1) rand_acc = np.sum(y_rand_pre == y_test) / len(y_rand_pre) v, fool_list = universal_perturbation(model, deepfool, x_train,
def attack_batch(model, in_im, net_name, attack_name, im_list, gt_labels,
                 sample_size, batch_size):
    logging.basicConfig(filename='Logs/' + net_name + "_" + attack_name + '.log',
                        level=logging.INFO,
                        format='%(asctime)s:%(levelname)s:%(message)s')
    config = tf.ConfigProto(device_count={'GPU': 2})
    imgs = open(im_list).readlines()  # [::10]
    gt_labels = open(gt_labels).readlines()  # [::10]
    top_1 = 0
    top_1_real = 0
    fool_rate = 0
    isotropic, size = get_params(net_name)
    imageModel = CallableModelWrapper(model, 'logits')
    with tf.Session(config=config) as sess:
        if attack_name == 'fgsm':
            attack = FastGradientMethod(imageModel, back='tf')
            adv_x = attack.generate(in_im, eps=8, clip_min=-124, clip_max=155)
        if attack_name == 'ifgsm':
            attack = BasicIterativeMethod(imageModel, back='tf')
            adv_x = attack.generate(in_im, eps=8, eps_iter=1, nb_iter=12,
                                    clip_min=-124, clip_max=155)
        if attack_name == 'cw2':
            attack = CarliniWagnerL2(imageModel, back='tf')
            adv_x = attack.generate(in_im, clip_min=-124, clip_max=155)
        if attack_name == 'jsma':
            attack = SaliencyMapMethod(imageModel, back='tf')
            adv_x = attack.generate(in_im)
        if attack_name == 'pgd':
            attack = MadryEtAl(imageModel, back='tf')
            adv_x = attack.generate(in_im, eps=8, eps_iter=1, nb_iter=12,
                                    clip_min=-124, clip_max=155)
        if attack_name == 'deepfool':
            # DeepFool needs the session at construction time, not in generate()
            attack = DeepFool(imageModel, back='tf', sess=sess)
            adv_x = attack.generate(in_im, clip_min=-124, clip_max=155)
        sess.run(tf.global_variables_initializer())
        img_loader = loader_func(net_name, sess, isotropic, size)
        batch_im = np.zeros((batch_size, size, size, 3))
        for i in range(sample_size // batch_size):  # integer batch count (Python 3)
            lim = min(batch_size, len(imgs) - i * batch_size)
            for j in range(lim):
                im = img_loader(imgs[i * batch_size + j].strip())
                batch_im[j] = np.copy(im)
            gt = np.array([int(gt_labels[i * batch_size + j].strip())
                           for j in range(lim)])
            adv_x_np = adv_x.eval(feed_dict={in_im: batch_im})
            # Calculate the class probabilities
            y_adv_prob = tf.nn.softmax(model(in_im), name="yadv").eval(
                feed_dict={in_im: adv_x_np})
            y_adv = np.argmax(y_adv_prob, 1)
            y_true_prob = tf.nn.softmax(model(in_im), name="ypred").eval(
                feed_dict={in_im: batch_im})
            y_true = np.argmax(y_true_prob, 1)
            # Accumulate the top-1 and top-1-true counts and the fooling rate
            top_1 += np.sum(y_adv == gt)
            top_1_real += np.sum(y_true == gt)
            fool_rate += np.sum(y_true != y_adv)
            if i != 0 and i % 2 == 0:
                logging.info("batch: {} ==================================================================".format(i))
                logging.info("fooling rate {}".format(
                    fool_rate / float((i + 1) * batch_size) * 100))
                logging.info("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
                logging.info('Real Top-1 Accuracy = {}'.format(
                    top_1_real / float(sample_size) * 100))
                logging.info('Top-1 Accuracy = {}'.format(
                    top_1 / float(sample_size) * 100))
                logging.info('Top-1 Fooling Rate = {}'.format(
                    fool_rate / float(sample_size) * 100))
                logging.info("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")
def main(type="Resnet", dataset="CIFAR10", attack_type="FGM"):

    size = 256
    eval_params = {'batch_size': 128}

    ############################## Prepare the Data ##############################
    if dataset == 'CIFAR10':
        (_, _), (x_test, y_test) = prepare_CIFAR10()
        num_classes = 10
        input_dim = 32
    elif dataset == 'CIFAR100':
        (_, _), (x_test, y_test) = prepare_CIFAR100()
        num_classes = 100
        input_dim = 32
    else:
        (_, _), (x_test, y_test) = prepare_SVHN("./Data/")
        num_classes = 10
        input_dim = 32

    x_test = x_test / 255.
    y_test = keras.utils.to_categorical(y_test, num_classes)
    ############################## Prepare the Data ##############################

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        # prepare the placeholders
        x = tf.placeholder(tf.float32, [None, input_dim, input_dim, 3])
        y = tf.placeholder(tf.float32, [None, num_classes])

        input_output = []

        def modelBuilder(x, num_classes, dataset, type, sess, input_output):
            if len(input_output) == 0:
                reuse = False
                # Model/Graph
                if type == 'End2End':
                    _, tf_model = prepare_GBP_End2End(
                        num_classes, inputT=x, sess=sess,
                        checkpoint_dir='./{}_{}/'.format(dataset, type),
                        reuse=reuse)
                else:
                    _, tf_model = prepare_Resnet(
                        num_classes, inputT=x, sess=sess,
                        checkpoint_dir='./{}_{}/'.format(dataset, type),
                        reuse=reuse)
                input_output.append(x)
                input_output.append(tf_model.logits)
            else:
                reuse = True
                # Model/Graph
                if type == 'End2End':
                    _, tf_model = prepare_GBP_End2End(num_classes, inputT=x,
                                                      reuse=reuse)
                else:
                    _, tf_model = prepare_Resnet(num_classes, inputT=x,
                                                 reuse=reuse)
                input_output.append(x)
                input_output.append(tf_model.logits)
            return tf_model.logits

        # create an attackable model for cleverhans
        model = CallableModelWrapper(
            lambda placeholder: modelBuilder(placeholder, num_classes, dataset,
                                             type, sess, input_output),
            'logits')

        # TODO: check the configurations; several of the parameter dicts below
        # carry keys (e.g. 'eps' for SMM/SPSA/LBFGS) that the corresponding
        # cleverhans attack does not define.
        if attack_type == "FGM":
            attack = FastGradientMethod(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "CWL2":
            attack = CarliniWagnerL2(model, back='tf', sess=sess)
            params = {'confidence': 0.9, 'batch_size': 128,
                      'learning_rate': 0.005}
        elif attack_type == "DF":
            attack = DeepFool(model, back='tf', sess=sess)
            params = {}
        elif attack_type == "ENM":
            # configurations checked, quickly tested
            attack = ElasticNetMethod(model, back='tf', sess=sess)
            params = {'confidence': 0.9, 'batch_size': 128,
                      'learning_rate': 0.005}
        elif attack_type == "FFA":
            # configuration checked
            attack = FastFeatureAdversaries(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'eps_iter': 0.005,
                      'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "LBFGS":
            attack = LBFGS(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "MEA":
            attack = MadryEtAl(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "MIM":
            attack = MomentumIterativeMethod(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "SMM":
            attack = SaliencyMapMethod(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "SPSA":
            attack = SPSA(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "VATM":
            # NOTE: cleverhans' `vatm` is a functional interface
            # (model, x, logits, eps, ...), not an Attack class; the
            # class-based equivalent is VirtualAdversarialMethod below.
            attack = vatm(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        elif attack_type == "VAM":
            attack = VirtualAdversarialMethod(model, back='tf', sess=sess)
            params = {'eps': 0.06, 'clip_min': 0., 'clip_max': 1.}
        else:
            raise Exception("Unrecognized attack type: {}".format(attack_type))

        # tf operation
        adv_x = attack.generate(x, **params)

        # generate the adversarial examples
        adv_vals = sess.run(adv_x, feed_dict={x: x_test[:size]})

        # "adv_vals" may contain NaNs when the attack fails; for the same
        # reason an input may also be left completely unperturbed
        to_delete = []
        for idx, adv in enumerate(adv_vals):
            # for nan
            if np.isnan(adv).any():
                to_delete.append(idx)
            # for no perturbation
            elif np.array_equiv(adv, x_test[idx]):
                to_delete.append(idx)

        # cleaning
        adv_vals_cleaned = np.delete(adv_vals, to_delete, axis=0)
        ori_cleaned = np.delete(x_test[:size], to_delete, axis=0)
        y_cleaned = np.delete(y_test[:size], to_delete, axis=0)

        if len(adv_vals_cleaned) == 0:
            print("No adversarial example was generated!")
            return

        print("{} out of {} adversarial examples were generated.".format(
            len(adv_vals_cleaned), size))
        print("The average L_inf distortion is {}".format(
            np.mean([np.max(np.abs(adv - ori_cleaned[idx]))
                     for idx, adv in enumerate(adv_vals_cleaned)])))

        # TODO: visualize the adv_vals

        accuracy = model_eval(sess, input_output[0], y,
                              tf.nn.softmax(input_output[1]),
                              x_test[:size], y_test[:size], args=eval_params)
        print('Test accuracy on normal examples: %0.4f' % accuracy)

        accuracy = model_eval(sess, input_output[0], y,
                              tf.nn.softmax(input_output[1]),
                              adv_vals_cleaned, y_cleaned, args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
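# The NaN / no-perturbation cleanup in main() can also be done in one pass
# with boolean masks; a sketch using the same array names as above
# (adv_vals of shape (size, H, W, C)):
nan_mask = np.isnan(adv_vals).any(axis=(1, 2, 3))
same_mask = (adv_vals == x_test[:size]).all(axis=(1, 2, 3))
keep = ~(nan_mask | same_mask)
adv_vals_cleaned = adv_vals[keep]
ori_cleaned = x_test[:size][keep]
y_cleaned = y_test[:size][keep]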
# graph = tf.get_default_graph()
x_input1 = tf.placeholder(tf.float32, shape=batch_shape)
x_input2 = tf.placeholder(tf.float32, shape=batch_shape)

prediction = model(x_input2, x_input1)
# prediction = sess.run(predictions, feed_dict={phase_train_placeholder: False})

# Define DeepFool for the model
steps = 1
df_params = {
    'nb_classes': 2,
    'max_iter': 150,
    'clip_min': 0.0,
    'clip_max': 1.0,
    'nb_candidate': 2
}
DF = DeepFool(model, back='tf', sess=sess)
# cleverhans' generate() takes a single input tensor plus keyword
# parameters; the extra positional arguments in the original call
# (x_input2, faces2_batch) are not part of its signature.
adv_x = DF.generate(x_input1, **df_params)

inception_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                   scope='InceptionResnetV1')
saver = tf.train.Saver(inception_vars, max_to_keep=3)
# sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
#                                         log_device_placement=False))

pretrained_model = '/home/fan/facenet_adversarial_faces/models/facenet/20170512-110547/'
if pretrained_model:
    print('Restoring pretrained model: %s' % pretrained_model)
    # facenet.load_model(pretrained_model)
    model_exp = os.path.expanduser(pretrained_model)
    print('Model directory: %s' % model_exp)
    _, ckpt_file = facenet.get_model_filenames(model_exp)
    # print('Metagraph file: %s' % meta_file)
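# A hypothetical continuation sketch (not in the source): restore the
# checkpoint located above and run the DeepFool graph on a batch of face
# images. `faces1_batch` is a placeholder name for a numpy batch matching
# `batch_shape`.
saver.restore(sess, os.path.join(model_exp, ckpt_file))
adv_faces = sess.run(adv_x, feed_dict={x_input1: faces1_batch})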
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

if __name__ == '__main__':
    model_keras = keras.models.load_model('model_cifar.h5')
    batch_size = 512
    success = 0
    data_size = X_train.shape[0]
    adv_train = []

    # Build the session, wrapper and attack once, outside the loop;
    # re-creating them per batch keeps adding nodes to the TF graph.
    sess = keras.backend.get_session()
    model = KerasModelWrapper(model_keras)
    attack = DeepFool(model, sess=sess)
    # print(model.predict(panda.reshape(1, *panda.shape)))
    param = dict(
        nb_candidate=10,
        overshoot=0.01,
        # overshoot=0.0,
        max_iter=20,
        clip_min=0.,
        clip_max=1.)

    for st in range(0, data_size, batch_size):
        # np.float is deprecated; use float32, which is what TF expects
        sample = np.array(
            X_train[st:st + batch_size].reshape(-1, 32 * 32 * 3) / 255,
            dtype=np.float32)
        # sample = np.array([sample])
        advs = attack.generate_np(sample, **param)
        # plt.imsave("sample.png", advs[0])
        adv_train.append(advs)
        preb = model_keras.predict(advs).argmax(axis=1).reshape(
            (sample.shape[0], ))
        y_sample = model_keras.predict(sample).argmax(axis=1).reshape(
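# The `success` counter above is never updated in the excerpt, whose last
# line is truncated in the source. A plausible sketch of that bookkeeping
# (assumed, not from the source): count samples whose prediction flips
# between the clean and the adversarial batch.
y_sample = model_keras.predict(sample).argmax(axis=1)
y_adv = model_keras.predict(advs).argmax(axis=1)
success += int(np.sum(y_adv != y_sample))
print('success rate so far: {:.3f}'.format(success / float(st + len(sample))))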
def whitebox(gan, rec_data_path=None, batch_size=128, learning_rate=0.001,
             nb_epochs=10, eps=0.3, online_training=False, test_on_dev=True,
             attack_type='fgsm', defense_type='gan', num_tests=-1,
             num_train=-1):
    """Based on the MNIST tutorial from cleverhans.

    Args:
        gan: A `GAN` model.
        rec_data_path: A string to the directory.
        batch_size: The size of the batch.
        learning_rate: The learning rate for training the target models.
        nb_epochs: Number of epochs for training the target model.
        eps: The epsilon of FGSM.
        online_training: Training Defense-GAN with online reconstruction.
            The faster but less accurate way is to reconstruct the dataset
            once and use it to train the target models with:
            `python train.py --cfg <path-to-model> --save_recs`
        attack_type: Type of the white-box attack. It can be `fgsm`,
            `rand+fgsm`, or `cw`.
        defense_type: String representing the type of defense. Can be
            `none`, `defense_gan`, or `adv_tr`.
    """
    FLAGS = tf.flags.FLAGS

    # Set logging level to see debug information.
    set_log_level(logging.WARNING)

    if defense_type == 'defense_gan':
        assert gan is not None

    # Create TF session.
    if defense_type == 'defense_gan':
        sess = gan.sess
        if FLAGS.train_on_recs:
            assert rec_data_path is not None or online_training
    else:
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)

    train_images, train_labels, test_images, test_labels = \
        get_cached_gan_data(gan, test_on_dev)

    rec_test_images = test_images
    rec_test_labels = test_labels

    _, _, test_images, test_labels = \
        get_cached_gan_data(gan, test_on_dev, orig_data_flag=True)

    x_shape = [None] + list(train_images.shape[1:])
    images_pl = tf.placeholder(tf.float32,
                               shape=[None] + list(train_images.shape[1:]))
    labels_pl = tf.placeholder(tf.float32,
                               shape=[None] + [train_labels.shape[1]])

    if num_tests > 0:
        test_images = test_images[:num_tests]
        rec_test_images = rec_test_images[:num_tests]
        test_labels = test_labels[:num_tests]

    if num_train > 0:
        train_images = train_images[:num_train]
        train_labels = train_labels[:num_train]

    # GAN defense flag.
    models = {'A': model_a, 'B': model_b, 'C': model_c,
              'D': model_d, 'E': model_e, 'F': model_f}
    model = models[FLAGS.model](input_shape=x_shape,
                                nb_classes=train_labels.shape[1])

    preds = model.get_probs(images_pl)
    report = AccuracyReport()

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test
        # examples.
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, images_pl, labels_pl, preds, rec_test_images,
                         rec_test_labels, args=eval_params,
                         feed={K.learning_phase(): 0})
        report.clean_train_clean_eval = acc
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
    }

    rng = np.random.RandomState([11, 24, 1990])
    tf.set_random_seed(11241990)

    preds_adv = None
    if FLAGS.defense_type == 'adv_tr':
        attack_params = {'eps': FLAGS.fgsm_eps_tr,
                         'clip_min': 0.,
                         'clip_max': 1.}
        if gan:
            if gan.dataset_name == 'celeba':
                attack_params['clip_min'] = -1.0

        attack_obj = FastGradientMethod(model, sess=sess)
        adv_x_tr = attack_obj.generate(images_pl, **attack_params)
        adv_x_tr = tf.stop_gradient(adv_x_tr)
        preds_adv = model(adv_x_tr)

    model_train(sess, images_pl, labels_pl, preds, train_images,
                train_labels, args=train_params, rng=rng,
                predictions_adv=preds_adv, init_all=False,
                feed={K.learning_phase(): 1}, evaluate=evaluate)

    # Calculate training error.
    eval_params = {'batch_size': batch_size}
    acc = model_eval(sess, images_pl, labels_pl, preds, train_images,
                     train_labels, args=eval_params,
                     feed={K.learning_phase(): 0})
    print('[#] Accuracy on clean examples {}'.format(acc))

    if attack_type is None:
        return acc, 0, None

    # Initialize the attack object and graph.
    if FLAGS.defense_type == 'defense_gan':
        z_init_val = None

        if FLAGS.same_init:
            z_init_val = tf.constant(
                np.random.randn(batch_size * gan.rec_rr,
                                gan.latent_dim).astype(np.float32))

        model.add_rec_model(gan, z_init_val, batch_size)

    min_val = 0.0
    if gan:
        if gan.dataset_name == 'celeba':
            min_val = -1.0

    if 'rand' in FLAGS.attack_type:
        test_images = np.clip(
            test_images +
            FLAGS.alpha * np.sign(np.random.randn(*test_images.shape)),
            min_val, 1.0)
        eps -= FLAGS.alpha

    if 'fgsm' in FLAGS.attack_type:
        attack_params = {'eps': eps, 'ord': np.inf,
                         'clip_min': min_val, 'clip_max': 1.}
        attack_obj = FastGradientMethod(model, sess=sess)
    elif FLAGS.attack_type == 'cw':
        attack_obj = CarliniWagnerL2(model, back='tf', sess=sess)
        attack_iterations = 100
        attack_params = {'binary_search_steps': 1,
                         'max_iterations': attack_iterations,
                         'learning_rate': 10.0,
                         'batch_size': batch_size,
                         'initial_const': 100,
                         'feed': {K.learning_phase(): 0}}
    elif FLAGS.attack_type == 'mim':
        attack_obj = MomentumIterativeMethod(model, back='tf', sess=sess)
        attack_params = {'eps': eps, 'ord': np.inf,
                         'clip_min': min_val, 'clip_max': 1.}
    elif FLAGS.attack_type == 'deepfool':
        attack_obj = DeepFool(model, back='tf', sess=sess)
        # DeepFool has no `eps` parameter; it searches for the smallest
        # perturbation itself.
        attack_params = {'clip_min': min_val, 'clip_max': 1.,
                         'nb_candidate': 2, 'nb_classes': 2}
    elif FLAGS.attack_type == 'lbfgs':
        attack_obj = LBFGS(model, back='tf', sess=sess)
        attack_params = {'clip_min': min_val, 'clip_max': 1.}

    adv_x = attack_obj.generate(images_pl, **attack_params)

    eval_par = {'batch_size': batch_size}
    if FLAGS.defense_type == 'defense_gan':
        preds_adv = model.get_probs(adv_x)

        num_dims = len(images_pl.get_shape())
        avg_inds = list(range(1, num_dims))
        # Per-example mean squared distortion between adversarial and
        # clean inputs.
        diff_op = tf.reduce_mean(tf.square(adv_x - images_pl), axis=avg_inds)

        acc_adv, roc_info = model_eval_gan(
            sess, images_pl, labels_pl, preds_adv, None,
            test_images=test_images, test_labels=test_labels,
            args=eval_par, feed={K.learning_phase(): 0}, diff_op=diff_op)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc_adv)
    else:
        preds_adv = model(adv_x)
        roc_info = None
        acc_adv = model_eval(sess, images_pl, labels_pl, preds_adv,
                             test_images, test_labels, args=eval_par,
                             feed={K.learning_phase(): 0})
        print('Test accuracy on adversarial examples: %0.4f\n' % acc_adv)

    if FLAGS.debug and gan is not None:  # To see some qualitative results.
        adv_x_debug = adv_x[:batch_size]
        # The source referenced undefined names (`images_tensor`,
        # `x_adv_sub`); feed the placeholder `images_pl` with a numpy batch
        # and run the sliced adversarial tensor instead.
        images_debug = test_images[:batch_size]

        debug_dir = os.path.join('debug', 'whitebox', FLAGS.debug_dir)
        ensure_dir(debug_dir)

        reconstructed_tensors = gan.reconstruct(adv_x_debug,
                                                batch_size=batch_size,
                                                reconstructor_id=2)
        x_rec_orig = gan.reconstruct(images_pl, batch_size=batch_size,
                                     reconstructor_id=3)

        x_adv_sub_val = sess.run(adv_x_debug,
                                 feed_dict={images_pl: images_debug,
                                            K.learning_phase(): 0})
        sess.run(tf.local_variables_initializer())
        x_rec_debug_val, x_rec_orig_val = sess.run(
            [reconstructed_tensors, x_rec_orig],
            feed_dict={images_pl: images_debug,
                       K.learning_phase(): 0})

        save_images_files(x_adv_sub_val, output_dir=debug_dir, postfix='adv')

        postfix = 'gen_rec'
        save_images_files(x_rec_debug_val, output_dir=debug_dir,
                          postfix=postfix)
        save_images_files(images_debug, output_dir=debug_dir,
                          postfix='orig')
        save_images_files(x_rec_orig_val, output_dir=debug_dir,
                          postfix='orig_rec')

    return acc_adv, 0, roc_info
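# A numpy equivalent of the per-example `diff_op` used in whitebox() above
# (a sketch; `adv_np` and `x_np` stand for numpy batches of the same shape):
import numpy as np

def mean_sq_distortion(adv_np, x_np):
    """Mean squared difference per example, averaged over all non-batch axes."""
    axes = tuple(range(1, adv_np.ndim))
    return np.mean(np.square(adv_np - x_np), axis=axes)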