def test_batch(self):
    """Save one training batch as image files for visual inspection.

    Pulls a single ``(images, labels)`` pair from ``self.train_data_gen()``,
    reshapes the images to ``[self.batch_size] + self.image_dim``, divides
    them by 255 and writes them to ``<self.debug_dir>/test_batch``.
    """
    output_dir = os.path.join(self.debug_dir, 'test_batch')
    ensure_dir(output_dir)

    # The builtin next() works for Python 2 and Python 3 iterators alike,
    # whereas the ``.next()`` method only exists on Python 2.
    img, target = next(self.train_data_gen())
    img = img.reshape([self.batch_size] + self.image_dim)
    save_images_files(img / 255.0, output_dir=output_dir, labels=target)
def blackbox(gan, rec_data_path=None, batch_size=128, learning_rate=0.001,
             nb_epochs=10, holdout=150, data_aug=6, nb_epochs_s=10,
             lmbda=0.1, online_training=False, train_on_recs=False,
             test_on_dev=True, defense_type='none'):
    """Black-box substitute attack from arxiv.org/abs/1602.02697.

    Trains a black-box classifier, trains a substitute model on the first
    `holdout` test examples using the black-box model applied to GAN
    reconstructions as the oracle, crafts FGSM adversarial examples on the
    substitute and evaluates the black-box model on them.

    Args:
        gan: GAN object. Its `sess` is reused when `defense_type` is
            `'defense_gan'`. `gan.reconstruct` is called unconditionally,
            so a `None` value fails when the substitute is set up.
        rec_data_path: Only its truthiness is read here: together with
            `online_training=False` it makes the black-box model train on
            the data returned by
            `get_cached_gan_data(..., orig_data_flag=False)`.
        batch_size: Batch size used for training, reconstruction and
            evaluation.
        learning_rate: Forwarded to `prep_bbox` and `train_sub`.
        nb_epochs: Forwarded to `prep_bbox`.
        holdout: Number of leading test examples handed to the substitute;
            they are removed from the evaluation set.
        data_aug: Forwarded to `train_sub`.
        nb_epochs_s: Forwarded to `train_sub`.
        lmbda: Forwarded to `train_sub`.
        online_training: With a `'gan'` defense and `train_on_recs=False`,
            passes `gan` to `prep_bbox`.
        train_on_recs: See `online_training`; asserted to be falsy when
            neither of the two data-selection branches applies.
        test_on_dev: Forwarded to `get_cached_gan_data`.
        defense_type: Defense name. `'defense_gan'` (with a truthy `gan`)
            enables reconstruction at evaluation time; any value containing
            `'adv_tr'` enables adversarial training in `prep_bbox`.

    Returns:
        A dictionary with the keys `'bbox'` (third value returned by
        `prep_bbox`), `'sub'` (always 0), `'bbox_on_sub_adv_ex'` and, for
        the Defense-GAN defense, `'roc_info'`. Returns `None` when
        `FLAGS.debug` is set and `gan` is not `None`, after writing debug
        images.
    """
    FLAGS = flags.FLAGS

    # Set logging level to see debug information.
    set_log_level(logging.WARNING)

    # Dictionary used to keep track and return key accuracies.
    accuracies = {}

    # Create TF session: only the Defense-GAN defense reuses the GAN's
    # session, every other configuration gets a fresh one.
    gan_defense_flag = bool(defense_type == 'defense_gan' and gan)
    if gan_defense_flag:
        sess = gan.sess
    else:
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
    adv_training = bool(defense_type) and 'adv_tr' in defense_type

    train_images, train_labels, test_images, test_labels = \
        get_cached_gan_data(gan, test_on_dev, orig_data_flag=True)

    x_shape, classes = list(train_images.shape[1:]), train_labels.shape[1]
    nb_classes = classes

    type_to_models = {
        'A': model_a, 'B': model_b, 'C': model_c, 'D': model_d,
        'E': model_e, 'F': model_f, 'Q': model_q, 'Z': model_z,
    }

    bb_model = type_to_models[FLAGS.bb_model](
        input_shape=[None] + x_shape, nb_classes=train_labels.shape[1],
    )
    sub_model = type_to_models[FLAGS.sub_model](
        input_shape=[None] + x_shape, nb_classes=train_labels.shape[1],
    )

    if FLAGS.debug:
        train_images = train_images[:20 * batch_size]
        train_labels = train_labels[:20 * batch_size]
        debug_dir = os.path.join('debug', 'blackbox', FLAGS.debug_dir)
        ensure_dir(debug_dir)
        x_debug_test = test_images[:batch_size]

    # Initialize substitute training set reserved for adversary
    images_sub = test_images[:holdout]
    labels_sub = np.argmax(test_labels[:holdout], axis=1)

    # Redefine test set as remaining samples unavailable to adversaries
    if FLAGS.num_tests > 0:
        test_images = test_images[:FLAGS.num_tests]
        test_labels = test_labels[:FLAGS.num_tests]

    test_images = test_images[holdout:]
    test_labels = test_labels[holdout:]

    # Define input and output TF placeholders
    if FLAGS.image_dim[0] == 3:
        # Channels-first -> channels-last.
        FLAGS.image_dim = [
            FLAGS.image_dim[1], FLAGS.image_dim[2], FLAGS.image_dim[0]
        ]

    images_tensor = tf.placeholder(tf.float32, shape=[None] + x_shape)
    labels_tensor = tf.placeholder(tf.float32, shape=(None, classes))

    rng = np.random.RandomState([11, 24, 1990])
    tf.set_random_seed(11241990)

    train_images_bb, train_labels_bb = train_images, train_labels
    test_images_bb, test_labels_bb = test_images, test_labels

    cur_gan = None

    if defense_type and 'gan' in defense_type:
        # Load cached dataset reconstructions.
        if online_training and not train_on_recs:
            cur_gan = gan
        elif not online_training and rec_data_path:
            (train_images_bb, train_labels_bb, test_images_bb,
             test_labels_bb) = get_cached_gan_data(
                 gan, test_on_dev, orig_data_flag=False)
        else:
            assert not train_on_recs

    # Trimming is idempotent, so applying it on every path matches the
    # original, where the no-defense path relied on `train_images` having
    # been trimmed already.
    if FLAGS.debug:
        train_images_bb = train_images_bb[:20 * batch_size]
        train_labels_bb = train_labels_bb[:20 * batch_size]

    # Prepare the black_box model. (Previously this identical call was
    # duplicated in both branches of `if defense_type`.)
    prep_bbox_out = prep_bbox(sess, images_tensor, labels_tensor,
                              train_images_bb, train_labels_bb,
                              test_images_bb, test_labels_bb, nb_epochs,
                              batch_size, learning_rate, rng=rng,
                              gan=cur_gan, adv_training=adv_training,
                              cnn_arch=bb_model)

    model, bbox_preds, accuracies['bbox'] = prep_bbox_out

    # Train substitute using method from https://arxiv.org/abs/1602.02697
    print("Training the substitute model.")
    reconstructed_tensors = tf.stop_gradient(
        gan.reconstruct(images_tensor, batch_size=batch_size,
                        reconstructor_id=1))
    model_sub, preds_sub = train_sub(
        sess, images_tensor, labels_tensor,
        model(reconstructed_tensors), images_sub, labels_sub,
        nb_classes, nb_epochs_s, batch_size,
        learning_rate, data_aug, lmbda, rng=rng,
        substitute_model=sub_model,
    )

    accuracies['sub'] = 0

    # Initialize the Fast Gradient Sign Method (FGSM) attack object.
    fgsm_par = {
        'eps': FLAGS.fgsm_eps, 'ord': np.inf,
        'clip_min': 0., 'clip_max': 1.
    }
    if gan and gan.dataset_name == 'celeba':
        fgsm_par['clip_min'] = -1.0

    fgsm = FastGradientMethod(model_sub, sess=sess)

    # Craft adversarial examples using the substitute.
    eval_params = {'batch_size': batch_size}
    x_adv_sub = fgsm.generate(images_tensor, **fgsm_par)

    if FLAGS.debug and gan is not None:
        # To see some qualitative results.
        reconstructed_tensors = gan.reconstruct(x_adv_sub,
                                                batch_size=batch_size,
                                                reconstructor_id=2)
        x_rec_orig = gan.reconstruct(images_tensor, batch_size=batch_size,
                                     reconstructor_id=3)
        x_adv_sub_val = sess.run(x_adv_sub,
                                 feed_dict={images_tensor: x_debug_test,
                                            K.learning_phase(): 0})
        sess.run(tf.local_variables_initializer())
        x_rec_debug_val, x_rec_orig_val = sess.run(
            [reconstructed_tensors, x_rec_orig],
            feed_dict={images_tensor: x_debug_test,
                       K.learning_phase(): 0})

        save_images_files(x_adv_sub_val, output_dir=debug_dir,
                          postfix='adv')

        postfix = 'gen_rec'
        save_images_files(x_rec_debug_val, output_dir=debug_dir,
                          postfix=postfix)
        save_images_files(x_debug_test, output_dir=debug_dir,
                          postfix='orig')
        save_images_files(x_rec_orig_val, output_dir=debug_dir,
                          postfix='orig_rec')
        # Debug runs stop here and deliberately return nothing.
        return

    if gan_defense_flag:
        reconstructed_tensors = gan.reconstruct(
            x_adv_sub, batch_size=batch_size, reconstructor_id=4,
        )

        num_dims = len(images_tensor.get_shape())
        avg_inds = list(range(1, num_dims))
        diff_op = tf.reduce_mean(
            tf.square(x_adv_sub - reconstructed_tensors), axis=avg_inds)

        outs = model_eval_gan(sess, images_tensor, labels_tensor,
                              predictions=model(reconstructed_tensors),
                              test_images=test_images,
                              test_labels=test_labels,
                              args=eval_params, diff_op=diff_op,
                              feed={K.learning_phase(): 0})

        accuracies['bbox_on_sub_adv_ex'] = outs[0]
        # roc_info is always the LAST element: model_eval_gan returns
        # either (accuracy, roc_info) or, when diff_op is given,
        # (accuracy, diffs_mean, roc_info).
        accuracies['roc_info'] = outs[-1]
        print('Test accuracy of oracle on adversarial examples generated '
              'using the substitute: ' + str(outs[0]))
    else:
        accuracy = model_eval(sess, images_tensor, labels_tensor,
                              model(x_adv_sub), test_images, test_labels,
                              args=eval_params,
                              feed={K.learning_phase(): 0})
        print('Test accuracy of oracle on adversarial examples generated '
              'using the substitute: ' + str(accuracy))
        accuracies['bbox_on_sub_adv_ex'] = accuracy

    return accuracies
def reconstruct_dataset(self, ckpt_path=None, max_num=-1, max_num_load=-1):
    """Reconstruct the train/dev/test splits with the generator.

    For every split the method first tries to load `feats.pkl` from the
    split's output directory (skipped when `self.test_again` is set).
    Otherwise it walks the batches of `self.<split>_gen_test()` and, per
    batch, either loads the per-image pickles from `<output_dir>/pickles`
    or runs the reconstruction op and writes those pickles.

    Args:
        ckpt_path: Forwarded to `self.load_generator` when the model has
            not been initialized yet.
        max_num: When positive it is part of the output directory name.
            `max(max_num, max_num_load)` also caps the work: iteration
            stops once `ctr * len(images)` exceeds it (no cap if negative).
        max_num_load: See `max_num`.

    Returns:
        A dict mapping each split name to
        `[reconstructions, targets, original_images]`.
    """
    if not self.initialized:
        self.load_generator(ckpt_path=ckpt_path)

    splits = ['train', 'dev', 'test']

    rec = self.reconstruct(self.real_data_test)

    self.sess.run(tf.local_variables_initializer())
    rets = {}

    # Loop-invariant cap on the number of examples processed per split.
    mn = max(max_num, max_num_load)

    for split in splits:
        if max_num > 0:
            output_dir = os.path.join(
                self.checkpoint_dir,
                'recs_rr{:d}_lr{:.5f}_'
                'iters{:d}_num{:d}'.format(self.rec_rr, self.rec_lr,
                                           self.rec_iters, max_num),
                split)
        else:
            output_dir = os.path.join(
                self.checkpoint_dir,
                'recs_rr{:d}_lr{:.5f}_'
                'iters{:d}'.format(self.rec_rr, self.rec_lr,
                                   self.rec_iters),
                split)

        if self.debug:
            output_dir += '_debug'

        ensure_dir(output_dir)
        feats_path = os.path.join(output_dir, 'feats.pkl')
        could_load = False
        all_recs = []
        # NOTE: pickle executes arbitrary code on load; these caches must
        # only ever come from this program's own output directory.
        try:
            if os.path.exists(feats_path) and not self.test_again:
                # Pickles are binary data: open in binary mode.
                with open(feats_path, 'rb') as f:
                    all_recs = cPickle.load(f)
                could_load = True
                print('[#] Successfully loaded features.')
        except Exception as e:
            # Best effort: an unreadable cache just means recomputing.
            all_recs = []
            print('[#] Exception loading features {}'.format(str(e)))

        gen_func = getattr(self, '{}_gen_test'.format(split))
        all_targets = []
        orig_imgs = []
        ctr = 0
        sti = time.time()

        # Pickle files per reconstructed image.
        pickle_out_dir = os.path.join(output_dir, 'pickles')
        ensure_dir(pickle_out_dir)
        single_feat_path_template = os.path.join(pickle_out_dir,
                                                 'rec_{:07d}_l{}.pkl')

        for images, targets in gen_func():
            batch_size = len(images)

            if (mn > -1 and ctr * batch_size > mn) or \
                    (self.debug and ctr > 2):
                break

            im_paths = [
                single_feat_path_template.format(ctr * batch_size + i,
                                                 targets[i])
                for i in range(batch_size)
            ]

            if could_load:
                # The whole split came from feats.pkl; nothing to do for
                # the reconstructions of this batch.
                print('[*] could load batch: {:d}'.format(ctr))
            else:
                # Only touch the per-image caches when their content can
                # actually be used.
                batch_could_load = not self.test_again
                batch_rec_list = []
                if batch_could_load:
                    try:
                        for imp in im_paths:
                            with open(imp, 'rb') as f:
                                batch_rec_list.append(cPickle.load(f))
                    except Exception:
                        # Missing or corrupt cache file: recompute batch.
                        batch_could_load = False

                if batch_could_load:
                    all_recs.append(np.stack(batch_rec_list))
                    print('[*] could load batch: {:d}'.format(ctr))
                else:
                    self.sess.run(tf.local_variables_initializer())
                    recs = self.sess.run(
                        rec, feed_dict={self.real_data_test_pl: images},
                    )
                    print('[#] t:{:.2f} batch: {:d} '.format(
                        time.time() - sti, ctr))
                    all_recs.append(recs)

                    # Cache every reconstruction for later runs.
                    for pkl_path, single_rec in zip(im_paths, recs):
                        with open(pkl_path, 'wb') as f:
                            cPickle.dump(single_rec, f,
                                         protocol=cPickle.HIGHEST_PROTOCOL)

            all_targets.append(targets)

            orig_transformed = self.sess.run(
                self.real_data_test,
                feed_dict={self.real_data_test_pl: images})
            orig_imgs.append(orig_transformed)
            ctr += 1

        if not could_load:
            all_recs = np.concatenate(all_recs)
            all_recs = all_recs.reshape([-1] + self.image_dim)

        orig_imgs = np.concatenate(orig_imgs).reshape(
            [-1] + self.image_dim)
        all_targets = np.concatenate(all_targets)

        if self.debug:
            save_images_files(all_recs, output_dir=output_dir,
                              labels=all_targets)
            # NOTE(review): operator precedence makes this
            # `imgs + (lo / (hi - lo))`, not `(imgs - lo) / (hi - lo)`.
            # Left unchanged; confirm which was intended.
            save_images_files(
                (orig_imgs + min(0, orig_imgs.min()) / (
                    orig_imgs.max() - min(0, orig_imgs.min()))),
                output_dir=output_dir,
                labels=all_targets, postfix='_orig')

        rets[split] = [all_recs, all_targets, orig_imgs]

    return rets
def model_eval_gan(sess, images, labels, predictions=None,
                   predictions_rec=None, test_images=None, test_labels=None,
                   feed=None, args=None, model=None, diff_op=None,
                   z_norm=None, recons_adv=None, adv_x=None, debug=False,
                   vis_dir=''):
    """Computes the accuracy of a model on test data as well as the
    reconstruction errors for attack detection.

    Only full batches of `args.batch_size` examples are evaluated; a
    trailing partial batch is skipped and does not count towards the
    reported accuracy.

    Args:
        sess: TF session to use when running the graph.
        images: input placeholder.
        labels: output placeholder (for labels).
        predictions: model output predictions.
        predictions_rec: model output prediction for reconstructions. Its
            correct-prediction count is fetched but is not part of the
            return value.
        test_images: numpy array with test inputs.
        test_labels: numpy array with test outputs.
        feed: An optional dictionary that is appended to the feeding
            dictionary before the session runs. Can be used to feed the
            learning phase of a Keras model for instance.
        args: dict or argparse `Namespace` object. Should contain
            `batch_size`.
        model: (deprecated) if not None, holds model output predictions.
        diff_op: The operation that calculates the difference between
            input and attack.
        z_norm: Optional tensor fetched per batch and collected in
            `roc_info`.
        recons_adv: Tensor fetched and saved as `_rec` images when `debug`
            is set.
        adv_x: Tensor fetched and saved as `_adv` images when `debug` is
            set.
        debug: If True, saves images for the first 25 batches to `vis_dir`.
        vis_dir: Output directory for the debug images.

    Returns:
        `(accuracy, roc_info)` or, when `diff_op` is given,
        `(accuracy, diffs_mean, roc_info)`, where `roc_info` is
        `[all_labels, preds, diffs, z_norms]`.

    Raises:
        ValueError: On missing or conflicting arguments, or when there are
            fewer test examples than one batch.
    """
    args = _ArgsWrapper(args or {})

    assert args.batch_size, "Batch size was not given in args dict"
    if test_images is None or test_labels is None:
        raise ValueError("X_test argument and Y_test argument "
                         "must be supplied.")
    if model is None and predictions is None:
        raise ValueError("One of model argument "
                         "or predictions argument must be supplied.")
    if model is not None:
        warnings.warn("model argument is deprecated. "
                      "Switch to predictions argument. "
                      "model argument will be removed after 2018-01-05.")
        if predictions is None:
            predictions = model
        else:
            raise ValueError("Exactly one of model argument"
                             " and predictions argument should be specified.")

    if len(test_images) < args.batch_size:
        # Previously this surfaced as a NameError or an opaque
        # np.concatenate error after the loop.
        raise ValueError("test_images holds fewer examples than one batch; "
                         "nothing can be evaluated.")

    # Define accuracy symbolically.
    correct_preds = tf.equal(tf.argmax(labels, axis=-1),
                             tf.argmax(predictions, axis=-1))

    if predictions_rec is not None:
        correct_preds_rec = tf.equal(tf.argmax(labels, axis=-1),
                                     tf.argmax(predictions_rec, axis=-1))
        acc_value_rec = tf.reduce_sum(tf.to_float(correct_preds_rec))

    accuracy_rec = 0.0
    cur_labels = tf.argmax(labels, axis=-1)
    cur_preds = tf.argmax(predictions, axis=-1)

    acc_value = tf.reduce_sum(tf.to_float(correct_preds))

    # Everything fetched per batch, addressed by name instead of by
    # position so optional entries cannot shift each other.
    fetches = {'acc': acc_value, 'labels': cur_labels, 'preds': cur_preds}
    if diff_op is not None:
        fetches['diff'] = diff_op
    if z_norm is not None:
        fetches['z_norm'] = z_norm
    if predictions_rec is not None:
        fetches['acc_rec'] = acc_value_rec
    if debug:
        assert recons_adv is not None
        assert adv_x is not None
        fetches['recons'] = recons_adv
        fetches['adv'] = adv_x

    # Build the initializer once; creating it inside the loop would add a
    # new op to the graph on every batch.
    init_local_vars = tf.local_variables_initializer()

    diffs = []
    z_norms = []
    all_labels = []
    preds = []

    accuracy = 0.0
    num_evaluated = 0

    # Compute number of batches.
    nb_batches = int(math.ceil(float(len(test_images)) / args.batch_size))
    assert nb_batches * args.batch_size >= len(test_images)

    X_cur = np.zeros((args.batch_size, ) + test_images.shape[1:],
                     dtype=test_images.dtype)
    Y_cur = np.zeros((args.batch_size, ) + test_labels.shape[1:],
                     dtype=test_labels.dtype)

    for batch in range(nb_batches):
        # To initialize the variables of Defense-GAN at test time.
        sess.run(init_local_vars)
        print("[#] Eval batch {}/{}".format(batch, nb_batches))

        # Must not use the `batch_indices` function here, because it
        # repeats some examples.
        # It's acceptable to repeat during training, but not eval.
        start = batch * args.batch_size
        end = min(len(test_images), start + args.batch_size)
        cur_batch_size = end - start

        # The graph is run on full batches only, so the trailing partial
        # batch is dropped.
        if cur_batch_size < args.batch_size:
            break

        X_cur[:cur_batch_size] = test_images[start:end]
        Y_cur[:cur_batch_size] = test_labels[start:end]

        feed_dict = {images: X_cur, labels: Y_cur}
        if feed is not None:
            feed_dict.update(feed)

        outs = sess.run(fetches, feed_dict=feed_dict)

        accuracy += outs['acc']
        num_evaluated += cur_batch_size

        if diff_op is not None:
            diffs.append(outs['diff'])

        if z_norm is not None:
            z_norms.append(outs['z_norm'])

        if predictions_rec is not None:
            accuracy_rec += outs['acc_rec']

        cur_labels_val = outs['labels']
        cur_preds_val = outs['preds']
        all_labels.append(cur_labels_val)
        preds.append(cur_preds_val)

        if debug and batch < 25:
            prefix = 'im_{}'.format(batch)
            save_images_files(test_images[start:end], output_dir=vis_dir,
                              labels=cur_labels_val, prefix=prefix,
                              postfix='_orig')
            save_images_files(outs['adv'], output_dir=vis_dir,
                              labels=cur_labels_val, prefix=prefix,
                              postfix='_adv')
            save_images_files(outs['recons'], output_dir=vis_dir,
                              labels=cur_preds_val, prefix=prefix,
                              postfix='_rec')

    # Divide by the number of examples that were actually evaluated;
    # dividing by len(test_images) would understate the accuracy whenever
    # a trailing partial batch was skipped.
    accuracy /= num_evaluated
    accuracy_rec /= num_evaluated
    preds = np.concatenate(preds)
    all_labels = np.concatenate(all_labels)

    if diff_op is not None:
        diffs = np.concatenate(diffs)
        diffs_mean = np.mean(diffs)
    if z_norm is not None:
        z_norms = np.concatenate(z_norms)

    roc_info = [all_labels, preds, diffs, z_norms]
    if diff_op is not None:
        return accuracy, diffs_mean, roc_info
    return accuracy, roc_info
def whitebox(gan, rec_data_path=None, batch_size=128, learning_rate=0.001,
             nb_epochs=10, eps=0.3, online_training=False, test_on_dev=True,
             attack_type='fgsm', defense_type='gan', num_tests=-1,
             num_train=-1):
    """Reconstruct a folder of images with the GAN and save the results.

    Despite its name and signature (kept from the cleverhans-based
    white-box script), the current body performs no attack: it loads the
    JPEG files matching a hard-coded glob pattern, resizes them to 64x64
    RGB, runs `gan.reconstruct` on them and writes the reconstructions to
    disk.

    Args:
        gan: A `GAN` model. Its session and `reconstruct` method are always
            used.
        rec_data_path: Only checked by an assertion when
            `defense_type == 'defense_gan'` and `FLAGS.train_on_recs` is
            set.
        batch_size: Unused by the current body.
        learning_rate: Unused by the current body.
        nb_epochs: Unused by the current body.
        eps: Unused by the current body.
        online_training: Only checked by the same assertion as
            `rec_data_path`.
        test_on_dev: Forwarded to `get_cached_gan_data`.
        attack_type: Unused by the current body.
        defense_type: String representing the type of defense. Only the
            value `defense_gan` is inspected (for the assertions).
        num_tests: Unused by the current body.
        num_train: Unused by the current body.

    Raises:
        ValueError: If no image matches the glob pattern or an image cannot
            be read.
    """
    # Hard-coded experiment locations; change as needed.
    image_glob = 'pics/64bit_new/Phase7/Physical_07/*.jpg'
    results_dir = "/home/mihirpathare34/results"

    FLAGS = tf.flags.FLAGS

    # Set logging level to see debug information.
    set_log_level(logging.WARNING)

    if defense_type == 'defense_gan':
        assert gan is not None
        if FLAGS.train_on_recs:
            assert rec_data_path is not None or online_training

    # The GAN's session is used for every defense type. The previous code
    # also created a throw-away tf.Session here that was never used.
    sess = gan.sess

    # The returned arrays are not needed, but the call is kept in case it
    # has side effects (e.g. loading or caching the dataset).
    # TODO confirm whether it can be dropped.
    get_cached_gan_data(gan, test_on_dev)

    images = []
    # Sorted so that the index in the output file name is deterministic.
    for path in sorted(glob.glob(image_glob)):
        img = cv2.imread(path)
        if img is None:
            raise ValueError('Could not read image: {}'.format(path))
        img = cv2.resize(img, (64, 64), interpolation=cv2.INTER_CUBIC)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # Float literal: `uint8_array / 255` is integer division on
        # Python 2 without `from __future__ import division`.
        images.append(img / 255.0)
    if not images:
        raise ValueError('No images match {}'.format(image_glob))

    clean_img = np.array(images)
    print(clean_img.shape)

    images_pl = tf.placeholder(tf.float32,
                               shape=[None] + list(clean_img.shape[1:]))
    # NOTE(review): batch_size is fixed at 50 regardless of how many images
    # were loaded — confirm that `reconstruct` tolerates other counts.
    reconstruction, z11 = gan.reconstruct(images_pl, batch_size=50,
                                          reconstructor_id=1)

    sess.run(tf.local_variables_initializer())

    # Fetch both tensors in one run so they stem from the same
    # reconstruction pass (and the pass is executed only once).
    rec, z112 = sess.run([reconstruction, z11],
                         feed_dict={images_pl: clean_img,
                                    K.learning_phase(): 0})

    print("recloss shape", z112.shape)

    save_images_files(rec, output_dir=results_dir, postfix='crop_save')

    for i, a in enumerate(rec):
        # Stretch every reconstruction to the full 0..255 range.
        norm_image = cv2.normalize(a, None, alpha=0, beta=255,
                                   norm_type=cv2.NORM_MINMAX,
                                   dtype=cv2.CV_32F)
        cv2.imwrite("recon-" + str(i) + ".jpg", norm_image)
def blackbox(gan, rec_data_path=None, batch_size=128, learning_rate=0.001,
             nb_epochs=10, holdout=150, data_aug=6, nb_epochs_s=10,
             lmbda=0.1, online_training=False, train_on_recs=False,
             test_on_dev=False, defense_type='none'):
    """Black-box substitute attack (arxiv.org/abs/1602.02697) evaluated
    with and without reconstruction.

    A black-box classifier is trained, a substitute is trained on the first
    `holdout` test examples with the black-box logits on reconstructed
    inputs as oracle, and FGSM examples crafted on the substitute are fed
    back to the black-box model.

    Args:
        gan: GAN object; supplies the session for the `'defense_gan'`
            defense, the reconstructor (through `get_reconstructor`) and
            `dataset_name`.
        rec_data_path: Not read by this implementation.
        batch_size: Batch size for training, reconstruction and evaluation.
        learning_rate: Forwarded to `prep_bbox` and `train_sub`.
        nb_epochs: Forwarded to `prep_bbox`.
        holdout: Number of leading test examples given to the substitute
            and removed from the evaluation set.
        data_aug: Forwarded to `train_sub`.
        nb_epochs_s: Forwarded to `train_sub`.
        lmbda: Forwarded to `train_sub`.
        online_training: Not read by this implementation.
        train_on_recs: Not read by this implementation.
        test_on_dev: Forwarded to `get_cached_gan_data`.
        defense_type: `'defense_gan'` (with a truthy `gan`) selects the
            reconstruction-based evaluation; a value containing `'adv_tr'`
            turns on adversarial training in `prep_bbox`.

    Returns:
        A dict with the keys `'acc_adv'`, `'acc_rec'`, `'roc_info_adv'`
        and `'roc_info_rec'`. Without the Defense-GAN defense `'acc_rec'`
        is 0 and both ROC entries are `None`.
    """
    FLAGS = flags.FLAGS

    # Set logging level to see debug information.
    set_log_level(logging.WARNING)

    # Session setup: borrow the GAN's session for Defense-GAN, otherwise
    # open a new one with on-demand GPU memory growth.
    gan_defense_flag = bool(defense_type == 'defense_gan' and gan)
    if gan_defense_flag:
        sess = gan.sess
    else:
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        sess = tf.Session(config=session_config)
    adv_training = bool(defense_type) and 'adv_tr' in defense_type

    train_images, train_labels, test_images, test_labels = \
        get_cached_gan_data(gan, test_on_dev, orig_data_flag=True)

    x_shape = list(train_images.shape[1:])
    classes = train_labels.shape[1]
    nb_classes = classes

    model_builders = {
        'A': model_a, 'B': model_b, 'C': model_c, 'D': model_d,
        'E': model_e, 'F': model_f, 'Q': model_q, 'Y': model_y,
        'Z': model_z,
    }

    with tf.variable_scope(tf.get_variable_scope(), reuse=tf.AUTO_REUSE):
        bb_model = model_builders[FLAGS.bb_model](
            input_shape=[None] + x_shape,
            nb_classes=train_labels.shape[1],
        )
    with tf.variable_scope("Substitute", reuse=tf.AUTO_REUSE):
        sub_model = model_builders[FLAGS.sub_model](
            input_shape=[None] + x_shape,
            nb_classes=train_labels.shape[1],
        )

    debug_limit = 20 * batch_size
    if FLAGS.debug:
        train_images = train_images[:debug_limit]
        train_labels = train_labels[:debug_limit]
        debug_dir = os.path.join('debug', 'blackbox', FLAGS.debug_dir)
        ensure_dir(debug_dir)
        x_debug_test = test_images[:batch_size]

    # The adversary only ever sees the first `holdout` test examples.
    images_sub = test_images[:holdout]
    labels_sub = np.argmax(test_labels[:holdout], axis=1)
    print(labels_sub)

    # Everything after the holdout slice forms the evaluation set.
    if FLAGS.num_tests > 0:
        test_images = test_images[:FLAGS.num_tests]
        test_labels = test_labels[:FLAGS.num_tests]
    test_images = test_images[holdout:]
    test_labels = test_labels[holdout:]

    # Channels-first image_dim is rewritten to channels-last.
    if FLAGS.image_dim[0] == 3:
        channels, height, width = FLAGS.image_dim[:3]
        FLAGS.image_dim = [height, width, channels]

    # Input and label placeholders.
    images_tensor = tf.placeholder(tf.float32, shape=[None] + x_shape)
    labels_tensor = tf.placeholder(tf.float32, shape=(None, classes))

    rng = np.random.RandomState([11, 24, 1990])

    train_images_bb, train_labels_bb = train_images, train_labels
    if FLAGS.debug:
        train_images_bb = train_images_bb[:debug_limit]
        train_labels_bb = train_labels_bb[:debug_limit]

    # Train the black-box model; only the model itself is used below.
    model, _, _ = prep_bbox(
        sess, images_tensor, labels_tensor, train_images_bb,
        train_labels_bb, test_images, test_labels, nb_epochs, batch_size,
        learning_rate, rng=rng, gan=gan, adv_training=adv_training,
        cnn_arch=bb_model)

    # Substitute training as described in https://arxiv.org/abs/1602.02697
    print("Training the substitute model.")
    reconstructor = get_reconstructor(gan)
    recon_tensors, _ = reconstructor.reconstruct(
        images_tensor, batch_size=batch_size, reconstructor_id=2)
    oracle_logits = model.get_logits(recon_tensors)
    model_sub, _ = train_sub(
        sess, images_tensor, labels_tensor, oracle_logits, images_sub,
        labels_sub, nb_classes, nb_epochs_s, batch_size, learning_rate,
        data_aug, lmbda, rng=rng, substitute_model=sub_model,
        dataset_name=gan.dataset_name
    )

    # FGSM settings come from the per-dataset attack configuration.
    dataset_attack_cfg = attack_config_dict[gan.dataset_name]
    eps = dataset_attack_cfg['eps']
    min_val = attack_config_dict[gan.dataset_name]['clip_min']
    fgsm_par = {'eps': eps, 'ord': np.inf, 'clip_min': min_val,
                'clip_max': 1.}
    fgsm = FastGradientMethod(model_sub, sess=sess)

    # Adversarial examples crafted on the substitute.
    eval_params = {'batch_size': batch_size}
    x_adv_sub = fgsm.generate(images_tensor, **fgsm_par)

    if FLAGS.debug and gan is not None:
        # Dump a few qualitative results.
        recon_tensors, _ = reconstructor.reconstruct(
            x_adv_sub, batch_size=batch_size, reconstructor_id=2)
        x_rec_orig, _ = reconstructor.reconstruct(
            images_tensor, batch_size=batch_size, reconstructor_id=3)

        debug_feed = {images_tensor: x_debug_test}
        x_adv_sub_val = sess.run(x_adv_sub, feed_dict=debug_feed)
        x_rec_debug_val = sess.run(recon_tensors, feed_dict=debug_feed)
        x_rec_orig_val = sess.run(x_rec_orig, feed_dict=debug_feed)

        save_images_files(x_adv_sub_val, output_dir=debug_dir,
                          postfix='adv')
        save_images_files(x_rec_debug_val, output_dir=debug_dir,
                          postfix='gen_rec')
        save_images_files(x_debug_test, output_dir=debug_dir,
                          postfix='orig')
        save_images_files(x_rec_orig_val, output_dir=debug_dir,
                          postfix='orig_rec')

    if not gan_defense_flag:
        acc_adv = model_eval(sess, images_tensor, labels_tensor,
                             model.get_logits(x_adv_sub), test_images,
                             test_labels, args=eval_params)
        print('Test accuracy of oracle on adversarial examples generated '
              'using the substitute: ' + str(acc_adv))
        return {'acc_adv': acc_adv,
                'acc_rec': 0,
                'roc_info_adv': None,
                'roc_info_rec': None}

    # Reduce over every axis except the batch axis.
    avg_inds = list(range(1, len(images_tensor.get_shape())))

    # Reconstruction of the adversarial images.
    recons_adv, zs = reconstructor.reconstruct(x_adv_sub,
                                               batch_size=batch_size)
    diff_op = tf.reduce_mean(tf.square(x_adv_sub - recons_adv),
                             axis=avg_inds)
    z_norm = tf.reduce_sum(tf.square(zs), axis=1)
    acc_adv, _, roc_info_adv = model_eval_gan(
        sess, images_tensor, labels_tensor,
        predictions=model.get_logits(recons_adv),
        test_images=test_images, test_labels=test_labels,
        args=eval_params, diff_op=diff_op, z_norm=z_norm,
        recons_adv=recons_adv, adv_x=x_adv_sub, debug=False)

    # Reconstruction of the clean images.
    recons_clean, zs = reconstructor.reconstruct(images_tensor,
                                                 batch_size=batch_size)
    diff_op = tf.reduce_mean(tf.square(images_tensor - recons_clean),
                             axis=avg_inds)
    z_norm = tf.reduce_sum(tf.square(zs), axis=1)
    acc_rec, _, roc_info_rec = model_eval_gan(
        sess, images_tensor, labels_tensor,
        model.get_logits(recons_clean), None,
        test_images=test_images, test_labels=test_labels,
        args=eval_params, diff_op=diff_op, z_norm=z_norm,
        recons_adv=recons_clean, adv_x=images_tensor, debug=False)

    print('Evaluation accuracy with reconstruction: {}'.format(acc_rec))
    print('Test accuracy of oracle on cleaned images : {}'.format(acc_adv))

    return {'acc_adv': acc_adv,
            'acc_rec': acc_rec,
            'roc_info_adv': roc_info_adv,
            'roc_info_rec': roc_info_rec}