class Data:
    """Class-level holder for the train/test arrays and their dimensions.

    Every attribute is computed once, when the class body executes, from
    names defined outside this block (``unambiguous_X``, ``unambiguous_Y``,
    ``es``, ``ls`` and ``use_cifar``).
    """
    # NOTE(review): the original layout was flattened onto one line; every
    # statement after `if use_cifar:` is assumed to belong to that branch —
    # confirm against the upstream file.

    # Defaults: 784 == 28 * 28 * 1, which matches the reshapes below.
    input_dim = 784
    Nclasses = 10
    # Inputs are reshaped to (-1, 28, 28, 1); labels go through
    # to_categorical(..., 10) (presumably one-hot over 10 classes — confirm).
    X = np.reshape(unambiguous_X, (-1, 28, 28, 1))
    Y = to_categorical(unambiguous_Y, 10)
    Xtest = np.reshape(es, (-1, 28, 28, 1))
    Ytest = to_categorical(ls, 10)
    if use_cifar:
        # Replace the defaults above with the CIFAR10 train/test sets.
        data = CIFAR10()
        Nclasses = 10
        X, Y = data.get_set('train')
        Xtest, Ytest = data.get_set('test')
        # Recompute the flattened input size from the test-set image shape.
        img_rows, img_cols, nchannels = Xtest.shape[1:4]
        input_dim = img_rows * img_cols * nchannels
def save_images(model, attack, set_type, first_index, last_index):
    """
    Loads the dataset split matching the model and runs `generate_attacks`
    on the requested range of samples.

    Parameters
    ----------
    model: str
        The name of the model used. It must belong to MNIST_SETS or
        CIFAR10_SETS.
    attack: str
        The type of used attack (either "jsma", "wjsma" or "tjsma").
    set_type: str
        The type of set used (either "train" or "test").
    first_index: int
        The index of the first image attacked.
    last_index: int
        The index of the last image attacked.

    Raises
    ------
    ValueError
        If the model belongs to neither MNIST_SETS nor CIFAR10_SETS.
    """
    if model in MNIST_SETS:
        from cleverhans.dataset import MNIST

        dataset = MNIST(train_start=0, train_end=60000,
                        test_start=0, test_end=10000)
        samples, labels = dataset.get_set(set_type)
        gamma = 0.155
    elif model in CIFAR10_SETS:
        from cleverhans.dataset import CIFAR10

        dataset = CIFAR10(train_start=0, train_end=50000,
                          test_start=0, test_end=10000)
        samples, labels = dataset.get_set(set_type)
        # Force the labels into a (number of samples, 10) layout.
        labels = labels.reshape((labels.shape[0], 10))
        gamma = 0.039
    else:
        raise ValueError("Invalid model: " + model)

    # Results go to attack/<model>/<attack>_<set_type>; the model itself is
    # read from models/joblibs/<model>.joblib.
    destination = "/".join(("attack", model, attack + "_" + set_type))
    joblib_file = "models/joblibs/" + model + ".joblib"

    generate_attacks(
        save_path=destination,
        file_path=joblib_file,
        x_set=samples,
        y_set=labels,
        attack=attack,
        gamma=gamma,
        first_index=first_index,
        last_index=last_index,
    )
def model_test(file_name=FILE_NAME):
    """
    Runs `model_testing` on the stored model using the full CIFAR-10
    train and test splits.

    Parameters
    ----------
    file_name: str, optional
        The name of the joblib file.
    """
    dataset = CIFAR10(train_start=0, train_end=50000,
                      test_start=0, test_end=10000)

    train_images, train_labels = dataset.get_set('train')
    test_images, test_labels = dataset.get_set('test')

    # Labels are forced into (number of samples, 10) before evaluation.
    model_testing(
        file_name,
        train_images,
        train_labels.reshape((50000, 10)),
        test_images,
        test_labels.reshape((10000, 10)),
    )
def model_train(file_name=FILE_NAME):
    """
    Builds the all-convolutional network and hands it to `model_training`
    together with the CIFAR-10 train and test splits.

    Parameters
    ----------
    file_name: str, optional
        The name of the joblib file.
    """

    def conv_relu(filters):
        # One 3x3, stride-1, "SAME"-padded convolution followed by a ReLU.
        return [Conv2D(filters, (3, 3), (1, 1), "SAME"), ReLU()]

    # Three stages of two conv+ReLU pairs, each stage closed by a 2x2 pooling.
    layers = []
    for first_width, second_width in ((64, 128), (128, 256), (256, 512)):
        layers += conv_relu(first_width)
        layers += conv_relu(second_width)
        layers.append(MaxPooling2D((2, 2), (2, 2), "VALID"))

    # Classification head: 10-channel convolution, global average, softmax.
    layers += [
        Conv2D(10, (3, 3), (1, 1), "SAME"),
        GlobalAveragePool(),
        Softmax(),
    ]

    model = MLP(layers, (None, 32, 32, 3))

    dataset = CIFAR10(train_start=0, train_end=50000,
                      test_start=0, test_end=10000)
    train_images, train_labels = dataset.get_set('train')
    test_images, test_labels = dataset.get_set('test')

    # Labels are forced into (number of samples, 10) before training.
    train_labels = train_labels.reshape((50000, 10))
    test_labels = test_labels.reshape((10000, 10))

    model_training(model, file_name, train_images, train_labels,
                   test_images, test_labels, nb_epochs=10, batch_size=128,
                   learning_rate=.001, label_smoothing=0.1)
def _gen_adv_saver_with_moving_stats():
    """Return a Saver over the trainable variables plus every global variable
    whose name contains 'moving_mean' or 'moving_variance'."""
    var_list = tf.trainable_variables()
    g_list = tf.global_variables()
    var_list += [g for g in g_list if 'moving_mean' in g.name]
    var_list += [g for g in g_list if 'moving_variance' in g.name]
    return tf.train.Saver(var_list=var_list)


def _gen_adv_load_dataset(dataset, source_data_dir, train_start, train_end,
                          test_start, test_end, arch):
    """Instantiate the dataset wrapper that matches `dataset`, or raise
    ValueError when the name is not one of the supported ones."""
    if dataset == "CIFAR10":
        return CIFAR10(data_dir=source_data_dir,
                       train_start=train_start, train_end=train_end,
                       test_start=test_start, test_end=test_end)
    if dataset in ("CIFAR100", "CIFAR100_coarse_label"):
        return CIFAR100(data_dir=source_data_dir, dataset_name=dataset,
                        train_start=train_start, train_end=train_end,
                        test_start=test_start, test_end=test_end)
    if dataset in ("MNIST", "FashionMNIST"):
        return MNIST(data_dir=source_data_dir,
                     train_start=train_start, train_end=train_end,
                     test_start=test_start, test_end=test_end)
    # NOTE(review): the two ImageNet variants are built without `test_end`,
    # exactly as in the original code — confirm this is intentional.
    if dataset == "ImageNet":
        return MiniImageNet(data_dir=source_data_dir,
                            train_start=train_start, train_end=train_end,
                            test_start=test_start,
                            num_classes=CLASS_NUM["ImageNet"], arch=arch)
    if dataset == "TinyImageNet":
        return TinyImageNet(data_dir=source_data_dir,
                            train_start=train_start, train_end=train_end,
                            test_start=test_start,
                            num_classes=CLASS_NUM["TinyImageNet"])
    raise ValueError("Unsupported dataset: {}".format(dataset))


def _gen_adv_build_model(arch, dataset, channels_dataset):
    """Instantiate the classifier for `arch`, or raise ValueError when the
    architecture name is not one of the supported ones.

    `dataset` indexes IMG_SIZE / CLASS_NUM and `channels_dataset` indexes
    DATASET_INCHANNELS, mirroring the two different keys the caller uses.
    """
    image_size = IMG_SIZE[dataset]
    nb_classes = CLASS_NUM[dataset]
    in_channels = DATASET_INCHANNELS[channels_dataset]
    input_shape = [image_size, image_size, in_channels]

    if arch == "conv4":
        model = Shallow4ConvLayersConv(arch, image_size, nb_classes,
                                       in_channels=in_channels, dim_hidden=64)
        model.is_training = False
    elif arch == "vgg16":
        model = VGG16("vgg_16", nb_classes, input_shape)
        model.is_training = False
    elif arch == "vgg16small":
        model = VGG16Small(arch, nb_classes, input_shape)
    elif arch == "resnet10":
        model = ResNet10(arch, nb_classes, input_shape)
    elif arch == "resnet18":
        model = ResNet18(arch, nb_classes, input_shape)
    elif arch == "resnet50":
        model = ResNet50(arch, nb_classes, input_shape)
    elif arch == "resnet101":
        model = ResNet101(arch, nb_classes, input_shape)
    else:
        raise ValueError("Unsupported architecture: {}".format(arch))
    return model


def _gen_adv_report_label(is_adv):
    """Map the tri-state `is_adv` flag to the label used in log messages."""
    if is_adv is None:
        return None
    return 'adversarial' if is_adv else 'legitimate'


def generate_adv_images(gpu,
                        attack_algo,
                        dataset,
                        source_data_dir,
                        train_start=0,
                        train_end=1000000,
                        test_start=0,
                        test_end=100000,
                        nb_epochs=NB_EPOCHS,
                        batch_size=BATCH_SIZE,
                        learning_rate=0.001,
                        testing=False,
                        num_threads=None,
                        label_smoothing=0.1,
                        args=FLAGS):
    """
    Train (or restore) a clean classifier, attack it, and save the
    adversarial images generated from the train and test sets.

    Two files are written under ``DATASET_ADV_OUTPUT[args.dataset]/args.arch``:
    ``<attack_algo>_untargeted_train.npz`` and
    ``<attack_algo>_untargeted_test.npz``, each holding the arrays
    ``adv_images``, ``adv_pred``, ``gt_label`` and ``attack_success_rate``.

    :param gpu: value written to the CUDA_VISIBLE_DEVICES environment variable
    :param attack_algo: key into ATTACKERS / UNTARGETED_ATTACKER_PARAM
    :param dataset: dataset name; one of "CIFAR10", "CIFAR100",
                    "CIFAR100_coarse_label", "MNIST", "FashionMNIST",
                    "ImageNet", "TinyImageNet"
    :param source_data_dir: the source data directory of the dataset
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches; replaced by the attack's
                       own "batch_size" entry when it defines one
    :param learning_rate: learning rate for training
    :param testing: if true, evaluate the clean model on the test set before
                    attacking it
    :param num_threads: when truthy, intra-op parallelism is limited to 1
                        thread (the value itself is not used)
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :param args: namespace providing ``dataset`` and ``arch``
    :return: an AccuracyReport object carrying the measured success rates
    :raises ValueError: if `dataset` or `args.arch` is not supported
    """
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # Environment values must be strings; str() also accepts an integer id.
    os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu)

    attack_defaults = UNTARGETED_ATTACKER_PARAM[attack_algo]
    if "batch_size" in attack_defaults:
        # The attack dictates its own batch size.
        # NOTE(review): the flattened source does not show whether the
        # config.BATCH_SIZE assignment sat inside this branch — confirm.
        batch_size = attack_defaults["batch_size"]
        config.BATCH_SIZE = batch_size

    output_dir = DATASET_ADV_OUTPUT[args.dataset] + "/" + args.arch
    os.makedirs(output_dir, exist_ok=True)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    config_args = {}
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    config_args["gpu_options"] = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Load the data and build the augmented training pipeline.
    data = _gen_adv_load_dataset(dataset, source_data_dir, train_start,
                                 train_end, test_start, test_end, args.arch)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda image, label: (random_shift(random_horizontal_flip(image)),
                              label), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholders (the batch dimension is fixed).
    x = tf.placeholder(tf.float32,
                       shape=(batch_size, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(batch_size, nb_classes))

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    def do_generate_eval(adv_x, pred_adv_x, x_set, y_set, report_key,
                         is_adv=None):
        # Runs the attack over x_set and records its success rate in `report`.
        (adv_images_total, adv_pred_total, gt_label_total,
         success_rate) = untargeted_advx_image_eval(
             sess, x, y, adv_x, pred_adv_x, x_set, y_set, args=eval_params)
        setattr(report, report_key, success_rate)
        report_text = _gen_adv_report_label(is_adv)
        if report_text:
            print('adversarial attack successful rate on %s: %0.4f' %
                  (report_text, success_rate))
        # The original code annotates the result as shape = (total, H,W,C).
        return adv_images_total, adv_pred_total, gt_label_total, success_rate

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        # Measures the accuracy of `preds` on x_set and records it in `report`.
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        report_text = _gen_adv_report_label(is_adv)
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    model = _gen_adv_build_model(args.arch, dataset, args.dataset)

    def evaluate():
        # Evaluate in inference mode, then switch back to training mode.
        if hasattr(model, "is_training"):
            model.is_training = False
        preds = model.get_logits(x)  # tf.tensor
        do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)
        if hasattr(model, "is_training"):
            model.is_training = True

    resume = TF_CLEAN_IMAGE_MODEL_PATH[args.dataset] + "/{0}".format(args.arch)
    os.makedirs(resume, exist_ok=True)
    print("using folder {} to store model".format(resume))
    resume_files = os.listdir(resume)
    loss = CrossEntropy(model, smoothing=label_smoothing)

    has_saved_files = any(
        os.path.isfile(resume + "/" + entry) for entry in resume_files)
    if not has_saved_files:
        # Nothing to restore: clean training must be done first.
        if hasattr(model, "is_training"):
            model.is_training = True
        saver = _gen_adv_saver_with_moving_stats()
        # Train for nb_epochs epochs.
        train(sess, loss, None, None, model,
              dataset_train=dataset_train, dataset_size=dataset_size,
              evaluate=evaluate, args=train_params, rng=rng,
              var_list=model.get_params())
        save_path = saver.save(sess, "{}/model".format(resume),
                               global_step=nb_epochs)
        print("Model saved in path: %s" % save_path)
    else:
        if len(resume_files) == 1 and resume_files[0].endswith("ckpt"):
            # A single "*ckpt" entry is restored directly.
            checkpoint = resume + "/" + resume_files[0]
            loaded_from = checkpoint
        else:
            # Otherwise resume from the latest recorded checkpoint.
            checkpoint = tf.train.latest_checkpoint(resume)
            loaded_from = resume
        saver = _gen_adv_saver_with_moving_stats()
        saver.restore(sess, checkpoint)
        print("load pretrained model {}".format(loaded_from))

    # Calculate training error
    if testing:
        evaluate()
    if hasattr(model, "is_training"):
        model.is_training = False

    # Initialize the attack object and graph
    attacker = ATTACKERS[attack_algo](model, sess=sess)
    # Copy the parameters: adding "y_target" below must not leak into the
    # shared module-level configuration.
    param_dict = dict(attack_defaults)
    if attack_algo in NEED_TARGETED_Y:
        y_target = look_for_target_otherthan_gt(y, CLASS_NUM[args.dataset])
        y_target = tf.reshape(y_target, (batch_size, -1))
        param_dict["y_target"] = y_target
    adv_x = attacker.generate(x, **param_dict)  # tensor
    preds_adv = model.get_logits(adv_x)

    # Generate and store adversarial examples for both splits.
    for split, x_set, y_set, report_key in (
            ("train", x_train, y_train, "clean_train_adv_eval"),
            ("test", x_test, y_test, "clean_test_adv_eval")):
        (adv_images_total, adv_pred_total, gt_label_total,
         success_rate) = do_generate_eval(adv_x, preds_adv, x_set, y_set,
                                          report_key, True)
        np.savez(output_dir + "/{0}_untargeted_{1}.npz".format(attack_algo,
                                                               split),
                 adv_images=adv_images_total,
                 adv_pred=adv_pred_total,
                 gt_label=gt_label_total,
                 attack_success_rate=success_rate)

    print('generate {} adversarial image done'.format(attack_algo))
    return report
def cifar10_tutorial(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS,
                     num_threads=None,
                     label_smoothing=0.1,
                     retrain=False,
                     source_samples=SOURCE_SAMPLES,
                     attack_iterations=ATTACK_ITERATIONS,
                     targeted=TARGETED):
    """
    Evaluate a Carlini-Wagner L2 attack against a series of saved CIFAR10
    models.

    For every checkpoint suffix 50, 100, ..., 1000 the model stored at
    ``os.path.join(save_dir, filename + "-" + suffix)`` is loaded (or trained
    and saved when no ``.meta`` file exists), attacked, and its accuracy and
    distortion are printed and collected.

    Besides its parameters the function reads the module-level names
    ``save_dir``, ``filename``, ``CW_LEARNING_RATE``, ``type`` and
    ``discretizeColor``.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: not used by this function
    :param testing: not used by this function
    :param backprop_through_attack: not used by this function
    :param nb_filters: ignored; the model is always built with 64 filters
    :param num_threads: when truthy, the initial session limits intra-op
                        parallelism to 1 thread
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :param retrain: not used by this function
    :param source_samples: number of test samples used to size the attack
    :param attack_iterations: maximum number of attack iterations
    :param targeted: whether to take the targeted branch
    :return: an AccuracyReport object (returned unmodified)
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start, train_end=train_end,
                   test_start=test_start, test_end=test_end)
    # NOTE(review): dataset_size / dataset_train and the two placeholders
    # below are not used later in this function; they are left in place so
    # the first graph is constructed exactly as before.
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda image, label: (random_shift(random_horizontal_flip(image)),
                              label), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Checkpoint suffixes to evaluate: 50, 100, ..., 1000.
    save_file_nums = list(range(50, 1001, 50))
    distortion_log = []
    accuracy_log = []

    # The initial session is never used for any computation; release it
    # instead of leaking it when `sess` is rebound inside the loop.
    sess.close()

    for save_file_num in save_file_nums:
        model_path = os.path.join(save_dir,
                                  filename + "-" + str(save_file_num))

        # Set TF random seed to improve reproducibility
        tf.set_random_seed(1234)

        # Create TF session
        sess = tf.Session()
        try:
            print("Created TensorFlow session.")

            # Define input TF placeholder
            x = tf.placeholder(tf.float32,
                               shape=(None, img_rows, img_cols, nchannels))
            y = tf.placeholder(tf.float32, shape=(None, nb_classes))

            # NOTE(review): this overrides the `nb_filters` argument; kept
            # because existing checkpoints may depend on it — confirm.
            nb_filters = 64

            # Define TF model graph
            model = ModelAllConvolutional('model1', nb_classes, nb_filters,
                                          input_shape=[32, 32, 3])
            preds = model.get_logits(x)
            loss = CrossEntropy(model, smoothing=label_smoothing)
            print("Defined TensorFlow model graph.")

            ###################################################################
            # Training the model using TensorFlow
            ###################################################################
            train_params = {
                'nb_epochs': nb_epochs,
                'batch_size': batch_size,
                'learning_rate': learning_rate,
                'filename': os.path.split(model_path)[-1]
            }
            rng = np.random.RandomState([2017, 8, 30])

            print("Trying to load trained model from: " + model_path)
            # Reuse a previously trained model when its checkpoint exists.
            if os.path.exists(model_path + ".meta"):
                tf_model_load(sess, model_path)
                print("Load trained model")
            else:
                train(sess, loss, x_train, y_train, args=train_params,
                      rng=rng)
                saver = tf.train.Saver()
                saver.save(sess, model_path)

            ###################################################################
            # Craft adversarial examples using Carlini and Wagner's approach
            ###################################################################
            nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
            print('Crafting ' + str(source_samples) + ' * ' +
                  nb_adv_per_sample + ' adversarial examples')
            print("This could take some time ...")

            # Instantiate a CW attack object
            cw = CarliniWagnerL2(model, sess=sess)

            if targeted:
                adv_inputs = np.array(
                    [[instance] * nb_classes
                     for instance in x_test[:source_samples]],
                    dtype=np.float32)

                one_hot = np.zeros((nb_classes, nb_classes))
                one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

                adv_inputs = adv_inputs.reshape(
                    (source_samples * nb_classes, img_rows, img_cols,
                     nchannels))
                adv_ys = np.array([one_hot] * source_samples,
                                  dtype=np.float32).reshape(
                                      (source_samples * nb_classes,
                                       nb_classes))
                yname = "y_target"
                cw_params_batch_size = source_samples * nb_classes
            else:
                adv_inputs = x_test
                adv_ys = None
                yname = "y"
                cw_params_batch_size = source_samples

            cw_params = {
                'binary_search_steps': 1,
                'max_iterations': attack_iterations,
                'learning_rate': CW_LEARNING_RATE,
                'batch_size': cw_params_batch_size,
                'initial_const': 10
            }

            # Symbolic adversarial examples, built before the labels are
            # added to the parameters.
            adv2 = cw.generate(x, **cw_params)
            cw_params[yname] = adv_ys

            # NOTE(review): the numpy attack
            # (cw.generate_np(adv_inputs, **cw_params)) was disabled in the
            # original code, so adv_x stays None.
            adv_x = None

            eval_params = {
                'batch_size': np.minimum(nb_classes, source_samples)
            }
            if targeted:
                # NOTE(review): this branch passes adv_x=None and never binds
                # `distortion`, so the report below cannot succeed when
                # targeted is true — the intended behaviour needs confirming.
                accuracy = model_eval(sess, x, y, preds, adv_x, adv_ys,
                                      args=eval_params)
            else:
                accuracy, distortion = model_eval(
                    sess, x, y, preds, x_test, y_test, args=eval_params,
                    is_adv=True, ae=adv2, type=type, datasetName="CIFAR10",
                    discretizeColor=discretizeColor)

            print('--------------------------------------')
            print("load save file: ", save_file_num)

            print('Test accuracy on examples: %0.4f ,distortion: %0.4f' %
                  (accuracy, distortion))

            distortion_log.append(distortion)
            accuracy_log.append(accuracy)
        finally:
            # Release the session before the default graph is reset; the
            # graph must not be reset while a session is still active.
            sess.close()

        tf.reset_default_graph()

    print("accuracy:")
    for logged_accuracy in accuracy_log:
        print(logged_accuracy)

    print("distortion:")
    for logged_distortion in distortion_log:
        print(logged_distortion)

    return report
def cifar10_cw_recon(train_start=0, train_end=60000, test_start=0, test_end=10000, viz_enabled=VIZ_ENABLED, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE, source_samples=SOURCE_SAMPLES, learning_rate=LEARNING_RATE, attack_iterations=ATTACK_ITERATIONS, model_path=MODEL_PATH, model_path_cls=MODEL_PATH, targeted=TARGETED, num_threads=None, label_smoothing=0.1, nb_filters=NB_FILTERS): # Object used to keep track of (and return) key accuracies report = AccuracyReport() # Set TF random seed to improve reproducibility tf.set_random_seed(1234) rng = np.random.RandomState() # Create TF session sess = tf.Session() print("Created TensorFlow session.") set_log_level(logging.DEBUG) if num_threads: config_args = dict(intra_op_parallelism_threads=1) else: config_args = {} sess = tf.Session(config=tf.ConfigProto(**config_args)) # Get CIFAR10 data data = CIFAR10(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) dataset_size = data.x_train.shape[0] dataset_train = data.to_tensorflow()[0] dataset_train = dataset_train.map( lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4) dataset_train = dataset_train.batch(batch_size) dataset_train = dataset_train.prefetch(16) x_train, y_train = data.get_set('train') x_test, y_test = data.get_set('test') nb_latent_size = 100 # Get MNIST test data # Obtain Image Parameters img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] print("img_Rows, img_cols, nchannels: ", img_rows, img_cols, nchannels) # Define input TF placeholder x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) x_t = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels)) y = tf.placeholder(tf.float32, shape=(None, nb_classes)) y_t = tf.placeholder(tf.float32, shape=(None, nb_classes)) z = tf.placeholder(tf.float32, shape=(None, nb_latent_size)) z_t = tf.placeholder(tf.float32, shape=(None, nb_latent_size)) #nb_filters = 64 nb_layers = 500 ''' def do_eval_cls(preds, x_set, 
y_set, x_tar_set,report_key, is_adv = None): acc = model_eval(sess, x, y, preds, x_t, x_set, y_set, x_tar_set, args=eval_params_cls) setattr(report, report_key, acc) if is_adv is None: report_text = None elif is_adv: report_text = 'adversarial' else: report_text = 'legitimate' if report_text: print('Test accuracy on %s examples: %0.4f' % (report_text, acc)) def eval_cls(): do_eval_cls(y_logits, x_test, y_test, x_test,'clean_train_clean_eval', False) ''' ''' def evaluate(): do_eval(y_logits, x_test, y_test, 'clean_train_clean_eval', False) filepath_ae = "clean_model_cifar10_ae.joblib" filepath_cl = "classifier_cifar10.joblib" # Define TF model graph model = ModelBasicAE('model', nb_layers, nb_latent_size) #cl_model = ModelCls('cl_model') #cl_model = ModelAllConvolutional('model1', nb_classes, nb_filters, # input_shape=[32, 32, 3]) #preds = model.get_logits(x) recons = model.get_layer(x, 'RECON') latent1_orig = model.get_layer(x, 'LATENT') latent1_orig_recon = model.get_layer(recons, 'LATENT') loss = SquaredError(model) print("Defined TensorFlow model graph.") #y_logits = cl_model.get_logits(x) #loss_cls = CrossEntropy(cl_model, smoothing=label_smoothing) ########################################################################### # Training the model using TensorFlow ########################################################################### # Train an MNIST model train_params = { 'nb_epochs': nb_epochs, 'batch_size': batch_size, 'learning_rate': learning_rate, 'filename': os.path.split(model_path)[-1] } train_params_cls = { 'nb_epochs': 4, 'batch_size': batch_size, 'learning_rate': learning_rate } rng = np.random.RandomState([2017, 8, 30]) # check if we've trained before, and if we have, use that pre-trained model #if os.path.exists(model_path + ".meta"): # tf_model_load(sess, model_path) #else: #eval_params_cls = {'batch_size': batch_size} # Evaluate the accuracy of the MNIST model on legitimate test examples eval_params = {'batch_size': batch_size} def 
do_eval(recons, x_orig, x_target, y_orig, y_target, report_key, is_adv=False, x_adv = None, recon_adv = False, lat_orig = None, lat_orig_recon = None): noise, d_orig, d_targ, avg_dd, d_latent = model_eval_ae(sess, x, x_t, recons, x_orig, x_target, x_adv, recon_adv, lat_orig, lat_orig_recon, args = eval_params) setattr(report, report_key, avg_dd) if is_adv is None: report_text = None elif is_adv: report_text = 'adversarial' else: report_text = 'legitimate' if report_text: print('Test d1 on ', report_text, ' examples: ', d_orig) print('Test d2 on ', report_text,' examples: ', d_targ) print('Test distance difference on %s examples: %0.4f' % (report_text, avg_dd)) print('Noise added: ', noise) print("dist_latent_orig_recon on ", report_text, "examples : ", d_latent) print() def evaluate_ae(): do_eval(recons, x_test, x_test, y_test, y_test, 'clean_train_clean_eval', False, None, None, latent1_orig, latent1_orig_recon) print("Training autoencoder") train_ae(sess, loss, x_train,x_train, evaluate = evaluate_ae, args=train_params, rng=rng, var_list=model.get_params()) #with sess.as_default(): # save(filepath_ae, model) ''' save_dir = 'models' model_name = 'cifar10_AE' model_path_ae = os.path.join(save_dir, model_name) if clean_train_ae == True: input_img = Input(shape=(32, 32, 3)) x = Conv2D(64, (3, 3), padding='same')(input_img) x = BatchNormalization()(x) x = Activation('relu')(x) x = MaxPooling2D((2, 2), padding='same')(x) x = Conv2D(32, (3, 3), padding='same')(x) x = BatchNormalization()(x) x = Activation('relu')(x) x = MaxPooling2D((2, 2), padding='same')(x) x = Conv2D(16, (3, 3), padding='same')(x) x = BatchNormalization()(x) x = Activation('relu')(x) encoded = MaxPooling2D((2, 2), padding='same')(x) x = Conv2D(16, (3, 3), padding='same')(encoded) x = BatchNormalization()(x) x = Activation('relu')(x) x = UpSampling2D((2, 2))(x) x = Conv2D(32, (3, 3), padding='same')(x) x = BatchNormalization()(x) x = Activation('relu')(x) x = UpSampling2D((2, 2))(x) x = Conv2D(64, (3, 
3), padding='same')(x) x = BatchNormalization()(x) x = Activation('relu')(x) x = UpSampling2D((2, 2))(x) x = Conv2D(3, (3, 3), padding='same')(x) x = BatchNormalization()(x) decoded = Activation('sigmoid')(x) model = Model(input_img, decoded) model.compile(optimizer='adam', loss='binary_crossentropy') #es_cb = EarlyStopping(monitor='val_loss', patience=2, verbose=1, mode='auto') #chkpt = saveDir + 'AutoEncoder_Cifar10_Deep_weights.{epoch:02d}-{loss:.2f}-{val_loss:.2f}.hdf5' #cp_cb = ModelCheckpoint(filepath = chkpt, monitor='val_loss', verbose=1, save_best_only=True, mode='auto') model.fit( x_train, x_train, batch_size=128, epochs=2, verbose=1, validation_data=(x_test, x_test), #callbacks=[es_cb, cp_cb], shuffle=True) score = model.evaluate(x_test, x_test, verbose=1) print(score) model.save(model_path_ae) print('Saved trained model at %s ' % model_path) else: model = load_model(model_path_ae) num_classes = 10 save_dir = 'models' model_name = 'cifar10_CNN' model_path_cls = os.path.join(save_dir, model_name) if clean_train_cl == True: print("Training CNN classifier") cl_model = Sequential() cl_model.add( Conv2D(32, (3, 3), padding='same', input_shape=x_train.shape[1:])) cl_model.add(Activation('relu')) cl_model.add(Conv2D(32, (3, 3))) cl_model.add(Activation('relu')) cl_model.add(MaxPooling2D(pool_size=(2, 2))) cl_model.add(Dropout(0.25)) cl_model.add(Conv2D(64, (3, 3), padding='same')) cl_model.add(Activation('relu')) cl_model.add(Conv2D(64, (3, 3))) cl_model.add(Activation('relu')) cl_model.add(MaxPooling2D(pool_size=(2, 2))) cl_model.add(Dropout(0.25)) cl_model.add(Flatten()) cl_model.add(Dense(512)) cl_model.add(Activation('relu')) cl_model.add(Dropout(0.5)) cl_model.add(Dense(num_classes)) cl_model.add(Activation('softmax')) opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6) # Let's train the model using RMSprop cl_model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy']) cl_model.fit(x_train, y_train, batch_size=90, epochs=4, 
validation_data=(x_test, y_test), shuffle=True) cl_model.save(model_path_cls) print('Saved trained model at %s ' % model_path) else: cl_model = load_model(model_path_cls) # Score trained model. scores = cl_model.evaluate(x_test, y_test, verbose=1) print('Test loss:', scores[0]) print('Test accuracy:', scores[1]) ''' train(sess, loss_cls, None, None, dataset_train=dataset_train, dataset_size=dataset_size, evaluate=eval_cls, args=train_params_cls, rng=rng, var_list=cl_model.get_params()) ''' #with sess.as_default(): # save(filepath_cl, cl_model) ''' else: model = load(filepath_ae) cl_model = load(filepath_cl) ''' #train_cls(sess, loss_cls, x_train, y_train, evaluate = eval_cls, args = train_params_cls, rng = rng, var_list = cl_model.get_params()) #train_cls(sess, loss_cls, x_train, y_train, evaluate = eval_cls, args = train_params_cls, rng = rng, var_list = cl_model.get_params()) ########################################################################### # Craft adversarial examples using Carlini and Wagner's approach ########################################################################### nb_adv_per_sample = str(nb_classes - 1) if targeted else '1' print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample + ' adversarial examples') print("This could take some time ...") # Instantiate a CW attack object cw = CarliniWagnerAE(model, cl_model, sess=sess) if viz_enabled: assert source_samples == nb_classes idxs = [ np.where(np.argmax(y_test, axis=1) == i)[0][0] for i in range(nb_classes) ] if targeted: if viz_enabled: # Initialize our array for grid visualization grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels) grid_viz_data = np.zeros(grid_shape, dtype='f') grid_viz_data_1 = np.zeros(grid_shape, dtype='f') adv_inputs = np.array([[instance] * (nb_classes - 1) for instance in x_test[idxs]], dtype=np.float32) #adv_input_y = np.array([[instance]*(nb_classes-1) for instance in y_test[idxs]]) adv_input_y = [] for curr_num in range(nb_classes): 
targ = [] for id in range(nb_classes - 1): targ.append(y_test[idxs[curr_num]]) adv_input_y.append(targ) adv_input_y = np.array(adv_input_y) adv_target_y = [] for curr_num in range(nb_classes): targ = [] for id in range(nb_classes): if (id != curr_num): targ.append(y_test[idxs[id]]) adv_target_y.append(targ) adv_target_y = np.array(adv_target_y) #print("adv_input_y: \n", adv_input_y) #print("adv_target_y: \n", adv_target_y) adv_input_targets = [] for curr_num in range(nb_classes): targ = [] for id in range(nb_classes): if (id != curr_num): targ.append(x_test[idxs[id]]) adv_input_targets.append(targ) adv_input_targets = np.array(adv_input_targets) adv_inputs = adv_inputs.reshape((source_samples * (nb_classes - 1), img_rows, img_cols, nchannels)) adv_input_targets = adv_input_targets.reshape( (source_samples * (nb_classes - 1), img_rows, img_cols, nchannels)) adv_input_y = adv_input_y.reshape( source_samples * (nb_classes - 1), 10) adv_target_y = adv_target_y.reshape( source_samples * (nb_classes - 1), 10) one_hot = np.zeros((nb_classes, nb_classes)) one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1 adv_ys = np.array([one_hot] * source_samples, dtype=np.float32).reshape( (source_samples * nb_classes, nb_classes)) yname = "y_target" cw_params_batch_size = source_samples * (nb_classes - 1) cw_params = { 'binary_search_steps': 4, yname: adv_ys, 'max_iterations': attack_iterations, 'learning_rate': CW_LEARNING_RATE, 'batch_size': cw_params_batch_size, 'initial_const': 1 } adv = cw.generate_np(adv_inputs, adv_input_targets, **cw_params) adv = sess.run(adv) #print("shaep of adv: ", np.shape(adv)) ''' recons = model.get_layer(x, 'RECON') recon_orig = model.get_layer(adv_inputs, 'RECON') recon_adv = model.get_layer(adv, 'RECON') lat_orig = model.get_layer(x, 'LATENT') lat_orig_recon = model.get_layer(recons, 'LATENT') #pred_adv_recon = cl_model.get_logits(recon_adv) ''' recon_orig = model.predict(adv_inputs) recon_adv = model.predict(adv) #eval_params = {'batch_size': 
np.minimum(nb_classes, source_samples)} #eval_params = {'batch_size': 90} #acc_1 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls) #acc_2 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_input_y, adv_input_targets, args=eval_params_cls) #noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(sess, x, x_t,recons, adv_inputs, adv_input_targets, adv, recon_adv,lat_orig, lat_orig_recon, args=eval_params) shape = np.shape(adv_inputs) noise = reduce_sum(np.square(adv_inputs - adv), list(range(1, len(shape)))) print("noise: ", noise) #recon_adv = sess.run(recon_adv) #recon_orig = sess.run(recon_orig) scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1) scores2 = cl_model.evaluate(recon_adv, adv_target_y, verbose=1) print("classifier acc_target: ", scores2[1]) print("classifier acc_true: ", scores1[1]) #print("recon_adv[0]\n", recon_adv[0,:,:,0]) curr_class = 0 if viz_enabled: for j in range(nb_classes): if targeted: for i in range(nb_classes): #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i] if (i == j): grid_viz_data[i, j] = recon_orig[curr_class * 9] grid_viz_data_1[i, j] = adv_inputs[curr_class * 9] curr_class = curr_class + 1 else: if (j > i): grid_viz_data[i, j] = recon_adv[i * (nb_classes - 1) + j - 1] grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) + j - 1] else: grid_viz_data[i, j] = recon_adv[i * (nb_classes - 1) + j] grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) + j] #rint(grid_viz_data.shape) print('--------------------------------------') # Compute the number of adversarial examples that were successfully found # Compute the average distortion introduced by the algorithm percent_perturbed = np.mean( np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5) print('Avg. 
L_2 norm of perturbations {0:.4f}'.format(percent_perturbed)) # Close TF session #sess.close() # Finally, block & display a grid of all the adversarial examples if viz_enabled: #_ = grid_visual(grid_viz_data) #_ = grid_visual(grid_viz_data_1) plt.ioff() figure = plt.figure() figure.canvas.set_window_title('Cleverhans: Grid Visualization') # Add the images to the plot num_cols = grid_viz_data.shape[0] num_rows = grid_viz_data.shape[1] num_channels = grid_viz_data.shape[4] for yy in range(num_rows): for xx in range(num_cols): figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols)) plt.axis('off') plt.imshow(grid_viz_data[xx, yy, :, :, :]) # Draw the plot and return plt.savefig('cifar10_fig1') figure = plt.figure() figure.canvas.set_window_title('Cleverhans: Grid Visualization') for yy in range(num_rows): for xx in range(num_cols): figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols)) plt.axis('off') plt.imshow(grid_viz_data_1[xx, yy, :, :, :]) # Draw the plot and return plt.savefig('cifar10_fig2') #return report #adversarial training if (adv_train == True): print("starting adversarial training") #sess1 = tf.Session() adv_input_set = [] adv_input_target_set = [] for i in range(20): indices = np.arange(np.shape(x_train)[0]) np.random.shuffle(indices) print("indices: ", indices[1:10]) x_train = x_train[indices] y_train = y_train[indices] idxs = [ np.where(np.argmax(y_train, axis=1) == i)[0][0] for i in range(nb_classes) ] adv_inputs_2 = np.array([[instance] * (nb_classes - 1) for instance in x_train[idxs]], dtype=np.float32) adv_input_targets_2 = [] for curr_num in range(nb_classes): targ = [] for id in range(nb_classes): if (id != curr_num): targ.append(x_train[idxs[id]]) adv_input_targets_2.append(targ) adv_input_targets_2 = np.array(adv_input_targets_2) adv_inputs_2 = adv_inputs_2.reshape( (source_samples * (nb_classes - 1), img_rows, img_cols, nchannels)) adv_input_targets_2 = adv_input_targets_2.reshape( (source_samples * (nb_classes - 1), 
img_rows, img_cols, nchannels)) adv_input_set.append(adv_inputs_2) adv_input_target_set.append(adv_input_targets_2) adv_input_set = np.array(adv_input_set), adv_input_target_set = np.array(adv_input_target_set) print("shape of adv_input_set: ", np.shape(adv_input_set)) print("shape of adv_input_target_set: ", np.shape(adv_input_target_set)) adv_input_set = np.reshape( adv_input_set, (np.shape(adv_input_set)[0] * np.shape(adv_input_set)[1] * np.shape(adv_input_set)[2], np.shape(adv_input_set)[3], np.shape(adv_input_set)[4], np.shape(adv_input_set)[5])) adv_input_target_set = np.reshape(adv_input_target_set, (np.shape(adv_input_target_set)[0] * np.shape(adv_input_target_set)[1], np.shape(adv_input_target_set)[2], np.shape(adv_input_target_set)[3], np.shape(adv_input_target_set)[4])) print("generated adversarial training set") adv_set = cw.generate_np(adv_input_set, adv_input_target_set, **cw_params) x_train_aim = np.append(x_train, adv_input_set, axis=0) x_train_app = np.append(x_train, adv_set, axis=0) model_name = 'cifar10_AE_adv' model_path_ae = os.path.join(save_dir, model_name) input_img = Input(shape=(32, 32, 3)) x = Conv2D(64, (3, 3), padding='same')(input_img) x = BatchNormalization()(x) x = Activation('relu')(x) x = MaxPooling2D((2, 2), padding='same')(x) x = Conv2D(32, (3, 3), padding='same')(x) x = BatchNormalization()(x) x = Activation('relu')(x) x = MaxPooling2D((2, 2), padding='same')(x) x = Conv2D(16, (3, 3), padding='same')(x) x = BatchNormalization()(x) x = Activation('relu')(x) encoded = MaxPooling2D((2, 2), padding='same')(x) x = Conv2D(16, (3, 3), padding='same')(encoded) x = BatchNormalization()(x) x = Activation('relu')(x) x = UpSampling2D((2, 2))(x) x = Conv2D(32, (3, 3), padding='same')(x) x = BatchNormalization()(x) x = Activation('relu')(x) x = UpSampling2D((2, 2))(x) x = Conv2D(64, (3, 3), padding='same')(x) x = BatchNormalization()(x) x = Activation('relu')(x) x = UpSampling2D((2, 2))(x) x = Conv2D(3, (3, 3), padding='same')(x) x = 
BatchNormalization()(x) decoded = Activation('sigmoid')(x) model2 = Model(input_img, decoded) model2.compile(optimizer='adam', loss='binary_crossentropy') model2.fit(x_train_app, x_train_aim, batch_size=128, epochs=20, verbose=1, validation_data=(x_test, x_test), callbacks=[es_cb, cp_cb], shuffle=True) score = model.evaluate(x_test, x_test, verbose=1) print(score) model2.save(model_path_ae_adv) print('Saved adv trained model at %s ' % model_path) ''' model_adv_trained = ModelBasicAE('model_adv_trained', nb_layers, nb_latent_size) recons_2 = model_adv_trained.get_layer(x, 'RECON') loss_2 = SquaredError(model_adv_trained) train_ae(sess, loss_2, x_train_app, x_train_aim ,args=train_params, rng=rng, var_list=model_adv_trained.get_params()) saver = tf.train.Saver() saver.save(sess, model_path) ''' cw2 = CarliniWagnerAE(model_adv_trained, cl_model, sess=sess) adv_2 = cw2.generate_np(adv_inputs, adv_input_targets, **cw_params) recon_adv = model2.predict(adv) recon_orig = model2.predict(adv_inputs) #print("shaep of adv: ", np.shape(adv)) ''' recon_orig = model_adv_trained.get_layer(adv_inputs, 'RECON') recon_adv = model_adv_trained.get_layer(adv_2, 'RECON') lat_orig = model_adv_trained.get_layer(x, 'LATENT') lat_orig_recon = model_adv_trained.get_layer(recons, 'LATENT') ''' #pred_adv_recon = cl_model.get_logits(recon_adv) #eval_params = {'batch_size': np.minimum(nb_classes, source_samples)} #eval_params = {'batch_size': 90} if targeted: #noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(sess, x, x_t,recons, adv_inputs, adv_input_targets, adv_2, recon_adv,lat_orig, lat_orig_recon, args=eval_params) #acc = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls) noise = reduce_sum(tf.square(adv_inputs - adv_2), list(range(1, len(shape)))) print("noise: ", noise) #print("d1: ", d1) #print("d2: ", d2) #print("d1-d2: ", dist_diff) #print("Avg_dist_lat: ", avg_dist_lat) #print("classifier acc: ", acc) ''' recon_adv = 
sess.run(recon_adv) recon_orig = sess.run(recon_orig) ''' scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1) scores2 = cl_model.eval_params(recon_adv, adv_target_y, verbose=1) print("classifier acc_target: ", scores2[1]) print("classifier acc_true: ", scores1[1]) #print("recon_adv[0]\n", recon_adv[0,:,:,0]) curr_class = 0 if viz_enabled: for j in range(nb_classes): for i in range(nb_classes): #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i] if (i == j): grid_viz_data[i, j] = recon_orig[curr_class * 9] grid_viz_data_1[i, j] = adv_inputs[curr_class * 9] curr_class = curr_class + 1 else: if (j > i): grid_viz_data[i, j] = recon_adv[i * (nb_classes - 1) + j - 1] grid_viz_data_1[i, j] = adv_2[i * (nb_classes - 1) + j - 1] else: grid_viz_data[i, j] = recon_adv[i * (nb_classes - 1) + j] grid_viz_data_1[i, j] = adv_2[i * (nb_classes - 1) + j] #rint(grid_viz_data.shape) print('--------------------------------------') # Compute the number of adversarial examples that were successfully found # Compute the average distortion introduced by the algorithm percent_perturbed = np.mean( np.sum((adv_2 - adv_inputs)**2, axis=(1, 2, 3))**.5) print( 'Avg. 
L_2 norm of perturbations {0:.4f}'.format(percent_perturbed)) # Close TF session sess.close() # Finally, block & display a grid of all the adversarial examples if viz_enabled: #_ = grid_visual(grid_viz_data) #_ = grid_visual(grid_viz_data_1) plt.ioff() figure = plt.figure() figure.canvas.set_window_title('Cleverhans: Grid Visualization') # Add the images to the plot num_cols = grid_viz_data.shape[0] num_rows = grid_viz_data.shape[1] num_channels = grid_viz_data.shape[4] for yy in range(num_rows): for xx in range(num_cols): figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols)) plt.axis('off') if num_channels == 1: plt.imshow(grid_viz_data[xx, yy, :, :, 0]) else: plt.imshow(grid_viz_data[xx, yy, :, :, :]) # Draw the plot and return plt.savefig('cifar10_fig1_adv_trained') figure = plt.figure() figure.canvas.set_window_title('Cleverhans: Grid Visualization') for yy in range(num_rows): for xx in range(num_cols): figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols)) plt.axis('off') plt.imshow(grid_viz_data_1[xx, yy, :, :, :]) # Draw the plot and return plt.savefig('cifar10_fig2_adv_trained') return report #binarization defense if (binarization_defense == True or mean_filtering == True): #adv = sess.run(adv) # print(adv[0]) if (binarization_defense == True): adv[adv > 0.5] = 1.0 adv[adv <= 0.5] = 0.0 else: #radius = 2 #adv_list = [mean(adv[i,:,:,0], disk(radius)) for i in range(0, np.shape(adv)[0])] #adv = np.array(adv_list) #adv = np.expand_dims(adv, axis = 3) adv = uniform_filter(adv, 2) #adv = median_filter(adv, 2) #print("after bin ") #print(adv[0]) ''' recons = model.get_layer(x, 'RECON') recon_orig = model.get_layer(adv_inputs, 'RECON') recon_adv = model.get_layer(adv, 'RECON') lat_orig = model.get_layer(x, 'LATENT') lat_orig_recon = model.get_layer(recon_orig, 'LATENT') ''' recon_orig = model.predict(adv_inputs) recon_adv = model.predict(adv) #pred_adv_recon = cl_model.get_logits(recon_adv) #eval_params = {'batch_size': 
np.minimum(nb_classes, source_samples)} eval_params = {'batch_size': 90} if targeted: #noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(sess, x, x_t,recons, adv_inputs, adv_input_targets, adv, recon_adv,lat_orig, lat_orig_recon, args=eval_params) #acc1 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls) #acc2 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_input_y, adv_input_targets, args=eval_params_cls) #print("d1: ", d1) #print("d2: ", d2) noise = reduce_sum(tf.square(x_orig - x_adv), list(range(1, len(shape)))) print("noise: ", noise) #print("classifier acc for target class: ", acc1) #print("classifier acc for true class: ", acc2) ''' recon_adv = sess.run(recon_adv) recon_orig = sess.run(recon_orig) ''' scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1) scores2 = cl_model.evalluate(recon_adv, adv_target_y, verbose=1) print("classifier acc_target: ", scores2[1]) print("classifier acc_true: ", scores1[1]) #print("recon_adv[0]\n", recon_adv[0,:,:,0]) curr_class = 0 if viz_enabled: for j in range(nb_classes): for i in range(nb_classes): #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i] if (i == j): grid_viz_data[i, j] = recon_orig[curr_class * 9] grid_viz_data_1[i, j] = adv_inputs[curr_class * 9] curr_class = curr_class + 1 else: if (j > i): grid_viz_data[i, j] = recon_adv[i * (nb_classes - 1) + j - 1] grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) + j - 1] else: grid_viz_data[i, j] = recon_adv[i * (nb_classes - 1) + j] grid_viz_data_1[i, j] = adv[i * (nb_classes - 1) + j] sess.close() #_ = grid_visual(grid_viz_data) #_ = grid_visual(grid_viz_data_1) plt.ioff() figure = plt.figure() figure.canvas.set_window_title('Cleverhans: Grid Visualization') # Add the images to the plot num_cols = grid_viz_data.shape[0] num_rows = grid_viz_data.shape[1] num_channels = grid_viz_data.shape[4] for yy in range(num_rows): for xx in range(num_cols): figure.add_subplot(num_rows, num_cols, 
(xx + 1) + (yy * num_cols)) plt.axis('off') if num_channels == 1: plt.imshow(grid_viz_data[xx, yy, :, :, 0]) else: plt.imshow(grid_viz_data[xx, yy, :, :, :]) # Draw the plot and return plt.savefig('cifar10_fig1_bin') figure = plt.figure() figure.canvas.set_window_title('Cleverhans: Grid Visualization') for yy in range(num_rows): for xx in range(num_cols): figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols)) plt.axis('off') if num_channels == 1: plt.imshow(grid_viz_data_1[xx, yy, :, :, 0]) else: plt.imshow(grid_viz_data_1[xx, yy, :, :, :]) # Draw the plot and return plt.savefig('cifar10_fig2_bin')
def _cw_latent_build_autoencoder(input_shape):
    """Build and compile the convolutional auto-encoder.

    Three Conv-BatchNorm-ReLU-MaxPool stages (64, 32, 16 filters) are
    followed by three Conv-BatchNorm-ReLU-UpSample stages (16, 32, 64
    filters) and a final Conv-BatchNorm-sigmoid layer with one filter per
    input channel.  The model is compiled with Adam and binary
    cross-entropy.

    :param input_shape: (rows, cols, channels) of one input image
    :return: the compiled Keras ``Model``
    """
    input_img = Input(shape=input_shape)
    net = input_img
    for filters in (64, 32, 16):
        net = Conv2D(filters, (3, 3), padding='same')(net)
        net = BatchNormalization()(net)
        net = Activation('relu')(net)
        net = MaxPooling2D((2, 2), padding='same')(net)
    for filters in (16, 32, 64):
        net = Conv2D(filters, (3, 3), padding='same')(net)
        net = BatchNormalization()(net)
        net = Activation('relu')(net)
        net = UpSampling2D((2, 2))(net)
    net = Conv2D(input_shape[-1], (3, 3), padding='same')(net)
    net = BatchNormalization()(net)
    decoded = Activation('sigmoid')(net)

    autoencoder = Model(input_img, decoded)
    autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
    return autoencoder


def _cw_latent_build_classifier(input_shape, nb_classes):
    """Build and compile the CNN classifier fed with reconstructions.

    :param input_shape: (rows, cols, channels) of one input image
    :param nb_classes: width of the final softmax layer
    :return: the compiled Keras ``Sequential`` model
    """
    cl_model = Sequential()
    cl_model.add(Conv2D(32, (3, 3), padding='same',
                        input_shape=input_shape))
    cl_model.add(Activation('relu'))
    cl_model.add(Conv2D(32, (3, 3)))
    cl_model.add(Activation('relu'))
    cl_model.add(MaxPooling2D(pool_size=(2, 2)))
    cl_model.add(Dropout(0.25))

    cl_model.add(Conv2D(64, (3, 3), padding='same'))
    cl_model.add(Activation('relu'))
    cl_model.add(Conv2D(64, (3, 3)))
    cl_model.add(Activation('relu'))
    cl_model.add(MaxPooling2D(pool_size=(2, 2)))
    cl_model.add(Dropout(0.25))

    cl_model.add(Flatten())
    cl_model.add(Dense(512))
    cl_model.add(Activation('relu'))
    cl_model.add(Dropout(0.5))
    cl_model.add(Dense(nb_classes))
    cl_model.add(Activation('softmax'))

    opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6)
    cl_model.compile(loss='categorical_crossentropy', optimizer=opt,
                     metrics=['accuracy'])
    return cl_model


def _cw_latent_targeted_pairs(samples):
    """Pair every row of ``samples`` with every *other* row.

    ``samples`` holds one entry per class.  Row ``s * (n - 1) + k`` of the
    first result is ``samples[s]``; the same row of the second result is
    the k-th entry of ``samples`` once entry ``s`` has been removed.

    :param samples: array whose first axis has one entry per class
    :return: ``(sources, targets)``, each with ``n * (n - 1)`` rows
    """
    nb_classes = samples.shape[0]
    sources = np.repeat(samples, nb_classes - 1, axis=0)
    targets = np.concatenate(
        [np.delete(samples, cls, axis=0) for cls in range(nb_classes)],
        axis=0)
    return sources, targets


def _cw_latent_fill_grids(grid_recon, grid_adv, recon_orig, recon_adv,
                          adv_inputs, adv):
    """Fill the two (class x class) visualisation grids in place.

    Diagonal cells show the clean source (its reconstruction in
    ``grid_recon``, the raw image in ``grid_adv``); off-diagonal cells show
    the adversarial example for that (i, j) pair and its reconstruction.
    """
    nb_classes = grid_recon.shape[0]
    per_source = nb_classes - 1
    for j in range(nb_classes):
        for i in range(nb_classes):
            if i == j:
                grid_recon[i, j] = recon_orig[i * per_source]
                grid_adv[i, j] = adv_inputs[i * per_source]
            else:
                # Each source has no entry for itself, so columns past the
                # diagonal are shifted down by one.
                k = i * per_source + (j - 1 if j > i else j)
                grid_recon[i, j] = recon_adv[k]
                grid_adv[i, j] = adv[k]


def _cw_latent_save_grid(grid, filename):
    """Render a (cols, rows, H, W, C) image grid and save it to ``filename``."""
    figure = plt.figure()
    # FigureCanvas.set_window_title was removed from recent Matplotlib; the
    # manager carries the same method on old and new releases.
    manager = getattr(figure.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title('Cleverhans: Grid Visualization')

    num_cols = grid.shape[0]
    num_rows = grid.shape[1]
    num_channels = grid.shape[4]
    for yy in range(num_rows):
        for xx in range(num_cols):
            figure.add_subplot(num_rows, num_cols,
                               (xx + 1) + (yy * num_cols))
            plt.axis('off')
            if num_channels == 1:
                plt.imshow(grid[xx, yy, :, :, 0])
            else:
                plt.imshow(grid[xx, yy, :, :, :])
    plt.savefig(filename)


def _cw_latent_summarize(ae_model, cl_model, adv_inputs, adv, true_y,
                         target_y, fig_names=None, show_distortion=True):
    """Print attack statistics and optionally save the two grid figures.

    :param ae_model: auto-encoder used to reconstruct the images
    :param cl_model: classifier evaluated on the reconstructions
    :param adv_inputs: clean source images, one row per (source, target)
    :param adv: (possibly defended) adversarial images, same layout
    :param true_y: one-hot labels of the sources
    :param target_y: one-hot labels of the attack targets
    :param fig_names: ``(recon_file, adv_file)`` or None to skip plotting
    :param show_distortion: also print the average L2 perturbation norm
    """
    recon_orig = ae_model.predict(adv_inputs)
    recon_adv = ae_model.predict(adv)

    # Per-example squared L2 distance, computed with NumPy so that the
    # actual numbers are printed.
    sq_diff = np.square(adv_inputs - adv)
    noise = np.sum(sq_diff, axis=tuple(range(1, sq_diff.ndim)))
    print("noise: ", noise)

    scores_true = cl_model.evaluate(recon_adv, true_y, verbose=1)
    scores_target = cl_model.evaluate(recon_adv, target_y, verbose=1)
    print("classifier acc_target: ", scores_target[1])
    print("classifier acc_true: ", scores_true[1])

    if show_distortion:
        print('--------------------------------------')
        percent_perturbed = np.mean(noise ** .5)
        print('Avg. L_2 norm of perturbations {0:.4f}'.format(
            percent_perturbed))

    if fig_names is not None:
        nb_classes = true_y.shape[1]
        grid_shape = (nb_classes, nb_classes) + tuple(adv_inputs.shape[1:])
        grid_recon = np.zeros(grid_shape, dtype='f')
        grid_adv = np.zeros(grid_shape, dtype='f')
        _cw_latent_fill_grids(grid_recon, grid_adv, recon_orig, recon_adv,
                              adv_inputs, adv)
        plt.ioff()
        _cw_latent_save_grid(grid_recon, fig_names[0])
        _cw_latent_save_grid(grid_adv, fig_names[1])


def cifar10_cw_latent(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      viz_enabled=VIZ_ENABLED,
                      nb_epochs=NB_EPOCHS,
                      batch_size=BATCH_SIZE,
                      source_samples=SOURCE_SAMPLES,
                      learning_rate=LEARNING_RATE,
                      attack_iterations=ATTACK_ITERATIONS,
                      targeted=TARGETED,
                      num_threads=None,
                      label_smoothing=0.1,
                      nb_filters=NB_FILTERS):
    """
    Targeted Carlini-Wagner attack on a CIFAR10 auto-encoder + classifier.

    Steps, in order:
      1. train (or load from ``models/cifar10_AE``) the auto-encoder;
      2. train (or load from ``models/cifar10_CNN_latent``) a classifier on
         the auto-encoder's reconstructions;
      3. take the first test image of every class and craft, with
         ``CarliniWagnerAE_Lat_Keras``, one adversarial example per
         (source class, other class) pair;
      4. if the module-level ``adv_train`` flag is set, retrain a fresh
         auto-encoder on clean + adversarial images, attack it again and
         return;
      5. otherwise, if ``binarization_defense`` or ``mean_filtering`` is
         set, apply that input defence to the adversarial images and
         re-evaluate.

    Besides its arguments the function reads the module-level names
    ``clean_train_ae``, ``clean_train_cl``, ``adv_train``,
    ``binarization_defense``, ``mean_filtering`` and ``CW_LEARNING_RATE``.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: save grid figures (``cifar10_fig*``) when True
    :param source_samples: number of source images; must equal the number
                           of classes because one source per class is used
    :param attack_iterations: ``max_iterations`` passed to the attack
    :param targeted: must be True; no untargeted variant exists here
    :param num_threads: if truthy, TF intra-op parallelism is set to 1
    :param nb_epochs: unused, kept for call compatibility
    :param batch_size: unused, kept for call compatibility
    :param learning_rate: unused, kept for call compatibility
    :param label_smoothing: unused, kept for call compatibility
    :param nb_filters: unused, kept for call compatibility
    :return: an AccuracyReport object (no field is filled in by this
             function)
    :raises NotImplementedError: if ``targeted`` is False
    :raises ValueError: if ``source_samples`` differs from the class count
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    set_log_level(logging.DEBUG)

    # Create the (single) TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))
    print("Created TensorFlow session.")

    try:
        # Get CIFAR10 data
        data = CIFAR10(train_start=train_start, train_end=train_end,
                       test_start=test_start, test_end=test_end)
        x_train, y_train = data.get_set('train')
        x_test, y_test = data.get_set('test')

        # Obtain image parameters
        img_rows, img_cols, nchannels = x_train.shape[1:4]
        nb_classes = y_train.shape[1]
        print("img_Rows, img_cols, nchannels: ", img_rows, img_cols,
              nchannels)
        image_shape = (img_rows, img_cols, nchannels)

        save_dir = 'models'

        # ------------------------------------------------------------------
        # Auto-encoder
        # ------------------------------------------------------------------
        model_path_ae = os.path.join(save_dir, 'cifar10_AE')
        if clean_train_ae:
            model = _cw_latent_build_autoencoder(image_shape)
            model.fit(x_train, x_train,
                      batch_size=128,
                      epochs=5,
                      verbose=1,
                      validation_data=(x_test, x_test),
                      shuffle=True)
            score = model.evaluate(x_test, x_test, verbose=1)
            print(score)
            model.save(model_path_ae)
            print('Saved trained model at %s ' % model_path_ae)
        else:
            model = load_model(model_path_ae)

        # Despite the names these are full-size reconstructions (the
        # auto-encoder output), not bottleneck activations.
        x_lat_train = model.predict(x_train)
        x_lat_test = model.predict(x_test)

        # ------------------------------------------------------------------
        # Classifier trained on reconstructions
        # ------------------------------------------------------------------
        model_path_cls = os.path.join(save_dir, 'cifar10_CNN_latent')
        if clean_train_cl:
            print("Training CNN AE")
            cl_model = _cw_latent_build_classifier(x_train.shape[1:],
                                                   nb_classes)
            # NOTE(review): validation uses raw x_test while training and
            # the score below use reconstructions -- confirm this is
            # intended.
            cl_model.fit(x_lat_train, y_train,
                         batch_size=90,
                         epochs=2,
                         validation_data=(x_test, y_test),
                         shuffle=True)
            cl_model.save(model_path_cls)
            print('Saved trained model at %s ' % model_path_cls)
        else:
            cl_model = load_model(model_path_cls)

        # Score trained model.
        scores = cl_model.evaluate(x_lat_test, y_test, verbose=1)
        print('Test loss:', scores[0])
        print('Test accuracy:', scores[1])

        ####################################################################
        # Craft adversarial examples using Carlini and Wagner's approach
        ####################################################################
        nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
        print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample
              + ' adversarial examples')
        print("This could take some time ...")

        # Checked here (not at the top) so that model training/saving above
        # still happens exactly as before.
        if not targeted:
            raise NotImplementedError(
                "cifar10_cw_latent only implements the targeted attack")
        if source_samples != nb_classes:
            raise ValueError(
                "source_samples must equal the number of classes "
                "(%d), got %d" % (nb_classes, source_samples))

        # Instantiate a CW attack object
        cw = CarliniWagnerAE_Lat_Keras(model, cl_model, sess=sess)

        # First test example of every class.
        test_labels = np.argmax(y_test, axis=1)
        class_idxs = [np.where(test_labels == cls)[0][0]
                      for cls in range(nb_classes)]

        adv_inputs, adv_input_targets = _cw_latent_targeted_pairs(
            x_test[class_idxs])
        adv_inputs = adv_inputs.astype(np.float32)
        adv_input_y, adv_target_y = _cw_latent_targeted_pairs(
            y_test[class_idxs])

        adv_ys = np.tile(np.eye(nb_classes, dtype=np.float32),
                         (source_samples, 1))
        cw_params = {
            'binary_search_steps': 4,
            'y_target': adv_ys,
            'max_iterations': attack_iterations,
            'learning_rate': CW_LEARNING_RATE,
            'batch_size': source_samples * (nb_classes - 1),
            'initial_const': 1
        }

        adv = cw.generate_np(adv_inputs, adv_input_targets, **cw_params)
        # NOTE(review): kept from the original; implies this attack class
        # returns something fetchable from generate_np -- confirm.
        adv = sess.run(adv)

        _cw_latent_summarize(
            model, cl_model, adv_inputs, adv, adv_input_y, adv_target_y,
            fig_names=('cifar10_fig1', 'cifar10_fig2') if viz_enabled
            else None)

        # ------------------------------------------------------------------
        # Adversarial training
        # ------------------------------------------------------------------
        if adv_train:
            print("starting adversarial training")
            source_batches = []
            target_batches = []
            for _ in range(20):
                # Reshuffle so each round picks different class exemplars.
                indices = np.arange(np.shape(x_train)[0])
                np.random.shuffle(indices)
                print("indices: ", indices[1:10])
                x_train = x_train[indices]
                y_train = y_train[indices]
                train_labels = np.argmax(y_train, axis=1)
                train_idxs = [np.where(train_labels == cls)[0][0]
                              for cls in range(nb_classes)]
                sources, targets = _cw_latent_targeted_pairs(
                    x_train[train_idxs])
                source_batches.append(sources.astype(np.float32))
                target_batches.append(targets)

            adv_input_set = np.concatenate(source_batches, axis=0)
            adv_input_target_set = np.concatenate(target_batches, axis=0)
            print("shape of adv_input_set: ", np.shape(adv_input_set))
            print("shape of adv_input_target_set: ",
                  np.shape(adv_input_target_set))
            print("generated adversarial training set")

            # NOTE(review): cw_params['batch_size'] and 'y_target' were
            # sized for the evaluation batch, not for this larger set --
            # verify the attack class copes with that.
            adv_set = cw.generate_np(adv_input_set, adv_input_target_set,
                                     **cw_params)

            # Inputs: clean + adversarial; targets: clean + clean sources.
            x_train_aim = np.append(x_train, adv_input_set, axis=0)
            x_train_app = np.append(x_train, adv_set, axis=0)

            model_path_ae_adv = os.path.join(save_dir, 'cifar10_AE_adv_lat')
            model2 = _cw_latent_build_autoencoder(image_shape)
            model2.fit(x_train_app, x_train_aim,
                       batch_size=128,
                       epochs=20,
                       verbose=1,
                       validation_data=(x_test, x_test),
                       shuffle=True)
            score = model2.evaluate(x_test, x_test, verbose=1)
            print(score)
            model2.save(model_path_ae_adv)
            print('Saved adv trained model at ', model_path_ae_adv)

            # Attack the adversarially trained auto-encoder.
            cw2 = CarliniWagnerAE_Lat_Keras(model2, cl_model, sess=sess)
            adv_2 = cw2.generate_np(adv_inputs, adv_input_targets,
                                    **cw_params)

            _cw_latent_summarize(
                model2, cl_model, adv_inputs, adv_2, adv_input_y,
                adv_target_y,
                fig_names=('cifar10_fig1_adv_trained',
                           'cifar10_fig2_adv_trained') if viz_enabled
                else None)
            return report

        # ------------------------------------------------------------------
        # Input defences: binarization or mean filtering
        # ------------------------------------------------------------------
        if binarization_defense or mean_filtering:
            if binarization_defense:
                adv = np.where(adv > 0.5, 1.0, 0.0).astype(adv.dtype)
            else:
                # 2x2 spatial mean filter; size 1 on the batch and channel
                # axes keeps images and channels from bleeding into each
                # other.
                adv = uniform_filter(adv, size=(1, 2, 2, 1))

            _cw_latent_summarize(
                model, cl_model, adv_inputs, adv, adv_input_y, adv_target_y,
                fig_names=('cifar10_fig1_bin', 'cifar10_fig2_bin')
                if viz_enabled else None,
                show_distortion=False)
    finally:
        # Close TF session on every path, including errors.
        sess.close()

    return report
def cifar10_tutorial(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     architecture=ARCHITECTURE,
                     load_model=LOAD_MODEL,
                     ckpt_dir='None',
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS,
                     num_threads=None,
                     label_smoothing=0.):
    """
    CIFAR10 cleverhans tutorial

    Builds (or restores) a CIFAR10 classifier, optionally wraps it with the
    ``tf_robustify`` predictor, and evaluates it on clean and adversarial
    test examples. Most experiment switches are read from the module-level
    ``FLAGS`` object and scalars are logged through the module-level
    ``swriter``.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param architecture: classifier to build, either 'ConvNet' or 'ResNet'
    :param load_model: if True, restore weights from a checkpoint instead
                       of training
    :param ckpt_dir: directory holding the checkpoints; the string 'None'
                     selects the default ``./models/`` directory
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: number of filters passed to ModelAllConvolutional
    :param num_threads: if truthy, the session is limited to one intra-op
                        thread (the actual value is not used)
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object (created here; this function does not
             set any attribute on it)
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Seed TF and NumPy from the wall clock, so every run uses fresh seeds
    tf.set_random_seed(int(time.time() * 1000) % 2**31)
    np.random.seed(int(time.time() * 1001) % 2**31)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)

    x_train, y_train = data.get_set('train')
    pgd_train = None
    if FLAGS.load_pgd_train_samples:
        # Replace the training set by pre-computed clean/PGD sample files
        pgd_path = os.path.expanduser('~/data/advhyp/{}/samples'.format(
            FLAGS.load_pgd_train_samples))
        x_train = np.load(os.path.join(pgd_path, 'train_clean.npy'))
        y_train = np.load(os.path.join(pgd_path, 'train_y.npy'))
        pgd_train = np.load(os.path.join(pgd_path, 'train_pgd.npy'))
        if x_train.shape[1] == 3:
            # Channels-first on disk -> channels-last
            x_train = x_train.transpose((0, 2, 3, 1))
            pgd_train = pgd_train.transpose((0, 2, 3, 1))
        if len(y_train.shape) == 1:
            # Integer labels -> one-hot
            y_tmp = np.zeros((len(y_train), np.max(y_train) + 1),
                             y_train.dtype)
            y_tmp[np.arange(len(y_tmp)), y_train] = 1.
            y_train = y_tmp

    x_test, y_test = data.get_set('test')
    pgd_test = None
    if FLAGS.load_pgd_test_samples:
        pgd_path = os.path.expanduser('~/data/advhyp/{}/samples'.format(
            FLAGS.load_pgd_test_samples))
        x_test = np.load(os.path.join(pgd_path, 'test_clean.npy'))
        y_test = np.load(os.path.join(pgd_path, 'test_y.npy'))
        pgd_test = np.load(os.path.join(pgd_path, 'test_pgd.npy'))
        if x_test.shape[1] == 3:
            x_test = x_test.transpose((0, 2, 3, 1))
            pgd_test = pgd_test.transpose((0, 2, 3, 1))
        if len(y_test.shape) == 1:
            y_tmp = np.zeros((len(y_test), np.max(y_test) + 1), y_test.dtype)
            y_tmp[np.arange(len(y_tmp)), y_test] = 1.
            y_test = y_tmp

    # Shuffle the training set (and the matching PGD samples) consistently
    train_idcs = np.arange(len(x_train))
    np.random.shuffle(train_idcs)
    x_train, y_train = x_train[train_idcs], y_train[train_idcs]
    if pgd_train is not None:
        pgd_train = pgd_train[train_idcs]

    # Keep the first FLAGS.test_size test examples, in shuffled order
    test_idcs = np.arange(len(x_test))[:FLAGS.test_size]
    np.random.shuffle(test_idcs)
    x_test, y_test = x_test[test_idcs], y_test[test_idcs]
    if pgd_test is not None:
        pgd_test = pgd_test[test_idcs]

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Parameters for training a CIFAR10 model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    pgd_params = {
        # ord: ,
        'eps': FLAGS.eps,
        'eps_iter': (FLAGS.eps / 5),
        'nb_iter': 10,
        'clip_min': 0,
        'clip_max': 255
    }
    cw_params = {
        'binary_search_steps': FLAGS.cw_search_steps,
        'max_iterations': FLAGS.cw_steps,  # 1000
        'abort_early': True,
        'learning_rate': FLAGS.cw_lr,
        'batch_size': batch_size,
        'confidence': 0,
        'initial_const': FLAGS.cw_c,
        'clip_min': 0,
        'clip_max': 255
    }

    # Madry doesn't divide by 255, so rescale the inputs to [0, 255]
    x_train *= 255
    x_test *= 255
    if pgd_train is not None:
        pgd_train *= 255
    if pgd_test is not None:
        pgd_test *= 255

    print('x_train amin={} amax={}'.format(np.amin(x_train),
                                           np.amax(x_train)))
    print('x_test amin={} amax={}'.format(np.amin(x_test), np.amax(x_test)))
    print(
        'clip_min : {}, clip_max : {} >> CHECK WITH WHICH VALUES THE CLASSIFIER WAS PRETRAINED !!! <<'
        .format(pgd_params['clip_min'], pgd_params['clip_max']))

    rng = np.random.RandomState()  # [2017, 8, 30]
    debug_dict = dict() if FLAGS.save_debug_dict else None

    def do_eval(preds,
                x_set,
                y_set,
                report_key,
                is_adv=None,
                predictor=None,
                x_adv=None):
        """
        Evaluate accuracy on (x_set, y_set), print it and log it to swriter.

        Without a predictor, accuracy comes from model_eval on `preds`.
        With a predictor, the plain accuracy is evaluated first (recursive
        call), then the inputs (replaced by the output of the `x_adv`
        tensor when one is given) are sent batch-wise through the predictor
        generator, which yields (prediction, detection) pairs.
        `report_key` is accepted for call compatibility but not used.
        """
        if predictor is None:
            acc = model_eval(sess, x, y, preds, x_set, y_set,
                             args=eval_params)
        else:
            # Report the uncorrected accuracy before the corrected one
            do_eval(preds, x_set, y_set, report_key, is_adv=is_adv)
            if x_adv is not None:
                x_set_adv, = batch_eval(sess, [x], [x_adv], [x_set],
                                        batch_size=batch_size)
                assert x_set.shape == x_set_adv.shape
                x_set = x_set_adv
            n_batches = math.ceil(x_set.shape[0] / batch_size)
            p_set, p_det = np.concatenate([
                predictor.send(x_set[b * batch_size:(b + 1) * batch_size])
                for b in tqdm.trange(n_batches)
            ]).T
            acc = np.equal(p_set, y_set[:len(p_set)].argmax(-1)).mean()

            if FLAGS.save_debug_dict:
                debug_dict['x_set'] = x_set
                debug_dict['y_set'] = y_set
                ddfn = 'logs/debug_dict_{}.pkl'.format(
                    'adv' if is_adv else 'clean')
                # Never overwrite an existing dump
                if not os.path.exists(ddfn):
                    with open(ddfn, 'wb') as f:
                        pickle.dump(debug_dict, f)
                debug_dict.clear()

        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples %s: %0.4f' %
                  (report_text, 'with correction'
                   if predictor is not None else 'without correction', acc))
        if is_adv is not None:
            label = 'test_acc_{}_{}'.format(
                report_text, 'corrected' if predictor else 'uncorrected')
            swriter.add_scalar(label, acc)
            if predictor is not None:
                # Fraction of samples whose detection flag matches is_adv
                detect = np.equal(p_det, is_adv).mean()
                label = 'test_det_{}_{}'.format(
                    report_text, 'corrected' if predictor else 'uncorrected')
                print(label, detect)
                swriter.add_scalar(label, detect)
                # Accuracy restricted to the correctly detected samples
                label = 'test_dac_{}_{}'.format(
                    report_text, 'corrected' if predictor else 'uncorrected')
                swriter.add_scalar(
                    label,
                    np.equal(p_set, y_set[:len(p_set)].argmax(-1))[np.equal(
                        p_det, is_adv)].mean())
        return acc

    if clean_train:
        if architecture == 'ConvNet':
            model = ModelAllConvolutional('model1',
                                          nb_classes,
                                          nb_filters,
                                          input_shape=[32, 32, 3])
        elif architecture == 'ResNet':
            model = ResNet(scope='ResNet')
        else:
            raise Exception('Specify valid classifier architecture!')

        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        if load_model:
            model_name = 'naturally_trained'
            if FLAGS.load_adv_trained:
                model_name = 'adv_trained'
            # 'None' (a string) is the sentinel for "use ./models"; compare
            # by value, identity comparison with a literal is unreliable.
            if ckpt_dir != 'None':
                ckpt = tf.train.get_checkpoint_state(
                    os.path.join(os.path.expanduser(ckpt_dir), model_name))
            else:
                ckpt = tf.train.get_checkpoint_state('./models/' + model_name)
            ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
            # Map checkpoint names (variable name without its first scope
            # component and without the ':N' suffix) to graph variables.
            saver = tf.train.Saver(var_list={
                v.name.split('/', 1)[1].split(':')[0]: v
                for v in tf.global_variables()
            })
            saver.restore(sess, ckpt_path)
            print('\nMODEL SUCCESSFULLY LOADED from : {}'.format(ckpt_path))
            initialize_uninitialized_global_variables(sess)
        else:

            def evaluate():
                do_eval(preds, x_test, y_test, 'clean_train_clean_eval',
                        False)

            train(sess,
                  loss,
                  None,
                  None,
                  dataset_train=dataset_train,
                  dataset_size=dataset_size,
                  evaluate=evaluate,
                  args=train_params,
                  rng=rng,
                  var_list=model.get_params())

        # Walk back from the logits to the MatMul op that produced them to
        # recover its two inputs (latent features and weight matrix).
        logits_op = preds.op
        while logits_op.type != 'MatMul':
            logits_op = logits_op.inputs[0].op
        latent_x_tensor, weights = logits_op.inputs
        logits_tensor = preds

        nb_classes = weights.shape[-1].value

        if not FLAGS.save_pgd_samples:
            noise_eps = FLAGS.noise_eps.split(',')
            if FLAGS.noise_eps_detect is None:
                FLAGS.noise_eps_detect = FLAGS.noise_eps
            noise_eps_detect = FLAGS.noise_eps_detect.split(',')
            if pgd_train is not None:
                pgd_train = pgd_train[:FLAGS.n_collect]
            if not FLAGS.passthrough:
                predictor = tf_robustify.collect_statistics(
                    x_train[:FLAGS.n_collect],
                    y_train[:FLAGS.n_collect],
                    x,
                    sess,
                    logits_tensor=logits_tensor,
                    latent_x_tensor=latent_x_tensor,
                    weights=weights,
                    nb_classes=nb_classes,
                    p_ratio_cutoff=FLAGS.p_ratio_cutoff,
                    noise_eps=noise_eps,
                    noise_eps_detect=noise_eps_detect,
                    pgd_eps=pgd_params['eps'],
                    pgd_lr=pgd_params['eps_iter'] / pgd_params['eps'],
                    pgd_iters=pgd_params['nb_iter'],
                    save_alignments_dir='logs/stats'
                    if FLAGS.save_alignments else None,
                    load_alignments_dir=os.path.expanduser(
                        '~/data/advhyp/madry/stats')
                    if FLAGS.load_alignments else None,
                    clip_min=pgd_params['clip_min'],
                    clip_max=pgd_params['clip_max'],
                    batch_size=batch_size,
                    num_noise_samples=FLAGS.num_noise_samples,
                    debug_dict=debug_dict,
                    debug=FLAGS.debug,
                    targeted=False,
                    pgd_train=pgd_train,
                    fit_classifier=FLAGS.fit_classifier,
                    clip_alignments=FLAGS.clip_alignments,
                    just_detect=FLAGS.just_detect)
            else:

                def _predictor():
                    # Pass-through predictor: plain argmax of the logits,
                    # paired with an all-zero detection column.
                    _x = yield
                    while (_x is not None):
                        _y = sess.run(preds, {x: _x}).argmax(-1)
                        _x = yield np.stack((_y, np.zeros_like(_y)), -1)

                predictor = _predictor()
                next(predictor)  # prime the generator
            if FLAGS.save_alignments:
                exit(0)

            # Evaluate the accuracy of the model on clean examples
            acc_clean = do_eval(preds,
                                x_test,
                                y_test,
                                'clean_train_clean_eval',
                                False,
                                predictor=predictor)

        # Initialize the attack object and graph
        if FLAGS.attack == 'pgd':
            pgd = MadryEtAl(model, sess=sess)
            adv_x = pgd.generate(x, **pgd_params)
        elif FLAGS.attack == 'cw':
            cw = CarliniWagnerL2(model, sess=sess)
            adv_x = cw.generate(x, **cw_params)
        elif FLAGS.attack == 'mean':
            pgd = MadryEtAl(model, sess=sess)
            mean_eps = FLAGS.mean_eps * FLAGS.eps

            def _attack_mean(x):
                # PGD from FLAGS.mean_samples noisy copies of one image,
                # each result clipped back into the eps-ball and [0, 255].
                x_many = tf.tile(x[None], (FLAGS.mean_samples, 1, 1, 1))
                x_noisy = x_many + tf.random_uniform(x_many.shape, -mean_eps,
                                                     mean_eps)
                x_noisy = tf.clip_by_value(x_noisy, 0, 255)
                x_pgd = pgd.generate(x_noisy, **pgd_params)
                x_clip = tf.minimum(x_pgd, x_many + FLAGS.eps)
                x_clip = tf.maximum(x_clip, x_many - FLAGS.eps)
                x_clip = tf.clip_by_value(x_clip, 0, 255)
                return x_clip

            adv_x = tf.map_fn(_attack_mean, x)
            adv_x = tf.reduce_mean(adv_x, 1)
        else:
            # Fail with a clear message instead of a NameError on adv_x
            raise ValueError('Unknown attack: {}'.format(FLAGS.attack))

        preds_adv = model.get_logits(adv_x)

        if FLAGS.save_pgd_samples:
            # 'labels' (not 'y') so the label placeholder is not shadowed
            for ds, labels, name in ((x_train, y_train, 'train'),
                                     (x_test, y_test, 'test')):
                train_batches = math.ceil(len(ds) / FLAGS.batch_size)
                train_pgd = np.concatenate([
                    sess.run(
                        adv_x, {
                            x:
                            ds[b * FLAGS.batch_size:(b + 1) *
                               FLAGS.batch_size]
                        }) for b in tqdm.trange(train_batches)
                ])
                # Samples are stored rescaled back to [0, 1]
                np.save('logs/{}_clean.npy'.format(name), ds / 255.)
                np.save('logs/{}_y.npy'.format(name), labels)
                train_pgd /= 255.
                np.save('logs/{}_pgd.npy'.format(name), train_pgd)
            exit(0)

        # Evaluate the accuracy of the model on adversarial examples
        if not FLAGS.load_pgd_test_samples:
            acc_pgd = do_eval(preds_adv,
                              x_test,
                              y_test,
                              'clean_train_adv_eval',
                              True,
                              predictor=predictor,
                              x_adv=adv_x)
        else:
            acc_pgd = do_eval(preds,
                              pgd_test,
                              y_test,
                              'clean_train_adv_eval',
                              True,
                              predictor=predictor)
        swriter.add_scalar('test_acc_mean', (acc_clean + acc_pgd) / 2., 0)

        print('Repeating the process, using adversarial training')
        # NOTE(review): the process stops here, so the adversarial-training
        # section below is only reached when clean_train is false -- confirm
        # that this is the intended placement of the exit.
        exit(0)

    # Create a new model and train it to be robust to MadryEtAl
    if architecture == 'ConvNet':
        model2 = ModelAllConvolutional('model2',
                                       nb_classes,
                                       nb_filters,
                                       input_shape=[32, 32, 3])
    elif architecture == 'ResNet':
        # Was assigned to `model`, which left `model2` undefined below
        model2 = ResNet()
    else:
        raise Exception('Specify valid classifier architecture!')

    pgd2 = MadryEtAl(model2, sess=sess)

    def attack(x):
        return pgd2.generate(x, **pgd_params)

    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For some attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    if load_model:
        if ckpt_dir != 'None':
            ckpt = tf.train.get_checkpoint_state(
                os.path.join(os.path.expanduser(ckpt_dir), 'adv_trained'))
        else:
            ckpt = tf.train.get_checkpoint_state('./models/adv_trained')
        ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
        # Load outside the assert so the model is still restored when
        # Python runs with -O (which strips assert statements).
        loaded = ckpt_path and tf_model_load(sess, file_path=ckpt_path)
        assert loaded, '\nMODEL LOADING FAILED'
        print('\nMODEL SUCCESSFULLY LOADED from : {}'.format(ckpt_path))
        initialize_uninitialized_global_variables(sess)
    else:

        def evaluate2():
            # Accuracy of adversarially trained model on legitimate test
            # inputs
            do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
            # Accuracy of the adversarially trained model on adversarial
            # examples
            do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

        # Perform and evaluate adversarial training
        train(sess,
              loss2,
              None,
              None,
              dataset_train=dataset_train,
              dataset_size=dataset_size,
              evaluate=evaluate2,
              args=train_params,
              rng=rng,
              var_list=model2.get_params())

    # Evaluate model
    do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
    do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    return report
def average_stat(model, set_type, attack, with_max_threshold=True):
    """
    Prints out the stats of the attack.

    Every CSV file found in ``attack/<model>/<attack>_<set_type>/`` is read;
    the sample index is parsed from the third underscore-separated token of
    the file name (its last four characters are dropped). Samples that the
    model misclassifies in the first place are skipped. For each remaining
    sample the first nine columns are accumulated: the third-to-last row is
    summed as the number of changed pixels, the second-to-last row as the
    distortion. A column counts as successful when its number of changed
    pixels is below the iteration budget.

    Parameters
    ----------
    model: str
        The joblib name.
    set_type: str
        The type of set used (either "train" or "test")
    attack: str
        The type of attack used (either "jsma", "wjsma" or "tjsma")
    with_max_threshold: bool, optional
        Uses the max threshold as the upper limit to compute stats for unsuccessful samples if set to True.

    Raises
    ------
    ValueError
        If `model` contains neither "mnist" nor "cifar10".
    """

    # The iteration budget is shared; only the image size depends on the set
    max_iter = 57 * 2

    if "mnist" in model:
        image_size = 784

        from cleverhans.dataset import MNIST

        x_set, y_set = MNIST(train_start=0,
                             train_end=60000,
                             test_start=0,
                             test_end=10000).get_set(set_type)
    elif "cifar10" in model:
        image_size = 3072

        from cleverhans.dataset import CIFAR10

        x_set, y_set = CIFAR10(train_start=0,
                               train_end=50000,
                               test_start=0,
                               test_end=10000).get_set(set_type)
        y_set = y_set.reshape((y_set.shape[0], 10))
    else:
        raise ValueError(
            "Invalid folder name, it must have the name of the dataset somewhere either 'mnist' or 'cifar10'"
        )

    max_distortion = max_iter / image_size
    max_pixel_number = int(image_size * max_distortion / 2) * 2

    def _ratio(total, count):
        # An empty folder, or a run without any successful sample, reports
        # NaN instead of aborting with ZeroDivisionError.
        return total / count if count else float("nan")

    y_set = np.argmax(y_set, axis=1)

    average_distortion = 0
    average_distortion_successful = 0
    average_pixel_number = 0
    average_pixel_number_successful = 0

    total_samples = 0
    total_samples_successful = 0

    predicted = np.argmax(get_labels(model, x_set), axis=1)

    folder = "attack/" + model + "/" + attack + "_" + set_type + "/"

    for file in os.listdir(folder):
        df = pandas.read_csv(folder + file)
        df_values = df.to_numpy()

        index = int(file.split("_")[2][:-4])

        # Only samples the model classifies correctly are counted
        if y_set[index] != predicted[index]:
            continue

        for i in range(9):
            pixel_number = df_values[-3, i]
            distortion = df_values[-2, i]

            total_samples += 1

            if with_max_threshold:
                average_pixel_number += min(pixel_number, max_pixel_number)
                average_distortion += min(distortion, max_distortion)
            else:
                average_pixel_number += pixel_number
                average_distortion += distortion

            if pixel_number < max_iter:
                total_samples_successful += 1

                average_pixel_number_successful += pixel_number
                average_distortion_successful += distortion

    print(folder)
    print("----------------------")
    print("WELL PREDICTED ORIGINAL SAMPLES:", total_samples)
    print("SUCCESS RATE (MISS CLASSIFIED):",
          _ratio(total_samples_successful, total_samples))
    print("AVERAGE NUMBER OF CHANGED PIXELS:",
          _ratio(average_pixel_number, total_samples))
    print("AVERAGE DISTORTION:", _ratio(average_distortion, total_samples))
    print("----------------------")
    print("AVERAGE SUCCESSFUL NUMBER OF CHANGED PIXELS:",
          _ratio(average_pixel_number_successful, total_samples_successful))
    print("AVERAGE SUCCESSFUL DISTORTION:",
          _ratio(average_distortion_successful, total_samples_successful))
    print("----------------------\n")
def defence_frame(train_start=0,
                  train_end=TRAIN_SIZE,
                  test_start=0,
                  test_end=TEST_SIZE,
                  nb_epochs=NB_EPOCHS,
                  batch_size=BATCH_SIZE,
                  learning_rate=LEARNING_RATE,
                  clean_train=CLEAN_TRAIN,
                  testing=False,
                  backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                  nb_filters=NB_FILTERS,
                  num_threads=None,
                  label_smoothing=0.1):
    """
    Train an ensemble defence and evaluate it against the configured attacks.

    A clean model is trained first. Then, for every attack name in
    ``FLAGS.attack_type``, one additional model is trained on examples
    produced by that attack. The clean model and the defence models are
    combined into an ensemble (``ENSEMBLE_TYPE`` is 'logits' or 'probs'),
    and accuracies on clean and adversarial test data are reported through
    ``do_eval``.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train each model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: not used by this function
    :param testing: not used by this function
    :param backprop_through_attack: not used by this function
    :param nb_filters: number of filters passed to get_model
    :param num_threads: if truthy, the session is limited to one intra-op
                        thread (the actual value is not used)
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :raises ValueError: if FLAGS.dataset or ENSEMBLE_TYPE is not supported
    :return: None
    """

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    config_args = dict(allow_soft_placement=True, log_device_placement=True)
    if num_threads:
        config_args['intra_op_parallelism_threads'] = 1
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Set parameters
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': FLAGS.batch_size}
    def_model_list = []

    if FLAGS.dataset == 'mnist':
        X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                      train_end=train_end,
                                                      test_start=test_start,
                                                      test_end=test_end)
        assert Y_train.shape[1] == 10
        nb_classes = 10
        x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
        y = tf.placeholder(tf.float32, shape=[None, 10])
        input_shape = [28, 28, 1]
    elif FLAGS.dataset == 'cifar10':
        data = CIFAR10(train_start=train_start,
                       train_end=train_end,
                       test_start=test_start,
                       test_end=test_end)
        X_train, Y_train = data.get_set('train')
        X_test, Y_test = data.get_set('test')

        nb_classes = Y_test.shape[1]
        assert Y_test.shape[1] == 10.
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        y = tf.placeholder(tf.float32, shape=(None, 10))
        input_shape = [32, 32, 3]
    else:
        # Fail early instead of hitting a NameError on nb_classes below
        raise ValueError('Unsupported dataset: {}'.format(FLAGS.dataset))

    # define and train clean model to be defenced on
    model = get_model(FLAGS.dataset, FLAGS.attack_model, 'model', nb_classes,
                      nb_filters, input_shape)
    rng = np.random.RandomState([2017, 10, 30])
    loss = CrossEntropy(model, smoothing=label_smoothing)
    train(sess,
          loss,
          X_train,
          Y_train,
          args=train_params,
          rng=rng,
          var_list=model.get_params())

    # for the 1...M attack methods, create adv samples and train defence
    # models
    for i, attack_name in enumerate(FLAGS.attack_type):
        attack_params = get_para(FLAGS.dataset, attack_name)
        model_i = get_model(FLAGS.dataset, FLAGS.attack_model,
                            'model_' + str(i), nb_classes, nb_filters,
                            input_shape)
        # Online: attack the model being trained; offline: attack the
        # already trained clean model.
        if IS_ONLINE:
            attack_method = get_attack(attack_name, model_i, sess)
        else:
            attack_method = get_attack(attack_name, model, sess)

        # Bind the current attack as defaults so the closure does not pick
        # up the values of a later loop iteration.
        def attack(x, attack_method=attack_method,
                   attack_params=attack_params):
            return attack_method.generate(x, **attack_params)

        loss_i = CrossEntropy(model_i,
                              smoothing=label_smoothing,
                              attack=attack,
                              adv_coeff=1.)
        train(sess,
              loss_i,
              X_train,
              Y_train,
              args=train_params,
              rng=rng,
              var_list=model_i.get_params())
        def_model_list.append(model_i)

    # Make Ensemble model
    def ensemble_model_logits(x):
        return do_logits(x, model, def_model_list=def_model_list)

    def ensemble_model_probs(x):
        # The wrapper below is declared as producing 'logits', so the
        # ensemble probabilities are passed through a log.
        return tf.math.log(do_probs(x, model, def_model_list=def_model_list))

    if ENSEMBLE_TYPE == 'logits':
        ensemble_model = CallableModelWrapper(ensemble_model_logits, 'logits')
    elif ENSEMBLE_TYPE == 'probs':
        ensemble_model = CallableModelWrapper(ensemble_model_probs, 'logits')
    else:
        # Fail early instead of hitting a NameError on ensemble_model below
        raise ValueError('Unsupported ENSEMBLE_TYPE: {}'.format(ENSEMBLE_TYPE))

    # Evaluate the accuracy of model on clean examples
    do_eval(sess, x, y, do_probs(x, model), X_test, Y_test,
            "origin model on clean data", eval_params)
    do_eval(sess, x, y, do_probs(x, ensemble_model), X_test, Y_test,
            "ensemble model on clean data", eval_params)
    do_eval(sess, x, y, do_logits(x, model, def_model_list=def_model_list),
            X_test, Y_test, "test ensemble logits on clean data", eval_params)
    do_eval(sess, x, y, do_probs(x, model, def_model_list=def_model_list),
            X_test, Y_test, "test ensemble probs on clean data", eval_params)

    # Evaluate the accuracy of model on adv examples
    for attack_name in FLAGS.attack_type:
        attack_params = get_para(FLAGS.dataset, attack_name)

        # generate attack to origin model
        origin_attack = get_attack(attack_name, model, sess)
        origin_adv_x = origin_attack.generate(x, **attack_params)
        do_eval(sess, x, y, do_probs(origin_adv_x, model), X_test, Y_test,
                attack_name + "-> origin model, test on origin model",
                eval_params)
        do_eval(
            sess, x, y,
            do_probs(origin_adv_x, model, def_model_list=def_model_list),
            X_test, Y_test, attack_name +
            "-> origin model, test on ensemble model, using probs",
            eval_params)
        do_eval(
            sess, x, y,
            do_logits(origin_adv_x, model, def_model_list=def_model_list),
            X_test, Y_test, attack_name +
            "-> origin model, test on ensemble model, using logits",
            eval_params)

        # generate attack to ensemble model
        ensemble_attack = get_attack(attack_name, ensemble_model, sess)
        ensemble_adv_x = ensemble_attack.generate(x, **attack_params)
        do_eval(sess, x, y, do_probs(ensemble_adv_x, ensemble_model), X_test,
                Y_test,
                attack_name + "-> ensemble model, test on ensemble model",
                eval_params)
        do_eval(
            sess, x, y,
            do_logits(ensemble_adv_x, model, def_model_list=def_model_list),
            X_test, Y_test, attack_name +
            "-> ensemble model, test on ensemble model, test logits",
            eval_params)
        do_eval(
            sess, x, y,
            do_probs(ensemble_adv_x, model, def_model_list=def_model_list),
            X_test, Y_test, attack_name +
            "-> ensemble model, test on ensemble model, test probs",
            eval_params)
b = np.zeros((len(y_test), 43)) b[np.arange(len(y_test)), y_test] = 1 y_test = b if input.upper() == "TRAIN": return x_train.astype(np.float32), y_train.astype(np.float32) if input.upper() == "TEST": return x_test.astype(np.float32), y_test.astype(np.float32) else: return None, None print("STEP 1: Get training data...") data = CIFAR10(train_start=train_start, train_end=train_end, test_start=test_start, test_end=test_end) #default load cifar10 image x_train, y_train = data.get_set('train') x_test, y_test = data.get_set('test') # load stop sign images sign_data = True if sign_data is True: x_train, y_train = Signs("TRAIN") x_test, y_test = Signs("TEST") img_rows, img_cols, nchannels = x_train.shape[1:4] nb_classes = y_train.shape[1] my_data = []
def cifar10_train_on_untargeted(train_start=0, train_end=60000, test_start=0,
                                test_end=10000, nb_epochs=NB_EPOCHS,
                                batch_size=BATCH_SIZE,
                                learning_rate=LEARNING_RATE,
                                testing=True, adv_training=False,
                                backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                                num_threads=None, threat_model='white_box',
                                model_key='model_1_a', attacker_key='clean',
                                label_smoothing=0.1):
    """
    CIFAR10 cleverhans training

    Loads the Keras model registered under `model_key` in the metadata,
    builds the attack registered under `attacker_key`, and then either
    adversarially trains the model (`adv_training`), saving the model and
    a new metadata entry after every training chunk, or evaluates it on the
    training set (`testing`).

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param testing: if true (and adv_training is false), complete an
                    AccuracyReport on the training set
    :param adv_training: if true, adversarially train the loaded model in
                         up to 10 chunks, re-evaluating after each chunk
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param num_threads: if truthy, the session is limited to one intra-op
                        thread (the actual value is not used)
    :param threat_model: 'white_box' (full training set), 'black_box_A'
                         (first half) or 'black_box_B' (second half)
    :param model_key: key of the model entry in the metadata
    :param attacker_key: key of the attacker entry in the metadata
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :raises NotImplementedError: if threat_model is not one of the above
    :raises ValueError: if the attacker entry names an unknown attack type
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))
    K.set_learning_phase(0)

    # Set the TF session as Keras backend session
    K.set_session(sess)

    meta = read_from_meta()
    attacker_meta = meta['attacker'][attacker_key]
    model_meta = meta['model'][model_key]
    attack_type = attacker_meta['attack_type']

    # Select the part of the training set matching the threat model
    if threat_model == 'black_box_A':
        print('Using training set A')
        train_end = int(train_end/2)
        assert 'black_box_A' in meta['model'][model_key]['threat_models']
        dataset_section = 'A'
    elif threat_model == 'black_box_B':
        print('Using training set B')
        train_start = int(train_end/2)
        dataset_section = 'B'
        assert 'black_box_B' in meta['model'][model_key]['threat_models']
    elif threat_model == 'white_box':
        print('Using full training set')
        dataset_section = ''
    else:
        raise NotImplementedError

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start, train_end=train_end,
                   test_start=test_start, test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Attacker-specific parameters override the defaults from the metadata
    attack_params = {}
    attack_params.update(meta['attacker']['default']['attack_params'])
    attack_params.update(attacker_meta['attack_params'])
    for k, v in attack_params.items():
        if isinstance(v, str):
            # SECURITY: string values are executed with eval(); the metadata
            # must come from a trusted source.
            attack_params[k] = eval(v)
    if 'meta_key' in attacker_meta and attack_type == 'advgan':
        folderpath = meta['advgan'][attacker_meta['meta_key']]['train_params']['output_folder']
        attack_params.update(
            {'generator_filepath': os.path.join(folderpath, 'generator.hd5')})

    model_filename = model_meta['file_name']
    if 'black_box' in threat_model:
        model_filename = model_filename.replace('cifar10', 'cifar10B')
    model_filepath = model_meta['folder_path']+'/'+model_filename

    keras_model = tf.keras.models.load_model(
        filepath=model_filepath,
        custom_objects=custom_object())
    model = KerasModelWrapper(keras_model)

    def attack_statistics(x_true, x_adv):
        # calculate average L1,L2,Linf norms
        # as well as % of pixels modified
        L1 = tf.reduce_mean(K.sum(K.abs(x_adv-x_true), axis=(-1, -2, -3)))
        L2 = tf.reduce_mean(
            K.sqrt(K.sum(K.square(x_adv-x_true), axis=(-1, -2, -3))))
        Linf = tf.reduce_mean(K.max(K.abs(x_true-x_adv), axis=(-1, -2, -3)))
        # A value counts as modified when it moved by more than 1/255
        eps = tf.constant(1/255, shape=x_true.shape.as_list()[1:])
        mod_perc = 100*tf.reduce_mean(
            K.cast(K.greater(K.abs(x_true-x_adv), eps), dtype='float'))
        return {'L1': L1, 'L2': L2, 'Linf': Linf, '%pix': mod_perc}

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        # Evaluate accuracy, store it on the report and print it
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    def report_as_dict():
        # Put accuracies in a dictionary for json serializability. NumPy
        # floats are accepted as well as Python floats in every caller, so
        # evaluated accuracies are not dropped from the report.
        return {attr: str(getattr(report, attr))[:10]
                for attr in dir(report)
                if isinstance(getattr(report, attr), (float, np.floating))}

    # define attacker
    if attack_type == 'cwl2':
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model, sess=sess)
    elif attack_type == 'fgsm':
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model, sess=sess)
    elif attack_type == 'pgd':
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model, sess=sess)
    elif attack_type == 'advgan':
        from cleverhans.attacks.adversarial_gan import AdvGAN
        attacker = AdvGAN(model, sess=sess)
    elif attack_type is None or attack_type == 'clean':
        attacker = None
    else:
        # Previously only printed, leaving `attacker` undefined
        raise ValueError('{} is not a valid attack type'.format(attack_type))

    def attack(x):
        # Identity when no attacker is configured
        if attacker:
            print('attack_params', attack_params)
            return attacker.generate(x, **attack_params)
        else:
            return x

    loss = CrossEntropy(model, smoothing=label_smoothing, attack=attack)
    preds = model.get_logits(x)
    adv_x = attack(x)

    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x = tf.stop_gradient(adv_x)
    preds_adv = model.get_logits(adv_x)

    def evaluate():
        # Accuracy of adversarially trained model on legitimate test inputs
        do_eval(preds, x_test, y_test, 'adv_train_clean_eval', False)
        # Accuracy of the adversarially trained model on adversarial examples
        do_eval(preds_adv, x_test, y_test, 'adv_train_adv_eval', True)

    # print attack info, measured on the first test batch
    with sess.as_default():
        # str() because None is an accepted attack type
        print('attack type: ' + str(attack_type))
        attack_stats = attack_statistics(x, adv_x)
        feed_dict = {x: x_test[:batch_size], y: y_test[:batch_size]}
        attack_stats_eval = sess.run(attack_stats, feed_dict=feed_dict)
        attack_stats_eval = {k: str(v)[:10]
                             for k, v in attack_stats_eval.items()}
        print(attack_stats_eval)

    if adv_training:
        # Train a CIFAR10 model in up to `reeval_breaks` chunks
        reeval_breaks = 10
        train_params = {
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        nb_e = nb_epochs  # epochs still to be trained
        prev_acc = 0
        # Perform and evaluate adversarial training
        for rb in range(reeval_breaks, 0, -1):
            # Spread the remaining epochs over the remaining chunks
            train_params.update({'nb_epochs': int(np.ceil(nb_e/rb))})
            # NOTE(review): chained comparison, true only when the chunk
            # size is negative -- looks ineffective, confirm the intent.
            if nb_e < train_params['nb_epochs'] < 0:
                train_params['nb_epochs'] = nb_e
            print("Starting training {} of {}".format(nb_epochs-nb_e,
                                                      nb_epochs))
            # NOTE(review): `rng` is not assigned in this function; it has
            # to be available at module level -- confirm.
            train(sess, loss, None, None,
                  dataset_train=dataset_train, dataset_size=dataset_size,
                  evaluate=evaluate, args=train_params, rng=rng)
            nb_e -= train_params['nb_epochs']

            report_dict = report_as_dict()
            print(report_dict)

            # save to meta
            new_meta = read_from_meta()
            new_model = deepcopy(model_meta)
            new_model.update({'adv_training': True,
                              'attacker_key': attacker_key,
                              'parent_key': model_key,
                              'threat_models': [threat_model],
                              'attack_stats': attack_stats_eval,
                              'report': report_dict,
                              'train_params': {
                                  'batch_size': batch_size,
                                  'learning_rate': learning_rate,
                                  'nb_epochs': nb_epochs-nb_e,
                              },
                              'reeval': False
                              })
            if nb_e > 0:
                new_model.update({
                    'training_finished': False,
                    'file_name': model_meta['file_name'].replace(
                        'clean',
                        attacker_key+'_train_epoch_'
                        + str(new_model['train_params']['nb_epochs']))})
            else:
                new_model.update({
                    'training_finished': True,
                    'file_name': model_meta['file_name'].replace(
                        'clean', attacker_key+'_train')})
            new_model_key = get_new_key(model_key, meta)
            new_meta['model'].update({new_model_key: new_model})
            write_to_meta(new_meta)

            save_filename = new_model['file_name']
            if 'black_box' in threat_model:
                save_filename = save_filename.replace(
                    'cifar10', 'cifar10'+dataset_section)
            save_model(keras_model,
                       filepath=new_model['folder_path']+'/'+save_filename)

            if report.adv_train_adv_eval >= 0.9:
                # Robust enough: stop early
                break
            elif report.adv_train_adv_eval <= 0.01:
                # increase_lr
                lr = train_params['learning_rate']
                train_params.update({'learning_rate': lr*1.5})
                print('no learning! Increasing learning rate to {}'
                      .format(train_params['learning_rate']))
            elif prev_acc <= report.adv_train_adv_eval:
                # update_lr
                lr = train_params['learning_rate']
                train_params.update({'learning_rate': lr*0.8})
                print('decreasing learning rate to {}'
                      .format(train_params['learning_rate']))
            prev_acc = copy(report.adv_train_adv_eval)

            if nb_e <= 0:
                break

    # Calculate training errors
    elif testing:
        do_eval(preds, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(preds_adv, x_train, y_train, 'train_adv_train_adv_eval')
        report_dict = report_as_dict()
        print('report_dict')
        print(report_dict)

    return report
def cifar10_eval_attacks(train_start=0,
                         train_end=60000,
                         test_start=0,
                         test_end=10000,
                         sweep_eps=SWEEP_EPS,
                         targeted=TARGETED,
                         model_key='model_1_a',
                         attacker_keys='clean',
                         eval_model_keys=None,
                         threat_model='white_box',
                         generate_examples=True):
    """
    Evaluate saved CIFAR10 models against one or more configured attackers.

    The model named by ``model_key`` is loaded from the meta-data store and
    every attacker in ``attacker_keys`` is instantiated on it.  The resulting
    adversarial examples are then scored on each model in ``eval_model_keys``.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param sweep_eps: if truthy (and the attack type is not 'clean'), sweep
                      epsilon from 1/255 up to twice the configured 'eps'
                      instead of evaluating a single epsilon
    :param targeted: if truthy, evaluate per (target class, original class)
                     pair and report (nb_classes+1)x(nb_classes+1) matrices
    :param model_key: name of the keras model to be loaded and attacked
    :param attacker_keys: name, or iterable of names, of the attackers to be
                          loaded and used to attack the model
    :param eval_model_keys: list of model names to evaluate on; required for
                            'black_box', replaced by ``[model_key]`` for
                            'white_box'
    :param threat_model: either 'white_box' or 'black_box'
    :param generate_examples: if truthy, adversarial examples are generated
                              as numpy arrays and cached as pickle files
    :return: a dict mapping every attacker key to its evaluation results
    :raises ValueError: on an unknown threat model or attack type, or when
                        ``eval_model_keys`` is not a list for 'black_box'
    :raises NotImplementedError: when an advgan-based attacker has no
                                 'meta_key' entry in its meta-data
    """
    if threat_model == 'white_box':
        eval_model_keys = [model_key]
        attacker_partition = ''
        defender_partition = ''
    elif threat_model == 'black_box':
        attacker_partition = 'A'
        defender_partition = 'B'
        if not isinstance(eval_model_keys, list):
            raise ValueError('eval_model_keys must be list for black_box')
        # TODO: add white-box info to meta-data.  Intended layout:
        #   "model_1_g": {                       <- the eval model
        #     "advgan_b->model_1_e": {           <- the surrogate model
        #       "model_acc": "<folder>/pickle/model_1_g_advgan_b_model_acc.p",
        #       "target_acc": "<folder>/pickle/model_1_g_advgan_b_target_acc.p",
        #       "attack_stats": {"L1": ..., "L2": ..., "Linf": ..., "%pix": ...,
        #                        "num_batches": 20, "time": "97.7us"},
        #       "threat_model": "black_box"
    else:
        # Previously an unknown value surfaced later as a NameError on
        # attacker_partition.
        raise ValueError('invalid threat_model: {}'.format(threat_model))

    # A bare string names a single attacker; list('clean') would otherwise
    # split it into single characters.
    if isinstance(attacker_keys, str):
        attacker_keys = [attacker_keys]
    else:
        attacker_keys = list(attacker_keys)

    def _load_pickle(path):
        # NOTE: pickle is only safe on files this code base wrote itself.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    def _dump_pickle(obj, path):
        with open(path, "wb") as handle:
            pickle.dump(obj, handle)

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    K.set_learning_phase(0)
    K.set_session(sess)

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    # The training pipeline is not used for evaluation; the call is kept
    # because it may add ops to the default graph -- TODO confirm and drop.
    data.to_tensorflow()
    x_test, y_test = data.get_set('test')
    nb_test = x_test.shape[0]

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholders
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    y_target = tf.placeholder(tf.float32, shape=(None, nb_classes))

    meta = read_from_meta()
    model_meta = meta['model'][model_key]
    filename = model_meta['file_name'].replace(
        'CIFAR10', 'CIFAR10' + attacker_partition)
    keras_model = tf.keras.models.load_model(
        filepath=model_meta['folder_path'] + '/' + filename,
        custom_objects=custom_object())
    model = KerasModelWrapper(keras_model)

    standard_attackers = {
        'cwl2': cha.CarliniWagnerL2,
        'fgsm': cha.FastGradientMethod,
        'pgd': cha.MadryEtAl,
        'jsma': cha.SaliencyMapMethod,
        'stm': cha.SpatialTransformationMethod,
        'advgan': cha.AdvGAN,
        'spsa': cha.SPSA,
        'g+pgd': cha.GanInformedPGD,
        'g+spsa': cha.GanInformedSPSA
        #'g+fgsm':cha.GanInformedFGM
    }

    report = dict()
    for attacker_key in attacker_keys:
        attacker_meta = meta['attacker'][attacker_key]
        attack_type = attacker_meta['attack_type']
        attack_params = {}
        attack_params.update(meta['attacker']['default']['attack_params'])
        attack_params.update(attacker_meta['attack_params'])
        if 'spsa' in attacker_key:
            eval_par = {'batch_size': 1}
        else:
            eval_par = {'batch_size': attack_params['batch_size']}
        # String-valued parameters are stored as Python expressions.
        # NOTE(security): eval() executes arbitrary code; the meta file must
        # be trusted.
        for k, v in attack_params.items():
            if isinstance(v, str):
                attack_params[k] = eval(v)

        # GAN based attackers additionally need the trained generator.
        # attack_type may be None (treated as 'clean' below), so guard the
        # substring test.
        if attack_type is not None and (attack_type == 'advgan'
                                        or 'g+' in attack_type):
            if 'meta_key' in attacker_meta:
                folderpath = meta['advgan'][
                    attacker_meta['meta_key']]['train_params']['output_folder']
                attack_params.update({
                    'generator_filepath':
                    os.path.join(folderpath, 'generator.hd5'),
                    'custom_objects':
                    custom_object()
                })
            else:
                raise NotImplementedError(
                    "Must provide attacker meta with existing meta_key")

        # Define attacker
        if attack_type in standard_attackers:
            attacker = standard_attackers[attack_type](model, sess=sess)
        elif attack_type is None or attack_type == 'clean':
            attacker = None
        else:
            # Continuing would leave `attacker` undefined, or silently reuse
            # the attacker of the previous iteration.
            raise ValueError(attack_type + ' is not a valid attack type')

        pkl_folderpath = os.path.join(model_meta['folder_path'], 'pickle',
                                      attacker_key)
        if not os.path.isdir(pkl_folderpath):
            os.makedirs(pkl_folderpath)

        if targeted:
            # Every test image is paired with every class as target:
            # block c of the tiled test set is targeted at class c.
            target_test = np.repeat(range(nb_classes), nb_test)
            x_test_shuf = np.array(np.tile(x_test, (nb_classes, 1, 1, 1)))
            y_test_shuf = np.array(np.tile(y_test, (nb_classes, 1)))
            y_target_test_shuf = tf.keras.utils.to_categorical(
                target_test, nb_classes)
            # do not shuffle
            #shuffle_in_unison(x_test_shuf,y_test_shuf,y_target_test_shuf)

            # Split the tiled set by (target t, original o); the extra last
            # row/column of nb_test_by_t_o holds the marginal counts.
            x_test_by_t_o = [[None] * nb_classes for n in range(nb_classes)]
            y_test_by_t_o = [[None] * nb_classes for n in range(nb_classes)]
            y_target_test_by_t_o = [[None] * nb_classes
                                    for n in range(nb_classes)]
            nb_test_by_t_o = np.zeros((nb_classes + 1, nb_classes + 1))
            print(y_target_test_shuf)
            for t in range(nb_classes):
                for o in range(nb_classes):
                    if t == o:
                        continue
                    index = np.logical_and(y_target_test_shuf[:, t],
                                           y_test_shuf[:, o])
                    nb_test_by_t_o[t, o] = np.count_nonzero(index)
                    x_test_by_t_o[t][o] = x_test_shuf[index]
                    y_test_by_t_o[t][o] = y_test_shuf[index]
                    y_target_test_by_t_o[t][o] = y_target_test_shuf[index]
            # Sanity check kept from the original; it also requires classes
            # 1 and 2 to have the same number of test examples.
            np.testing.assert_array_equal(y_target_test_by_t_o[0][1],
                                          y_target_test_by_t_o[0][2],
                                          err_msg='',
                                          verbose=True)
            nb_test_by_t_o[nb_classes, :] = np.sum(nb_test_by_t_o, axis=0)
            nb_test_by_t_o[:, nb_classes] = np.sum(nb_test_by_t_o, axis=1)
            attack_params.update({'y_target': y_target})

            def model_eval_wrapper(preds,
                                   acc_target='original_class',
                                   adv_x=None):
                # Returns an (nb_classes+1)x(nb_classes+1) accuracy matrix
                # indexed [target, original]; the last row/column hold the
                # count-weighted marginals and [-1, -1] the overall value.
                # Reads pickle_file_head from the enclosing scope.
                if acc_target == 'original_class':
                    acc_target = y_test_by_t_o
                elif acc_target == 'target_class':
                    acc_target = y_target_test_by_t_o
                else:
                    raise ValueError('invalid value for accuracy_target: ' +
                                     acc_target)
                accuracy_by_t_o = np.zeros((nb_classes + 1, nb_classes + 1))
                orig_accuracy_by_t_o = np.zeros(
                    (nb_classes + 1, nb_classes + 1))
                for t in range(nb_classes + 1):
                    for o in range(nb_classes):
                        if t == o:
                            continue
                        row_scale = nb_test_by_t_o[t, o] / nb_test_by_t_o[
                            t, nb_classes]
                        col_scale = nb_test_by_t_o[t, o] / nb_test_by_t_o[
                            nb_classes, o]
                        if t < nb_classes:
                            feed = {
                                y_target:
                                y_target_test_by_t_o[t][o]
                                [:eval_par['batch_size'], :]
                            }
                            if generate_examples:
                                assert adv_x is not None, 'adv_x tensor must be supplied when generating examples'
                                pickle_x_file = os.path.join(
                                    pkl_folderpath, pickle_file_head +
                                    "x_test_targeted_{}_{}.p".format(t, o))
                                if os.path.exists(pickle_x_file):
                                    adv_x_test = _load_pickle(pickle_x_file)
                                else:
                                    adv_x_test = gen_np(
                                        sess, x_test_by_t_o[t][o], x, adv_x,
                                        y_target_test_by_t_o[t][o], y_target)
                                    _dump_pickle(adv_x_test, pickle_x_file)
                                # The adv_x tensor is fed directly with the
                                # pre-computed (or clean) images.
                                accuracy_by_t_o[t, o] = model_eval(
                                    sess,
                                    adv_x,
                                    y,
                                    preds,
                                    adv_x_test,
                                    acc_target[t][o],
                                    args=eval_par)
                                orig_accuracy_by_t_o[t, o] = model_eval(
                                    sess,
                                    adv_x,
                                    y,
                                    preds,
                                    x_test_by_t_o[t][o],
                                    acc_target[t][o],
                                    args=eval_par)
                            else:
                                accuracy_by_t_o[t, o] = model_eval(
                                    sess,
                                    x,
                                    y,
                                    preds,
                                    x_test_by_t_o[t][o],
                                    acc_target[t][o],
                                    feed=feed,
                                    args=eval_par)
                            accuracy_by_t_o[
                                nb_classes,
                                o] += accuracy_by_t_o[t, o] * col_scale
                            orig_accuracy_by_t_o[
                                nb_classes,
                                o] += orig_accuracy_by_t_o[t, o] * col_scale
                        accuracy_by_t_o[
                            t, nb_classes] += accuracy_by_t_o[t, o] * row_scale
                        orig_accuracy_by_t_o[
                            t,
                            nb_classes] += orig_accuracy_by_t_o[t,
                                                                o] * row_scale
                if adv_x is not None:
                    # fill diagonal with original accuracies
                    for o in range(nb_classes):
                        accuracy_by_t_o[o, o] = orig_accuracy_by_t_o[
                            nb_classes, o]
                return accuracy_by_t_o
        else:
            x_test_shuf = x_test
            y_test_shuf = y_test

        def attack(x, attack_params=attack_params):
            # Symbolic adversarial tensor, or the input itself if there is
            # no attacker.
            if attacker:
                return attacker.generate(x, **attack_params)
            else:
                return x

        def gen_np(sess, X, x, adv_x, Y_target=None, y_target=None):
            # inputs:
            #   sess     (required) : tf session
            #   X        (required) : numpy input data
            #   x        (required) : placeholder for model input
            #   adv_x    (required) : tensor for generator output
            #   Y_target (optional) : numpy array specifying the target class
            #   y_target (optional) : placeholder for the target inputs
            # outputs:
            #   numpy array of adversarial examples, one per row of X
            if not attacker:
                # Without an attacker the "adversarial" data is the clean
                # numpy input (the original returned the placeholder x).
                return X
            with sess.as_default():
                _batch_size = eval_par['batch_size']
                nb_x = X.shape[0]
                # Seed with an empty array so an empty X still yields an
                # array of the right trailing shape.
                batches = [np.zeros((0, ) + X.shape[1:], dtype=X.dtype)]
                for start in range(0, nb_x, _batch_size):
                    end = min(nb_x, start + _batch_size)
                    feed_dict = {x: X[start:end]}
                    if Y_target is not None:
                        feed_dict.update({y_target: Y_target[start:end]})
                    batches.append(adv_x.eval(feed_dict=feed_dict))
                return np.concatenate(batches, axis=0)

        def attack_stats_eval(x, adv_x, num_batches=1):
            # Return attack statistics averaged over num_batches batches,
            # plus the per-example generation time.
            with sess.as_default():
                _batch_size = eval_par['batch_size']
                _as_eval = dict()
                cum_time = 0.
                attack_stats = attack_statistics(x, adv_x)
                for batch in range(num_batches):
                    batch_slice = slice(batch * _batch_size,
                                        (batch + 1) * _batch_size)
                    feed_dict = {
                        x: x_test_shuf[batch_slice],
                        y: y_test_shuf[batch_slice]
                    }
                    if targeted:
                        feed_dict.update(
                            {y_target: y_target_test_shuf[batch_slice]})
                    _as = sess.run(attack_stats, feed_dict=feed_dict)
                    if batch == 0:
                        _as_eval = deepcopy(_as)
                    else:
                        _as_eval = {k: v + _as[k] for k, v in _as_eval.items()}
                    t_1 = time.process_time()
                    adv_x.eval(feed_dict=feed_dict)
                    t_2 = time.process_time()
                    cum_time += t_2 - t_1
                cum_time /= num_batches * _batch_size
                _as_eval = {k: v / num_batches for k, v in _as_eval.items()}
                _as_eval.update({
                    'num_batches': num_batches,
                    'time': metric_convert(cum_time, 's')
                })
                return _as_eval

        report.update({attacker_key: {'model_acc': {}}})
        for eval_model_key in eval_model_keys:
            # Sweep over models to evaluate on. "White Box" attacks only have
            # one eval_model_key; "Black Box" attacks may have several
            # eval_model_key "defenses".
            report_view = report[attacker_key]
            if threat_model == 'white_box':
                assert model_key == eval_model_key, (
                    'for white_box attacks, ',
                    'generating model and eval model must be the same')
                eval_model = model
            elif threat_model == 'black_box':
                # add black box eval model to report and update report head
                if 'black_box' not in report_view:
                    report_view.update(
                        {'black_box': {
                            eval_model_key: {
                                'model_acc': {}
                            }
                        }})
                else:
                    report_view['black_box'].update(
                        {eval_model_key: {
                            'model_acc': {}
                        }})
                report_view = report_view['black_box'][eval_model_key]

                # load eval model trained on defense dataset
                eval_model_meta = meta['model'][eval_model_key]
                filename = eval_model_meta['file_name'].replace(
                    'CIFAR10', 'CIFAR10' + defender_partition)
                keras_model = tf.keras.models.load_model(
                    filepath=eval_model_meta['folder_path'] + '/' + filename,
                    custom_objects=custom_object())
                eval_model = KerasModelWrapper(keras_model)

            # evaluate model on clean examples
            preds = eval_model.get_logits(x)
            model_acc = model_eval(sess,
                                   x,
                                   y,
                                   preds,
                                   x_test,
                                   y_test,
                                   args=eval_par)
            print('Test accuracy on clean examples %0.4f\n' % model_acc)
            report_view.update({'clean_model_acc': model_acc})

            t1 = 0
            if sweep_eps and attack_type != 'clean':
                # Sweep on a private copy: mutating attack_params would
                # double the sweep range for every further eval model.
                sweep_params = dict(attack_params)
                max_eps = 2 * sweep_params['eps']
                has_eps_iter = 'eps_iter' in sweep_params
                if has_eps_iter:
                    max_eps_iter = 2 * sweep_params['eps_iter']
                epsilons = np.linspace(1 / 255, max_eps,
                                       min(int(max_eps * 255), 16))
                sweep_e = dict()
                for e in epsilons:
                    scaled_e = str(int(e * 255))
                    t1 = time.time()
                    sweep_params['eps'] = e
                    if has_eps_iter:
                        sweep_params['eps_iter'] = max_eps_iter * e / max_eps
                    # Must be set before evaluating: model_eval_wrapper and
                    # the example cache below both read it.
                    pickle_file_head = '{}_{}_{}_'.format(
                        model_key, attacker_key, e)
                    adv_x = attack(x, sweep_params)
                    attack_stats_cur = attack_stats_eval(x, adv_x, 1)
                    preds_adv = eval_model.get_probs(adv_x)
                    if targeted:
                        model_acc = model_eval_wrapper(
                            preds_adv,
                            acc_target='original_class',
                            adv_x=adv_x)
                        target_acc = model_eval_wrapper(
                            preds_adv, acc_target='target_class', adv_x=adv_x)
                        pickle_m_file = os.path.join(
                            pkl_folderpath, pickle_file_head + "model_acc.p")
                        pickle_t_file = os.path.join(
                            pkl_folderpath, pickle_file_head + "target_acc.p")
                        _dump_pickle(model_acc, pickle_m_file)
                        _dump_pickle(target_acc, pickle_t_file)
                        sweep_e.update({
                            scaled_e: {
                                'model_acc': pickle_m_file,
                                'target_acc': pickle_t_file,
                                'attack_stats': attack_stats_cur
                            }
                        })
                        acc_to_print = model_acc[nb_classes, nb_classes]
                    else:
                        # Score the adversarial predictions, as the
                        # single-epsilon branch does (the clean `preds`
                        # do not depend on adv_x).
                        if generate_examples:
                            pickle_x_file = os.path.join(
                                pkl_folderpath,
                                pickle_file_head + "x_test_untargeted.p")
                            if os.path.exists(pickle_x_file):
                                adv_x_test = _load_pickle(pickle_x_file)
                            else:
                                adv_x_test = gen_np(sess, x_test, x, adv_x)
                                _dump_pickle(adv_x_test, pickle_x_file)
                            model_acc = model_eval(sess,
                                                   adv_x,
                                                   y,
                                                   preds_adv,
                                                   adv_x_test,
                                                   y_test,
                                                   args=eval_par)
                        else:
                            model_acc = model_eval(sess,
                                                   x,
                                                   y,
                                                   preds_adv,
                                                   x_test,
                                                   y_test,
                                                   args=eval_par)
                        sweep_e.update({
                            scaled_e: {
                                'model_acc': model_acc,
                                'attack_stats': attack_stats_cur
                            }
                        })
                        acc_to_print = model_acc
                    print('Epsilon %.2f, accuracy on adversarial' % e,
                          'examples %0.4f\n' % acc_to_print)
                    print(sweep_e[scaled_e])
                report_view.update({'sweep_eps': sweep_e})
                t2 = time.time()
            else:
                if 'eps' in attack_params:
                    cond_eps = attack_params['eps']
                else:
                    cond_eps = 'N/A'
                print('evaluating {}->{} examples on {} (single epsilon: {})'.
                      format(attacker_key, model_key, eval_model_key,
                             cond_eps))
                t1 = time.time()
                adv_x = attack(x, attack_params)
                preds_adv = eval_model.get_probs(adv_x)
                pickle_file_head = '{}_{}_'.format(model_key, attacker_key)
                if targeted:
                    model_acc = model_eval_wrapper(preds_adv,
                                                   acc_target='original_class',
                                                   adv_x=adv_x)
                    target_acc = model_eval_wrapper(preds_adv,
                                                    acc_target='target_class',
                                                    adv_x=adv_x)
                    if threat_model == 'black_box':
                        pickle_m_file = os.path.join(
                            pkl_folderpath, pickle_file_head + eval_model_key +
                            "_model_acc.p")
                        pickle_t_file = os.path.join(
                            pkl_folderpath, pickle_file_head + eval_model_key +
                            "_target_acc.p")
                    else:
                        pickle_m_file = os.path.join(
                            pkl_folderpath, pickle_file_head + "_model_acc.p")
                        pickle_t_file = os.path.join(
                            pkl_folderpath, pickle_file_head + "_target_acc.p")
                    _dump_pickle(model_acc, pickle_m_file)
                    _dump_pickle(target_acc, pickle_t_file)
                    report_view.update({
                        'model_acc':
                        pickle_m_file,
                        'target_acc':
                        pickle_t_file,
                        'attack_stats':
                        attack_stats_eval(x, adv_x, 20)
                    })
                else:
                    if generate_examples:
                        pickle_x_file = os.path.join(
                            pkl_folderpath,
                            pickle_file_head + "x_test_untargeted.p")
                        if os.path.exists(pickle_x_file):
                            adv_x_test = _load_pickle(pickle_x_file)
                        else:
                            adv_x_test = gen_np(sess, x_test, x, adv_x)
                            _dump_pickle(adv_x_test, pickle_x_file)
                        # evaluate on self and, if black box, all other eval
                        # models
                        model_acc = model_eval(sess,
                                               adv_x,
                                               y,
                                               preds_adv,
                                               adv_x_test,
                                               y_test,
                                               args=eval_par)
                    else:
                        model_acc = model_eval(sess,
                                               x,
                                               y,
                                               preds_adv,
                                               x_test,
                                               y_test,
                                               args=eval_par)
                    report_view.update({
                        'model_acc':
                        model_acc,
                        'attack_stats':
                        attack_stats_eval(x, adv_x, 20)
                    })
                t2 = time.time()

            if targeted:
                print('Test accuracy on adversarial examples %0.4f\n' %
                      model_acc[nb_classes, nb_classes])
                print('Target accuracy on adversarial examples %0.4f\n' %
                      target_acc[nb_classes, nb_classes])
            else:
                print('Test accuracy on adversarial examples %0.4f\n' %
                      model_acc)
            print("Took", t2 - t1, "seconds")

    return report
def main(argv=None):
    """
    Train (or load) a classifier for CONFIG.DATASET, attack the test data
    with Carlini-Wagner L2, attack the result a second time ("counter
    attack") and run the distance based analysis on the outcome.

    Intermediate arrays (logits and attacked images) are cached as .npy files
    under CONFIG.SAVE_PATH + 'data/' and are re-used when present.

    NOTE(review): the nesting in this function was reconstructed from a
    whitespace-collapsed source; spots where the code logic does not pin the
    indentation down are marked NOTE(review) below.

    :param argv: not referenced in the body
    :return: the AccuracyReport instance filled in by do_eval
    """
    from cleverhans_tutorials import check_installation
    check_installation(__file__)

    # Output folders: per-run results, per-run cached data, and a model
    # folder shared across runs of the same dataset.
    if not os.path.exists( CONFIG.SAVE_PATH ):
        os.makedirs( CONFIG.SAVE_PATH )
    save_path_data = CONFIG.SAVE_PATH + 'data/'
    if not os.path.exists( save_path_data ):
        os.makedirs( save_path_data )
    model_path = CONFIG.SAVE_PATH + '../all/' + CONFIG.DATASET + '/'
    if not os.path.exists( model_path ):
        os.makedirs( model_path )
        os.makedirs( model_path + 'data/' )

    nb_epochs = FLAGS.nb_epochs
    batch_size = FLAGS.batch_size
    learning_rate = FLAGS.learning_rate
    nb_filters = FLAGS.nb_filters
    # The first half of the test set stays clean, the second half is attacked
    # (see points_x / points_x_bar below).
    len_x = int(CONFIG.NUM_TEST/2)
    start = time.time()

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set seeds to improve reproducibility
    if CONFIG.DATASET == 'mnist' or CONFIG.DATASET == 'cifar10':
        tf.set_random_seed(1234)
        np.random.seed(1234)
        rd.seed(1234)
    elif CONFIG.DATASET == 'moon' or CONFIG.DATASET == 'dims':
        tf.set_random_seed(13)
        np.random.seed(1234)
        rd.seed(0)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session (capped at 20% of the GPU memory)
    tf_config = tf.ConfigProto(allow_soft_placement=True,log_device_placement=True)
    tf_config.gpu_options.per_process_gpu_memory_fraction = 0.2
    sess = tf.Session(config=tf_config)

    if CONFIG.DATASET == 'mnist':
        # Get MNIST data
        mnist = MNIST(train_start=0, train_end=CONFIG.NUM_TRAIN,
                      test_start=0, test_end=CONFIG.NUM_TEST)
        x_train, y_train = mnist.get_set('train')
        x_test, y_test = mnist.get_set('test')
    elif CONFIG.DATASET == 'cifar10':
        # Get CIFAR10 data
        data = CIFAR10(train_start=0, train_end=CONFIG.NUM_TRAIN,
                       test_start=0, test_end=CONFIG.NUM_TEST)
        dataset_size = data.x_train.shape[0]
        dataset_train = data.to_tensorflow()[0]
        # Augment (random flip + shift), batch and prefetch the training set
        dataset_train = dataset_train.map(
            lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
        dataset_train = dataset_train.batch(batch_size)
        dataset_train = dataset_train.prefetch(16)
        x_train, y_train = data.get_set('train')
        x_test, y_test = data.get_set('test')
    elif CONFIG.DATASET == 'moon':
        # Create a two moon example
        X, y = make_moons(n_samples=(CONFIG.NUM_TRAIN+CONFIG.NUM_TEST), noise=0.2,
                          random_state=0)
        X = StandardScaler().fit_transform(X)
        x_train1, x_test1, y_train1, y_test1 = train_test_split(X, y,
                                               test_size=(CONFIG.NUM_TEST/(CONFIG.NUM_TRAIN
                                                          +CONFIG.NUM_TEST)), random_state=0)
        x_train, y_train, x_test, y_test = normalize_reshape_inputs_2d(model_path, x_train1,
                                                                       y_train1, x_test1, y_test1)
    elif CONFIG.DATASET == 'dims':
        # Two moon data as above, then passed through add_noise_and_QR with
        # CONFIG.NUM_DIMS
        X, y = make_moons(n_samples=(CONFIG.NUM_TRAIN+CONFIG.NUM_TEST), noise=0.2,
                          random_state=0)
        X = StandardScaler().fit_transform(X)
        x_train1, x_test1, y_train1, y_test1 = train_test_split(X, y,
                                               test_size=(CONFIG.NUM_TEST/(CONFIG.NUM_TRAIN
                                                          +CONFIG.NUM_TEST)), random_state=0)
        x_train2, y_train, x_test2, y_test = normalize_reshape_inputs_2d(model_path, x_train1,
                                                                         y_train1,x_test1, y_test1)
        x_train, x_test = add_noise_and_QR(x_train2, x_test2, CONFIG.NUM_DIMS)

    # NOTE(review): these two saves may originally have been part of the
    # 'dims' branch only -- confirm.
    np.save(os.path.join(save_path_data, 'x_test'), x_test)
    np.save(os.path.join(save_path_data, 'y_test'), y_test)

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder (from here on `y` is the label placeholder,
    # not the make_moons labels)
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Train a model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': 1}
    rng = np.random.RandomState([2017, 8, 30])

    # Accuracies are appended to acc_param.txt; do_eval writes to `fi`, so
    # every do_eval call has to happen while the file is open.
    with open(CONFIG.SAVE_PATH + 'acc_param.txt', 'a') as fi:

        def do_eval(adv_x, preds, x_set, y_set, report_key):
            # Evaluate `preds` on (x_set, y_set), store the accuracy on the
            # report under `report_key`, log it to `fi`, and return the two
            # further values model_eval yields (bound to pred_np, adv_x_np).
            acc, pred_np, adv_x_np = model_eval(sess, x, y, preds, adv_x, nb_classes, x_set,
                                                y_set, args=eval_params)
            setattr(report, report_key, acc)
            if report_key:
                print('Accuracy on %s examples: %0.4f' % (report_key, acc), file=fi)
            return pred_np, adv_x_np

        # Pick the architecture and the checkpoint location per dataset
        if CONFIG.DATASET == 'mnist':
            trained_model_path = model_path + 'data/trained_model'
            model = ModelBasicCNN('model1', nb_classes, nb_filters)
        elif CONFIG.DATASET == 'cifar10':
            trained_model_path = model_path + 'data/trained_model'
            model = ModelAllConvolutional('model1', nb_classes, nb_filters,
                                          input_shape=[32, 32, 3])
        elif CONFIG.DATASET == 'moon':
            trained_model_path = model_path + 'data/trained_model'
            model = ModelMLP('model1', nb_classes)
        elif CONFIG.DATASET == 'dims':
            # 'dims' keeps its checkpoint with the per-run data
            trained_model_path = save_path_data + 'trained_model'
            model = ModelMLP_dyn('model1', nb_classes, CONFIG.NUM_DIMS)

        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=0.1)

        def evaluate():
            # Callback handed to train()/train_2d(): log the test accuracy
            _, _ = do_eval(x, preds, x_test, y_test, 'test during train')

        # Load the checkpoint if one exists, otherwise train and save it
        if os.path.isfile( trained_model_path + '.index' ):
            tf_model_load(sess, trained_model_path)
        else:
            if CONFIG.DATASET == 'mnist':
                train(sess, loss, x_train, y_train, evaluate=evaluate,
                      args=train_params, rng=rng, var_list=model.get_params())
            elif CONFIG.DATASET == 'cifar10':
                train(sess, loss, None, None,
                      dataset_train=dataset_train, dataset_size=dataset_size,
                      evaluate=evaluate, args=train_params, rng=rng,
                      var_list=model.get_params())
            elif CONFIG.DATASET == 'moon':
                train_2d(sess, loss, x, y, x_train, y_train, save=False, evaluate=evaluate,
                         args=train_params, rng=rng, var_list=model.get_params())
            elif CONFIG.DATASET == 'dims':
                train_2d(sess, loss, x, y, x_train, y_train, evaluate=evaluate,
                         args=train_params, rng=rng, var_list=model.get_params())
            saver = tf.train.Saver()
            saver.save(sess, trained_model_path)

        # Evaluate the accuracy on test examples (logits of the unattacked
        # test set are cached as logits_zero_attacked.npy)
        if os.path.isfile( save_path_data + 'logits_zero_attacked.npy' ):
            logits_0 = np.load(save_path_data + 'logits_zero_attacked.npy')
        else:
            _, _ = do_eval(x, preds, x_train, y_train, 'train')
            logits_0, _ = do_eval(x, preds, x_test, y_test, 'test')
            np.save(os.path.join(save_path_data, 'logits_zero_attacked'), logits_0)

        if CONFIG.DATASET == 'moon':
            # Evaluate the model on a regular grid over [0, 1] x [0, 1]
            # (cached per grid resolution in the shared model folder)
            num_grid_points = 5000
            if os.path.isfile( model_path + 'data/images_mesh' + str(num_grid_points) + '.npy' ):
                x_mesh = np.load(model_path + 'data/images_mesh' + str(num_grid_points) + '.npy')
                logits_mesh = np.load(model_path + 'data/logits_mesh' + str(num_grid_points) + '.npy')
            else:
                xx, yy = np.meshgrid(np.linspace(0, 1, num_grid_points),
                                     np.linspace(0, 1, num_grid_points))
                x_mesh1 = np.stack([np.ravel(xx), np.ravel(yy)]).T
                # Placeholder labels (all ones) for the grid points
                y_mesh1 = np.ones((x_mesh1.shape[0]),dtype='int64')
                x_mesh, y_mesh, _, _ = normalize_reshape_inputs_2d(model_path, x_mesh1, y_mesh1)
                logits_mesh, _ = do_eval(x, preds, x_mesh, y_mesh, 'mesh')
                x_mesh = np.squeeze(x_mesh)
                np.save(os.path.join(model_path, 'data/images_mesh'+str(num_grid_points)), x_mesh)
                np.save(os.path.join(model_path, 'data/logits_mesh'+str(num_grid_points)),
                        logits_mesh)

        # First half of the test set: kept clean.  Second half: attacked once.
        points_x = x_test[:len_x]
        points_y = y_test[:len_x]
        points_x_bar = x_test[len_x:]
        points_y_bar = y_test[len_x:]

        # Initialize the CW attack object and graph
        cw = CarliniWagnerL2(model, sess=sess)

        # first attack
        attack_params = {
            'learning_rate': CONFIG.CW_LEARNING_RATE,
            'max_iterations': CONFIG.CW_MAX_ITERATIONS
        }
        if CONFIG.DATASET == 'moon':
            out_a = compute_polytopes_a(x_mesh, logits_mesh, model_path)
            attack_params['const_a_min'] = out_a
            attack_params['const_a_max'] = 100
        adv_x = cw.generate(x, **attack_params)

        if os.path.isfile( save_path_data + 'images_once_attacked.npy' ):
            adv_img_1 = np.load(save_path_data + 'images_once_attacked.npy')
            logits_1 = np.load(save_path_data + 'logits_once_attacked.npy')
        else:
            # Evaluate the accuracy on adversarial examples
            preds_adv = model.get_logits(adv_x)
            logits_1, adv_img_1 = do_eval(adv_x, preds_adv, points_x_bar, points_y_bar,
                                          'test once attacked')
            np.save(os.path.join(save_path_data, 'images_once_attacked'), adv_img_1)
            np.save(os.path.join(save_path_data, 'logits_once_attacked'), logits_1)

        # counter attack
        attack_params['max_iterations'] = 1024
        if CONFIG.DATASET == 'moon':
            out_alpha2 = compute_epsilons_balls_alpha(x_mesh, np.squeeze(x_test),
                                                      np.squeeze(adv_img_1), model_path,
                                                      CONFIG.SAVE_PATH)
            attack_params['learning_rate'] = out_alpha2
            attack_params['const_a_min'] = -1
            attack_params['max_iterations'] = 2048
            plot_data(np.squeeze(adv_img_1), logits_1, CONFIG.SAVE_PATH+'data_pred1.png', x_mesh,
                      logits_mesh)
        adv_adv_x = cw.generate(x, **attack_params)

        # The second attack runs on the clean half followed by the
        # once-attacked half; the logits of the first attack serve as labels
        # for the latter.
        x_k = np.concatenate((points_x, adv_img_1), axis=0)
        y_k = np.concatenate((points_y, logits_1), axis=0)
        if os.path.isfile( save_path_data + 'images_twice_attacked.npy' ):
            adv_img_2 = np.load(save_path_data + 'images_twice_attacked.npy')
            logits_2 = np.load(save_path_data + 'logits_twice_attacked.npy')
        else:
            # Evaluate the accuracy on adversarial examples
            preds_adv_adv = model.get_logits(adv_adv_x)
            logits_2, adv_img_2 = do_eval(adv_adv_x, preds_adv_adv, x_k, y_k,
                                          'test twice attacked')
            np.save(os.path.join(save_path_data, 'images_twice_attacked'), adv_img_2)
            np.save(os.path.join(save_path_data, 'logits_twice_attacked'), logits_2)

        if CONFIG.DATASET == 'moon':
            plot_data(np.squeeze(adv_img_2[:len_x]), logits_2[:len_x],
                      CONFIG.SAVE_PATH+'data_pred2.png', x_mesh, logits_mesh)
            plot_data(np.squeeze(adv_img_2[len_x:]), logits_2[len_x:],
                      CONFIG.SAVE_PATH+'data_pred12.png', x_mesh, logits_mesh)
            # NOTE(review): test_balls may originally have run for every
            # dataset rather than only for 'moon' -- confirm.
            test_balls(np.squeeze(x_k), np.squeeze(adv_img_2), logits_0, logits_1, logits_2,
                       CONFIG.SAVE_PATH)

        compute_returnees(logits_0[len_x:], logits_1, logits_2[len_x:], logits_0[:len_x],
                          logits_2[:len_x], CONFIG.SAVE_PATH)

        # L2 distance covered by the last attack: D_p for the points that
        # were clean before it, D_p_p for the points that had already been
        # attacked once.  The distance is summed over all non-batch axes.
        if x_test.shape[-1] > 1:
            num_axis=(1,2,3)
        else:
            num_axis=(1,2)
        D_p = np.squeeze(np.sqrt(np.sum(np.square(points_x-adv_img_2[:len_x]), axis=num_axis)))
        D_p_p = np.squeeze(np.sqrt(np.sum(np.square(adv_img_1-adv_img_2[len_x:]),
                                          axis=num_axis)))
        D_p_mod, D_p_p_mod = modify_D(D_p, D_p_p, logits_0[len_x:], logits_1, logits_2[len_x:],
                                      logits_0[:len_x], logits_2[:len_x])

        # NOTE(review): `!= []` behaves like a plain emptiness test only if
        # modify_D returns Python lists -- confirm.
        if D_p_mod != [] and D_p_p_mod != []:
            plot_violins(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)
            threshold_evaluation(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)
            _ = compute_auroc(D_p_mod, D_p_p_mod, CONFIG.SAVE_PATH)

        # NOTE(review): may originally have been inside the `if` above --
        # confirm.
        plot_results_models(len_x, CONFIG.DATASET, CONFIG.SAVE_PATH)

    print('Time needed:', time.time()-start)

    return report
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=NB_EPOCHS,
                   batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE,
                   train_dir=TRAIN_DIR,
                   filename=FILENAME,
                   load_model=LOAD_MODEL,
                   testing=False,
                   label_smoothing=0.1):
    """
    CleverHans Keras tutorial flow, run on the CIFAR10 data set.

    A CNN is trained (or restored from a checkpoint) and evaluated on clean
    and on FGSM examples; a second CNN is then trained adversarially against
    FGSM and evaluated the same way.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training and evaluation batches
    :param learning_rate: learning rate for training
    :param train_dir: directory storing the saved model
    :param filename: filename to save model under
    :param load_model: True to restore a checkpoint from train_dir if one
                       exists, False to always train from scratch
    :param testing: if true, the accuracies on the training set are
                    recorded as well
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """
    tf.keras.backend.set_learning_phase(0)

    # Collects (and finally returns) the key accuracies
    report = AccuracyReport()

    # Fixed TF seed for reproducibility
    tf.set_random_seed(1234)

    if keras.backend.image_data_format() != 'channels_last':
        raise NotImplementedError(
            "this tutorial requires keras to be configured to channels_last format"
        )

    # One session, shared with the Keras backend
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Load the CIFAR10 splits
    dataset = CIFAR10(train_start=train_start,
                      train_end=train_end,
                      test_start=test_start,
                      test_end=test_end)
    x_train, y_train = dataset.get_set('train')
    x_test, y_test = dataset.get_set('test')

    # Image geometry and label count come from the data itself
    height, width, depth = x_train.shape[1:4]
    num_labels = y_train.shape[1]

    # Input placeholders
    x = tf.placeholder(tf.float32, shape=(None, height, width, depth))
    y = tf.placeholder(tf.float32, shape=(None, num_labels))

    def build_cnn():
        # Both networks of the tutorial share one architecture
        return cnn_model(img_rows=height,
                         img_cols=width,
                         channels=depth,
                         nb_filters=64,
                         nb_classes=num_labels)

    def accuracy_of(pred_tensor, inputs, labels):
        # Accuracy of `pred_tensor` on (inputs, labels); a fresh args dict
        # is built for every call
        return model_eval(sess,
                          x,
                          y,
                          pred_tensor,
                          inputs,
                          labels,
                          args={'batch_size': batch_size})

    # ---- first model: plain training -------------------------------------
    model = build_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Clean test accuracy of the normally trained model
        clean_acc = accuracy_of(preds, x_test, y_test)
        report.clean_train_clean_eval = clean_acc
        print('Test accuracy on legitimate examples: %0.4f' % clean_acc)

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }
    rng = np.random.RandomState([2017, 8, 30])

    if not os.path.exists(train_dir):
        os.mkdir(train_dir)

    ckpt = tf.train.get_checkpoint_state(train_dir)
    print(train_dir, ckpt)
    if ckpt is None:
        ckpt_path = False
    else:
        ckpt_path = ckpt.model_checkpoint_path

    wrap = KerasModelWrapper(model)

    if not (load_model and ckpt_path):
        print("Model was not loaded, training from scratch.")
        loss = CrossEntropy(wrap, smoothing=label_smoothing)
        train(sess,
              loss,
              x_train,
              y_train,
              evaluate=evaluate,
              args=train_params,
              rng=rng)
    else:
        saver = tf.train.Saver()
        print(ckpt_path)
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()

    # Optional: accuracy on the training data
    if testing:
        report.train_clean_train_clean_eval = accuracy_of(
            preds, x_train, y_train)

    # ---- FGSM against the first model ------------------------------------
    fgsm = FastGradientMethod(wrap, sess=sess)
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
    # The attack is treated as a constant: no gradient flows through it
    adv_x = tf.stop_gradient(fgsm.generate(x, **fgsm_params))
    preds_adv = model(adv_x)

    adv_acc = accuracy_of(preds_adv, x_test, y_test)
    print('Test accuracy on adversarial examples: %0.4f\n' % adv_acc)
    report.clean_train_adv_eval = adv_acc

    if testing:
        report.train_clean_train_adv_eval = accuracy_of(
            preds_adv, x_train, y_train)

    # ---- second model: adversarial training ------------------------------
    print("Repeating the process, using adversarial training")
    model_2 = build_cnn()
    wrap_2 = KerasModelWrapper(model_2)
    preds_2 = model_2(x)
    fgsm2 = FastGradientMethod(wrap_2, sess=sess)

    def attack(inputs):
        # FGSM examples crafted against the second model
        return fgsm2.generate(inputs, **fgsm_params)

    preds_2_adv = model_2(attack(x))
    loss_2 = CrossEntropy(wrap_2, smoothing=label_smoothing, attack=attack)

    def evaluate_2():
        # Adversarially trained model on legitimate test inputs
        legit_acc = accuracy_of(preds_2, x_test, y_test)
        print('Test accuracy on legitimate examples: %0.4f' % legit_acc)
        report.adv_train_clean_eval = legit_acc

        # ... and on adversarial examples
        attacked_acc = accuracy_of(preds_2_adv, x_test, y_test)
        print('Test accuracy on adversarial examples: %0.4f' % attacked_acc)
        report.adv_train_adv_eval = attacked_acc

    # Perform and evaluate adversarial training
    train(sess,
          loss_2,
          x_train,
          y_train,
          evaluate=evaluate_2,
          args=train_params,
          rng=rng)

    # Optional: accuracies on the training data
    if testing:
        report.train_adv_train_clean_eval = accuracy_of(
            preds_2, x_train, y_train)
        report.train_adv_train_adv_eval = accuracy_of(
            preds_2_adv, x_train, y_train)

    return report
def _pair_sources_with_targets(values, idxs):
    """Pair every entry selected by ``idxs`` with each *other* selected entry.

    Returns ``(sources, targets)``, both flattened along the first axis to
    ``len(idxs) * (len(idxs) - 1)`` rows. Row ``s * (len(idxs) - 1) + k`` of
    ``sources`` is ``values[idxs[s]]``; the same row of ``targets`` is the
    k-th entry of ``idxs`` once position ``s`` has been skipped.
    """
    count = len(idxs)
    tail_shape = tuple(values.shape[1:])
    sources = np.array([[values[src]] * (count - 1) for src in idxs])
    targets = np.array([[values[idxs[pos]] for pos in range(count)
                         if pos != src_pos]
                        for src_pos in range(count)])
    return (sources.reshape((-1,) + tail_shape),
            targets.reshape((-1,) + tail_shape))


def _reconstruct_and_classify(sess, x, vae_wrap, cl_wrap, clean_inputs,
                              adv_examples):
    """Reconstruct clean and adversarial inputs, then classify the latter.

    Returns ``(recon_clean, recon_adv, logits)`` where ``logits`` are the
    classifier outputs for ``recon_adv``.
    """
    recon_tensor = vae_wrap.get_layer(x, 'RECON')
    logits_tensor = cl_wrap.get_logits(x)
    recon_clean = sess.run(recon_tensor, feed_dict={x: clean_inputs})
    recon_adv = sess.run(recon_tensor, feed_dict={x: adv_examples})
    logits = sess.run(logits_tensor, feed_dict={x: recon_adv})
    return recon_clean, recon_adv, logits


def _print_recon_attack_metrics(adv_examples, clean_inputs, target_inputs,
                                recon_adv, logits, clean_labels,
                                target_labels):
    """Print perturbation size, reconstruction distances and accuracies."""
    nb_inputs = np.shape(clean_inputs)[0]
    noise = np.sum(np.square(adv_examples - clean_inputs))
    noise = pow(noise / np.shape(adv_examples)[0], 0.5)
    # Summed squared distance of the reconstruction to the clean source (d1)
    # and to the attack target image (d2), averaged over examples.
    d1 = np.sum(np.square(recon_adv - clean_inputs)) / nb_inputs
    d2 = np.sum(np.square(recon_adv - target_inputs)) / nb_inputs
    predicted = np.argmax(logits, axis=-1)
    acc_1 = np.mean(predicted == np.argmax(target_labels, axis=-1))
    acc_2 = np.mean(predicted == np.argmax(clean_labels, axis=-1))
    print("noise: ", noise)
    print("d1: ", d1)
    print("d2: ", d2)
    print("classifier acc_target: ", acc_1)
    print("classifier acc_true: ", acc_2)


def _fill_viz_grids(grid_recon, grid_adv, recon_clean, recon_adv,
                    clean_inputs, adv_examples, nb_classes):
    """Fill both nb_classes x nb_classes visualization grids in place.

    The diagonal shows the clean source (and its reconstruction); every
    off-diagonal cell shows the adversarial example (and its reconstruction)
    for that (source, target) pair.
    """
    per_source = nb_classes - 1
    for j in range(nb_classes):
        for i in range(nb_classes):
            if i == j:
                grid_recon[i, j] = recon_clean[j * per_source]
                grid_adv[i, j] = clean_inputs[j * per_source]
            else:
                # Each source has no entry for itself, so targets located
                # after the source are shifted down by one.
                k = i * per_source + (j - 1 if j > i else j)
                grid_recon[i, j] = recon_adv[k]
                grid_adv[i, j] = adv_examples[k]


def _save_grid_figure(grid, out_path):
    """Render a 5-D image grid as subplots and save it to ``out_path``."""
    plt.ioff()
    figure = plt.figure()
    figure.canvas.set_window_title('Cleverhans: Grid Visualization')
    num_cols = grid.shape[0]
    num_rows = grid.shape[1]
    num_channels = grid.shape[4]
    for yy in range(num_rows):
        for xx in range(num_cols):
            figure.add_subplot(num_rows, num_cols,
                               (xx + 1) + (yy * num_cols))
            plt.axis('off')
            if num_channels == 1:
                plt.imshow(grid[xx, yy, :, :, 0])
            else:
                plt.imshow(grid[xx, yy, :, :, :])
    plt.savefig(out_path)
    # Release the figure; several 100-subplot figures are produced per run.
    plt.close(figure)


def cifar10_cw_recon(train_start=0, train_end=60000, test_start=0,
                     test_end=10000, viz_enabled=VIZ_ENABLED,
                     nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                     source_samples=SOURCE_SAMPLES,
                     learning_rate=LEARNING_RATE,
                     attack_iterations=ATTACK_ITERATIONS,
                     model_path=MODEL_PATH, model_path_cls=MODEL_PATH,
                     targeted=TARGETED, num_threads=None,
                     label_smoothing=0.1, nb_filters=NB_FILTERS,
                     filename=FILENAME, train_dir_ae=TRAIN_DIR_AE,
                     train_dir_cl=TRAIN_DIR_CL):
    """Attack a CIFAR-10 VAE + classifier pipeline with Carlini-Wagner.

    Stages, in order:
      1. Train the VAE, or restore it from ``train_dir/model_vae.ckpt``.
      2. Train the CNN classifier, or restore it from
         ``train_dir/model_cnn_cl_vae.ckpt``.
      3. Craft adversarial examples with ``CarliniWagnerAE`` for every
         (source class, other class) pair, print metrics, save grid figures.
      4. Optionally (``adv_train``) train an autoencoder on adversarial
         data and attack it again.
      5. Optionally re-evaluate after a binarization defense
         (``binarization_defense``) and/or a mean filter (``mean_filtering``).

    The switches ``clean_train_vae``, ``train_further``, ``clean_train_cl``,
    ``adv_train``, ``binarization_defense`` and ``mean_filtering`` are read
    from module scope; they are not defined in this function.

    NOTE(review): the source indices are only built when ``viz_enabled`` is
    set and the attack inputs only when ``targeted`` is set, so the code
    appears to support just the ``targeted and viz_enabled`` configuration.

    Parameters
    ----------
    train_start, train_end, test_start, test_end : int
        Slice bounds forwarded to ``CIFAR10``.
    viz_enabled : bool
        Build the visualization grids and save the figures.
    nb_epochs, batch_size, learning_rate
        Training settings for the VAE (the classifier uses 3 epochs).
    source_samples : int
        Number of source images; must equal the class count when
        ``viz_enabled`` is set.
    attack_iterations : int
        ``max_iterations`` of the attack.
    targeted : bool
        Build one (source, target) pair per pair of distinct classes.
    num_threads
        If truthy, the session runs with ``intra_op_parallelism_threads=1``.
    label_smoothing : float
        Smoothing passed to the classifier's ``CrossEntropy`` loss.
    filename, train_dir_ae, train_dir_cl
        Forwarded to the training helpers through their ``args`` dicts.
    model_path, model_path_cls, nb_filters
        Accepted for interface compatibility; not used in this function.

    Returns
    -------
    None
        Results are printed and figures are written to disk.
    """
    # Object used to keep track of key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create a single TF session (a second, unconfigured session used to be
    # created first and leaked).
    config_args = dict(intra_op_parallelism_threads=1) if num_threads else {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))
    print("Created TensorFlow session.")
    set_log_level(logging.DEBUG)

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start, train_end=train_end,
                   test_start=test_start, test_end=test_end)
    # NOTE(review): this augmented input pipeline is built but never consumed
    # below; it is kept so the TF graph is constructed exactly as before.
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Obtain image parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]
    print("img_Rows, img_cols, nchannels: ", img_rows, img_cols, nchannels)

    # Define input TF placeholders
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    x_t = tf.placeholder(tf.float32,
                         shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    y_t = tf.placeholder(tf.float32, shape=(None, nb_classes))

    wrap_vae = ModelVAE('wrap_vae')
    recon = wrap_vae.get_layer(x, 'RECON')
    print("Defined TensorFlow model graph.")

    def evaluate_ae():
        # Report the reconstruction distance on the training images. Reads
        # `recon` and `x_train` from the enclosing scope at call time.
        eval_params = {'batch_size': 128}
        noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(
            sess, x, x_t, recon, x_train, x_train, args=eval_params)
        print("reconstruction distance: ", d1)

    # ------------------------------------------------------------------
    # Stage 1: VAE
    # ------------------------------------------------------------------
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir_ae,
        'filename': filename
    }
    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir_ae):
        os.mkdir(train_dir_ae)

    if clean_train_vae:
        print("Training VAE")
        loss = vae_loss(wrap_vae)
        train_ae(sess, loss, x_train, x_train, evaluate=evaluate_ae,
                 args=train_params, rng=rng,
                 var_list=wrap_vae.get_params())
        saver = tf.train.Saver()
        saver.save(sess, "train_dir/model_vae.ckpt")
        print("saved model")
    else:
        print("Loading VAE")
        saver = tf.train.Saver()
        saver.restore(sess, "train_dir/model_vae.ckpt")
        evaluate_ae()
        if train_further:
            train_params = {
                'nb_epochs': 10,
                'batch_size': batch_size,
                'learning_rate': learning_rate,
                'train_dir': train_dir_ae,
                'filename': filename
            }
            # Training with the saved model as starting point.
            loss = SquaredError(wrap_vae)
            # Was `evaluate=evaluate_vae`, a name that is defined nowhere.
            train_ae(sess, loss, x_train, x_train, evaluate=evaluate_ae,
                     args=train_params, rng=rng)
            saver = tf.train.Saver()
            saver.save(sess, "train_dir/model_ae_final.ckpt")
            evaluate_ae()
            print("Model loaded and trained for more epochs")

    # ------------------------------------------------------------------
    # Stage 2: classifier
    # ------------------------------------------------------------------
    cl_model = cnn_cl_model(img_rows=img_rows, img_cols=img_cols,
                            channels=nchannels, nb_filters=64,
                            nb_classes=nb_classes)
    preds_cl = cl_model(x)

    def evaluate():
        # Evaluate the accuracy of the classifier on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_cl, x_t, x_test, y_test, x_test,
                         args=eval_params)
        report.clean_train_clean_eval = acc
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    train_params = {
        'nb_epochs': 3,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir_cl,
        'filename': filename
    }
    rng = np.random.RandomState([2017, 8, 30])
    if not os.path.exists(train_dir_cl):
        os.mkdir(train_dir_cl)

    wrap_cl = KerasModelWrapper(cl_model)
    cl_ckpt_path = "train_dir/model_cnn_cl_vae.ckpt"
    if clean_train_cl:
        print("Training CNN Classifier")
        loss_cl = CrossEntropy(wrap_cl, smoothing=label_smoothing)
        train(sess, loss_cl, x_train, y_train, evaluate=evaluate,
              optimizer=tf.train.RMSPropOptimizer(learning_rate=0.0001,
                                                  decay=1e-6),
              args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, cl_ckpt_path)
        # The message used to name a different file than the one written.
        print("saved model at ", cl_ckpt_path)
    else:
        print("Loading CNN Classifier")
        saver = tf.train.Saver()
        saver.restore(sess, cl_ckpt_path)
        print("Model loaded")
        evaluate()

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerAE(wrap_vae, wrap_cl, sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        # First test image of every class.
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]

    if targeted:
        if viz_enabled:
            # Initialize our arrays for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')
            grid_viz_data_1 = np.zeros(grid_shape, dtype='f')

        # Every source image paired with the image / label of each other
        # class, flattened to source_samples * (nb_classes - 1) rows.
        adv_inputs, adv_input_targets = _pair_sources_with_targets(
            x_test, idxs)
        adv_inputs = adv_inputs.astype(np.float32)
        adv_input_y, adv_target_y = _pair_sources_with_targets(y_test, idxs)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
        yname = "y_target"

    cw_params_batch_size = source_samples * (nb_classes - 1)
    cw_params = {
        'binary_search_steps': 1,
        yname: adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': CW_LEARNING_RATE,
        'batch_size': cw_params_batch_size,
        'initial_const': 1
    }

    adv = cw.generate_np(adv_inputs, adv_input_targets, **cw_params)

    recon_orig, recon_adv, pred_adv_recon = _reconstruct_and_classify(
        sess, x, wrap_vae, wrap_cl, adv_inputs, adv)
    _print_recon_attack_metrics(adv, adv_inputs, adv_input_targets,
                                recon_adv, pred_adv_recon, adv_input_y,
                                adv_target_y)

    if viz_enabled and targeted:
        _fill_viz_grids(grid_viz_data, grid_viz_data_1, recon_orig,
                        recon_adv, adv_inputs, adv, nb_classes)

    print('--------------------------------------')

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Save grids of all the adversarial examples and their reconstructions
    if viz_enabled:
        _save_grid_figure(grid_viz_data, 'cifar10_vae_fig1')
        _save_grid_figure(grid_viz_data_1, 'cifar10_vae_fig2')

    # ------------------------------------------------------------------
    # Adversarial training
    # ------------------------------------------------------------------
    if adv_train:
        print("starting adversarial training")
        adv_input_set = []
        adv_input_target_set = []
        for _ in range(20):
            # Reshuffle so that each round picks a different first image
            # per class. x_train / y_train stay shuffled afterwards.
            indices = np.arange(np.shape(x_train)[0])
            np.random.shuffle(indices)
            print("indices: ", indices[1:10])
            x_train = x_train[indices]
            y_train = y_train[indices]
            train_idxs = [
                np.where(np.argmax(y_train, axis=1) == i)[0][0]
                for i in range(nb_classes)
            ]
            adv_inputs_2, adv_input_targets_2 = _pair_sources_with_targets(
                x_train, train_idxs)
            adv_input_set.append(adv_inputs_2.astype(np.float32))
            adv_input_target_set.append(adv_input_targets_2)

        # A stray trailing comma used to turn adv_input_set into a 1-tuple,
        # which the following reshape compensated for with an extra axis.
        adv_input_set = np.array(adv_input_set)
        adv_input_target_set = np.array(adv_input_target_set)
        print("shape of adv_input_set: ", np.shape(adv_input_set))
        print("shape of adv_input_target_set: ",
              np.shape(adv_input_target_set))
        image_shape = (img_rows, img_cols, nchannels)
        adv_input_set = adv_input_set.reshape((-1,) + image_shape)
        adv_input_target_set = adv_input_target_set.reshape(
            (-1,) + image_shape)
        print("generated adversarial training set")

        adv_set = cw.generate_np(adv_input_set, adv_input_target_set,
                                 **cw_params)
        # Network inputs are clean + adversarial images; the reconstruction
        # targets are clean images + the clean sources of the adversarials.
        x_train_aim = np.append(x_train, adv_input_set, axis=0)
        x_train_app = np.append(x_train, adv_set, axis=0)

        model_ae_adv = ae_model(x, img_rows=img_rows, img_cols=img_cols,
                                channels=nchannels)
        # Rebinding `recon` makes evaluate_ae() score the new model.
        recon = model_ae_adv(x)
        wrap_vae_adv = KerasModelWrapper(model_ae_adv)

        print("Training Adversarial AE")
        loss = SquaredError(wrap_vae_adv)
        # Was `loss_2`, which is not defined in this function.
        # NOTE(review): train_params still holds the classifier settings
        # (3 epochs, train_dir_cl) at this point - confirm this is intended.
        train_ae(sess, loss, x_train_app, x_train_aim, evaluate=evaluate_ae,
                 args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, "train_dir/model_ae_adv.ckpt")
        print("saved model")

        cw2 = CarliniWagnerAE(wrap_vae_adv, wrap_cl, sess=sess)
        adv_2 = cw2.generate_np(adv_inputs, adv_input_targets, **cw_params)

        recon_orig, recon_adv, pred_adv_recon = _reconstruct_and_classify(
            sess, x, wrap_vae_adv, wrap_cl, adv_inputs, adv_2)

        if targeted:
            # Metrics now use adv_2 (this attack) rather than `adv` from the
            # first attack. The logits are no longer overwritten by a
            # `cl_model.get_layer(recon_adv)` call, and the symbolic
            # (unevaluated) noise tensor is no longer printed.
            _print_recon_attack_metrics(adv_2, adv_inputs,
                                        adv_input_targets, recon_adv,
                                        pred_adv_recon, adv_input_y,
                                        adv_target_y)

        if viz_enabled:
            _fill_viz_grids(grid_viz_data, grid_viz_data_1, recon_orig,
                            recon_adv, adv_inputs, adv_2, nb_classes)

        print('--------------------------------------')

        percent_perturbed = np.mean(
            np.sum((adv_2 - adv_inputs)**2, axis=(1, 2, 3))**.5)
        print('Avg. L_2 norm of perturbations {0:.4f}'.format(
            percent_perturbed))

        if viz_enabled:
            _save_grid_figure(grid_viz_data, 'cifar10_fig1_vae_adv_trained')
            _save_grid_figure(grid_viz_data_1,
                              'cifar10_fig2_vae_adv_trained')

    # ------------------------------------------------------------------
    # Binarization defense
    # ------------------------------------------------------------------
    if binarization_defense:
        print("BINARIZATION")
        print("---------------------------")
        # In-place thresholding: `adv` stays binarized for later stages.
        adv[adv > 0.5] = 1.0
        adv[adv <= 0.5] = 0.0

        recon_orig, recon_adv, pred_adv_recon = _reconstruct_and_classify(
            sess, x, wrap_vae, wrap_cl, adv_inputs, adv)

        if targeted:
            _print_recon_attack_metrics(adv, adv_inputs, adv_input_targets,
                                        recon_adv, pred_adv_recon,
                                        adv_input_y, adv_target_y)

        if viz_enabled:
            _fill_viz_grids(grid_viz_data, grid_viz_data_1, recon_orig,
                            recon_adv, adv_inputs, adv, nb_classes)
            _save_grid_figure(grid_viz_data, 'cifar10_vae_fig1_bin')
            _save_grid_figure(grid_viz_data_1, 'cifar10_vae_fig2_bin')

    # ------------------------------------------------------------------
    # Mean-filter defense
    # ------------------------------------------------------------------
    if mean_filtering:
        print("MEAN FILTERING")
        print("---------------------------")
        # NOTE(review): a scalar size makes uniform_filter average along all
        # four axes, including the batch and channel axes - confirm whether
        # a purely spatial filter, size=(1, 2, 2, 1), was intended.
        adv = uniform_filter(adv, 2)

        recon_orig, recon_adv, pred_adv_recon = _reconstruct_and_classify(
            sess, x, wrap_vae, wrap_cl, adv_inputs, adv)

        _print_recon_attack_metrics(adv, adv_inputs, adv_input_targets,
                                    recon_adv, pred_adv_recon, adv_input_y,
                                    adv_target_y)

        if viz_enabled:
            _fill_viz_grids(grid_viz_data, grid_viz_data_1, recon_orig,
                            recon_adv, adv_inputs, adv, nb_classes)
            _save_grid_figure(grid_viz_data, 'cifar10_vae_fig1_mean')
            _save_grid_figure(grid_viz_data_1, 'cifar10_vae_fig2_mean')
def evaluate(dataset='CIFAR100'):
    """Plot benign accuracy against mean C&W L2 distortion for saved models.

    Every ``save/<dataset>/<defense>/<run>/model.joblib`` found for the
    defenses 'Naive', 'Goodfellow', 'Madry' and 'PGN' is loaded, evaluated
    on the test set and attacked with ``CarliniWagnerL2``. Per-model results
    are cached next to the model in a ``cash_result`` file
    (``acc,l2mean,ctime``) and reused while the model file's ctime matches.
    The curve is written to ``evaluate/<dataset>/robustness-curve.png``.

    Parameters
    ----------
    dataset : str, optional
        Either 'CIFAR100' or 'CIFAR10'.

    Raises
    ------
    ValueError
        If ``dataset`` is not one of the supported names.
    """
    # Fail early: an unknown name used to surface later as an unrelated
    # error (missing directory or unbound local).
    if dataset not in ('CIFAR100', 'CIFAR10'):
        raise ValueError(
            "Unsupported dataset: %r (expected 'CIFAR100' or 'CIFAR10')" %
            (dataset,))

    batch_size = 128
    test_num = 10000
    defense_list = ['Naive', 'Goodfellow', 'Madry', 'PGN']
    cache_name = 'cash_result'

    model_path_list = []
    for defense in defense_list:
        defense_dir = 'save/%s/%s' % (dataset, defense)
        # A defense that has not been trained yet is skipped, not fatal.
        if not os.path.isdir(defense_dir):
            continue
        for run_name in os.listdir(defense_dir):
            candidate = '%s/%s/model.joblib' % (defense_dir, run_name)
            if os.path.exists(candidate):
                model_path_list.append(candidate)

    if dataset == 'CIFAR100':
        data = CIFAR100(test_start=0, test_end=test_num)
        nb_classes = 100
    else:
        data = CIFAR10(test_start=0, test_end=test_num)
        nb_classes = 10
    x_test, y_test = data.get_set('test')
    x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    sess = tf.Session()
    cw_params = {
        'batch_size': 128,
        'clip_min': 0.,
        'clip_max': 1.,
        'max_iterations': 100,
        'y': y
    }
    eval_params = {'batch_size': batch_size}

    def do_eval(preds, x_set, y_set, report_text):
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        print('Test accuracy on %s: %0.4f' % (report_text, acc))
        return acc

    def get_adv_x_numpy(adv_x, attack_success_index, x_set, y_set):
        # Run the attack graph batch by batch. Every feed is a full
        # batch_size buffer; only the first cur_batch_size rows of the
        # outputs are kept, so the last partial batch is handled.
        result = []
        result_index = []
        nb_batches = int(math.ceil(float(len(x_set)) / batch_size))
        X_cur = np.zeros((batch_size, ) + x_set.shape[1:],
                         dtype=x_set.dtype)
        Y_cur = np.zeros((batch_size, ) + y_set.shape[1:],
                         dtype=y_set.dtype)
        for batch in range(nb_batches):
            start = batch * batch_size
            end = min(len(x_set), start + batch_size)
            cur_batch_size = end - start
            X_cur[:cur_batch_size] = x_set[start:end]
            Y_cur[:cur_batch_size] = y_set[start:end]
            feed_dict = {x: X_cur, y: Y_cur}
            adv_x_numpy, success_index = sess.run(
                [adv_x, attack_success_index], feed_dict=feed_dict)
            result.append(adv_x_numpy[:cur_batch_size])
            result_index.append(success_index[:cur_batch_size])
        return (np.concatenate(result, axis=0),
                np.concatenate(result_index, axis=0))

    def read_cache(model_path):
        # Return the cached (acc, l2mean) strings, or None when the cache is
        # missing, malformed, or older than the model file.
        cache_path = os.path.join(os.path.dirname(model_path), cache_name)
        if not os.path.exists(cache_path):
            return None
        with open(cache_path, 'r') as f:
            fields = f.read().split(",")
        try:
            acc_str, l2mean_str, model_create_time = fields
            float(acc_str)
            float(l2mean_str)
            if int(model_create_time) != int(os.path.getctime(model_path)):
                return None
        except ValueError:
            # A corrupt cache entry is treated as a miss and recomputed.
            return None
        return acc_str, l2mean_str

    print(model_path_list)
    acc_dict = {}
    l2mean_dict = {}
    try:
        for model_path in model_path_list:
            # Path layout: save/<dataset>/<defense>/<run>/model.joblib
            defense = model_path.split('/')[2]
            acc_dict.setdefault(defense, [])
            l2mean_dict.setdefault(defense, [])

            cached = read_cache(model_path)
            if cached is not None:
                acc, l2mean = cached
                acc_dict[defense].append(float(acc))
                l2mean_dict[defense].append(float(l2mean))
                print(model_path, acc, l2mean)
                continue

            with sess.as_default():
                model = load(model_path)
            attack_model = CarliniWagnerL2(model, sess=sess)
            preds = model.get_logits(x)
            acc = do_eval(preds, x_test[:test_num], y_test[:test_num],
                          'DEFENSE : %s' % defense)
            adv_x = attack_model.generate(x, **cw_params)
            preds_adv = model.get_logits(adv_x)
            # The attack succeeded where the adversarial prediction differs
            # from the true label.
            attack_success_index = tf.math.not_equal(
                tf.argmax(preds_adv, axis=-1), tf.argmax(y, axis=-1))
            adv_x_numpy, success_index = get_adv_x_numpy(
                adv_x, attack_success_index, x_test[:test_num],
                y_test[:test_num])
            print('C&W attack success_rate = %f' % np.mean(success_index))
            # Mean L2 distortion over successful adversarial examples only.
            l2mean = np.mean(
                np.sqrt(
                    np.sum(np.power(
                        adv_x_numpy[success_index] -
                        x_test[:test_num][success_index], 2),
                           axis=(1, 2, 3))))
            acc_dict[defense].append(acc)
            l2mean_dict[defense].append(l2mean)
            print(model_path, acc, l2mean)
            cache_path = os.path.join(os.path.dirname(model_path),
                                      cache_name)
            with open(cache_path, 'w') as f:
                f.write('%.4f,%.4f,%d' %
                        (acc, l2mean, os.path.getctime(model_path)))
    finally:
        # The session is not needed for plotting; release it even on error.
        sess.close()

    for defense in defense_list:
        if defense not in l2mean_dict:
            continue
        # Sort each defense's points by distortion so the line is monotone
        # in the x-axis.
        l2mean_dict[defense] = np.array(l2mean_dict[defense])
        acc_dict[defense] = np.array(acc_dict[defense])
        arg_l2mean_dict = np.argsort(l2mean_dict[defense])
        l2mean_dict[defense] = l2mean_dict[defense][arg_l2mean_dict]
        acc_dict[defense] = acc_dict[defense][arg_l2mean_dict]
        plt.plot(l2mean_dict[defense], acc_dict[defense], '-o',
                 label=defense)
    plt.legend()
    plt.xlabel('$\\rho_{cw}$')
    plt.ylabel('benign accuracy')
    plt.title("RESULT FOR %s" % dataset)
    fig_save_dir = 'evaluate/%s' % dataset
    if not os.path.exists(fig_save_dir):
        os.makedirs(fig_save_dir)
    plt.savefig('%s/robustness-curve.png' % fig_save_dir)
def mnist_ae(train_start=0, train_end=60000, test_start=0, test_end=10000,
             nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
             learning_rate=LEARNING_RATE, clean_train=CLEAN_TRAIN,
             testing=False, backprop_through_attack=BACKPROP_THROUGH_ATTACK,
             num_threads=None, label_smoothing=0.1):
    """
    Run a targeted FGSM-style attack against a VAE + CNN classifier pipeline.

    Despite the function name, the data loaded here is CIFAR10. The function:

    1. builds a `ModelVAE` wrapper and either trains it or restores it from
       "train_dir/model_vae.ckpt";
    2. builds a CNN classifier (`cnn_cl_model`) and either trains it or
       restores it from "train_dir/model_cnn_cl.ckpt";
    3. crafts adversarial inputs with `FastGradientMethodAe`, pushing each
       source image towards a target image, and prints distance / accuracy
       statistics computed on the VAE reconstructions;
    4. optionally saves grid visualizations, retrains a second VAE on
       adversarial data, and re-evaluates after binarization and mean
       filtering of the adversarial inputs.

    The following names are read but not defined in this function; presumably
    they are module-level globals (TODO confirm): `clean_train_vae`,
    `clean_train_cl`, `train_further`, `targeted`, `viz_enabled`,
    `adversarial_training`, `binarization`, `mean_filtering`,
    `eval_params_cls`, `train_dir_cl`, `filename`.

    NOTE(review): this block was reconstructed from a source whose line
    breaks and indentation were lost. The nesting of the `if targeted:` /
    `if viz_enabled:` sections and of the plotting code is a best guess and
    should be checked against the original file.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs used when training the VAE from scratch
    :param batch_size: size of training batches
    :param learning_rate: learning rate stored in the training parameter dicts
    :param clean_train: not used in this function
    :param testing: not used in this function
    :param backprop_through_attack: not used in this function
    :param num_threads: if truthy, the session is configured with
                        intra_op_parallelism_threads=1 (the value itself is
                        not used)
    :param label_smoothing: float, amount of label smoothing for the
                            classifier's cross entropy loss
    :return: None (the `return report` statement is commented out)
    """
    report = AccuracyReport()
    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)
    # This generator is replaced by a seeded one further down.
    rng = np.random.RandomState()
    # Number of source images to attack; must equal nb_classes when
    # visualization is enabled (asserted below).
    source_samples = 10
    # Create TF session
    # NOTE(review): this first session is replaced below without being closed.
    sess = tf.Session()
    print("Created TensorFlow session.")
    set_log_level(logging.DEBUG)
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))
    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start, train_end=train_end,
                   test_start=test_start, test_end=test_end)
    # dataset_size and dataset_train are built here but not read again in
    # this function.
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')
    nb_latent_size = 100
    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]
    print("img_Rows, img_cols, nchannels: ", img_rows, img_cols, nchannels)
    # Define input TF placeholders: x / y are the source images and labels,
    # x_t / y_t are the target images and labels of the attack.
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    x_t = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    y_t = tf.placeholder(
        tf.float32, shape=(None, nb_classes))
    #z = tf.placeholder(tf.float32, shape = (None, nb_latent_size))
    #z_t = tf.placeholder(tf.float32, shape = (None, nb_latent_size))
    ''' save_dir= 'models' model_name = 'cifar10_AE.h5' model_path_ae = os.path.join(save_dir, model_name) '''
    #model_ae= ae_model(x, img_rows=img_rows, img_cols=img_cols,
    # channels=nchannels)
    #recon = model_ae(x)
    #print("recon: ",recon)
    wrap_ae = ModelVAE('wrap_ae')
    recon = wrap_ae.get_layer(x,'RECON')
    print("Defined TensorFlow model graph.")

    def evaluate_ae():
        # Evaluate the autoencoder on the training images (input and target
        # are both x_train) and print the first distance it reports.
        eval_params = {'batch_size': 128}
        noise, d1, d2, dist_diff, avg_dist_lat = model_eval_ae(sess, x, x_t,recon, x_train, x_train, args=eval_params)
        print("reconstruction distance: ", d1)

    # Training parameters for the VAE
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        #'train_dir': train_dir_ae,
        #'filename': filename
    }
    rng = np.random.RandomState([2017, 8, 30])
    #if not os.path.exists(train_dir_ae):
    # os.mkdir(train_dir_ae)
    #ckpt = tf.train.get_checkpoint_state(train_dir_ae)
    #print(train_dir_ae, ckpt)
    #ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path
    if clean_train_vae==True:
        print("Training VAE")
        loss = vae_loss(wrap_ae)
        train_ae(sess, loss, x_train, x_train, tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5), evaluate=evaluate_ae, args=train_params, rng=rng, var_list = wrap_ae.get_params())
        saver = tf.train.Saver()
        # NOTE(review): saved as model_vae_fgsm.ckpt, while the loading branch
        # below restores model_vae.ckpt - confirm this is intended.
        saver.save(sess, "train_dir/model_vae_fgsm.ckpt")
        print("saved model")
    else:
        print("Loading VAE")
        saver = tf.train.Saver()
        #print(ckpt_path)
        saver.restore(sess, "train_dir/model_vae.ckpt")
        evaluate_ae()
        if(train_further):
            train_params = {
                'nb_epochs': 10,
                'batch_size': batch_size,
                'learning_rate': 0.0002,
            }
            #training with the saved model as starting point
            loss = SquaredError(wrap_ae)
            train_ae(sess, loss, x_train, x_train, optimizer = tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5), evaluate=evaluate_ae, args=train_params, rng=rng)
            saver = tf.train.Saver()
            saver.save(sess, "train_dir/model_vae_fgsm.ckpt")
            evaluate_ae()
            print("Model loaded and trained for more epochs")
    num_classes = 10
    ''' save_dir= 'models' model_name = 'cifar10_CNN.h5' model_path_cls = os.path.join(save_dir, model_name) '''
    cl_model = cnn_cl_model(img_rows=img_rows, img_cols=img_cols,
                            channels=nchannels, nb_filters=64,
                            nb_classes=nb_classes)
    preds_cl = cl_model(x)

    def do_eval_cls(preds, x_set, y_set, x_tar_set,report_key, is_adv = None):
        # NOTE(review): the accuracy is computed but neither returned nor
        # stored, and eval_params_cls is not defined in this function.
        acc = model_eval(sess, x, y, preds, x_t, x_set, y_set, x_tar_set, args=eval_params_cls)

    def evaluate():
        # Evaluate the accuracy of the classifier on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_cl,x_t, x_test, y_test, x_test,args=eval_params)
        report.clean_train_clean_eval = acc
        # assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    train_params = {
        'nb_epochs': 100,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        #'train_dir': train_dir_cl,
        #'filename': filename
    }
    rng = np.random.RandomState([2017, 8, 30])
    wrap_cl = KerasModelWrapper(cl_model)
    if clean_train_cl == True:
        train_params = {
            'nb_epochs': 5,
            'batch_size': batch_size,
            'learning_rate': learning_rate,
            #'train_dir': train_dir_cl,
            #'filename': filename
        }
        print("Training CNN Classifier")
        ''' datagen = ImageDataGenerator( rotation_range=15, width_shift_range=0.1, height_shift_range=0.1, horizontal_flip=True, ) datagen.fit(x_train) '''
        loss_cl = CrossEntropy(wrap_cl, smoothing=label_smoothing)
        #for x_batch, y_batch in datagen.flow(x_train, y_train, batch_size = 128):
        # train(sess, loss_cl, x_batch, y_batch, tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5), evaluate=evaluate,
        # args=train_params, rng=rng)
        train(sess, loss_cl, x_train, y_train, evaluate=evaluate, optimizer = tf.train.RMSPropOptimizer(learning_rate = 0.0001, decay = 1e-6), args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, "train_dir/model_cnn_cl.ckpt")
        # NOTE(review): the path printed here differs from the path actually
        # written on the previous line.
        print("saved model at ", "train_dir/model_cnn_cl_fgsm.ckpt")
    else:
        print("Loading CNN Classifier")
        saver = tf.train.Saver()
        #print(ckpt_path)
        saver.restore(sess, "train_dir/model_cnn_cl.ckpt")
        evaluate()
        if(train_further):
            train_params = {
                'nb_epochs': 10,
                'batch_size': batch_size,
                'learning_rate': learning_rate,
                'train_dir': train_dir_cl,
                'filename': filename
            }
            loss_cl = CrossEntropy(wrap_cl, smoothing=label_smoothing)
            train(sess, loss_cl, x_train, y_train, evaluate=evaluate, optimizer = tf.train.RMSPropOptimizer(learning_rate = 0.0001, decay = 1e-6), args=train_params, rng=rng)
            saver = tf.train.Saver()
            saver.save(sess, "train_dir/model_cl_fgsm.ckpt")
            print("Model loaded and trained further")
            evaluate()
    ###########################################################################
    # Craft adversarial examples (the attack object used below is
    # FastGradientMethodAe; the Carlini-Wagner variant is commented out)
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")
    # Instantiate a CW attack object
    #cw = CarliniWagnerAE(wrap_ae,wrap_cl, sess=sess)
    if viz_enabled:
        assert source_samples == nb_classes
        # Index of the first test example of each class
        idxs = [np.where(np.argmax(y_test, axis=1) == i)[0][0]
                for i in range(nb_classes)]
    if targeted:
        if viz_enabled:
            # Initialize our arrays for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')
            grid_viz_data_1 = np.zeros(grid_shape, dtype='f')
        # NOTE(review): idxs is only assigned when viz_enabled is truthy, but
        # it is used below - confirm whether this section was originally
        # nested under `if viz_enabled:`.
        # Each source image is repeated once per other class.
        adv_inputs = np.array(
            [[instance] * (nb_classes-1) for instance in x_test[idxs]],
            dtype=np.float32)
        #adv_input_y = np.array([[instance]*(nb_classes-1) for instance in y_test[idxs]])
        # True label of every source image, repeated (nb_classes-1) times
        adv_input_y = []
        for curr_num in range(nb_classes):
            targ = []
            for id in range(nb_classes-1):
                targ.append(y_test[idxs[curr_num]])
            adv_input_y.append(targ)
        adv_input_y = np.array(adv_input_y)
        # Target labels: for each source class, the labels of all other classes
        adv_target_y = []
        for curr_num in range(nb_classes):
            targ = []
            for id in range(nb_classes):
                if(id!=curr_num):
                    targ.append(y_test[idxs[id]])
            adv_target_y.append(targ)
        adv_target_y = np.array(adv_target_y)
        #print("adv_input_y: \n", adv_input_y)
        #print("adv_target_y: \n", adv_target_y)
        # Target images: for each source class, one image of every other class
        adv_input_targets = []
        for curr_num in range(nb_classes):
            targ = []
            for id in range(nb_classes):
                if(id!=curr_num):
                    targ.append(x_test[idxs[id]])
            adv_input_targets.append(targ)
        adv_input_targets = np.array(adv_input_targets)
        # Flatten the (source, target) grid into a single batch dimension
        adv_inputs = adv_inputs.reshape(
            (source_samples * (nb_classes-1), img_rows, img_cols, nchannels))
        adv_input_targets = adv_input_targets.reshape(
            (source_samples * (nb_classes-1), img_rows, img_cols, nchannels))
        # The label width is hard-coded to 10 here rather than nb_classes.
        adv_input_y = adv_input_y.reshape(source_samples*(nb_classes-1), 10)
        adv_target_y = adv_target_y.reshape(source_samples*(nb_classes-1), 10)
        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape((source_samples *
                                                     nb_classes, nb_classes))
        yname = "y_target"
    fgsm_params = {
        'eps': 0.3,
        'clip_min': 0.,
        'clip_max': 1.
    }
    fgsm = FastGradientMethodAe(wrap_ae, sess=sess)
    adv = fgsm.generate(x,x_t, **fgsm_params)
    adv = sess.run(adv, {x: adv_inputs, x_t: adv_input_targets})
    # Reconstructions of the clean inputs and of the adversarial inputs
    recon_orig = wrap_ae.get_layer(x, 'RECON')
    recon_orig = sess.run(recon_orig, feed_dict = {x: adv_inputs})
    recon_adv = wrap_ae.get_layer(x, 'RECON')
    recon_adv = sess.run(recon_adv, feed_dict = {x: adv})
    # Classifier logits on the reconstructed adversarial inputs
    pred_adv_recon = wrap_cl.get_logits(x)
    pred_adv_recon = sess.run(pred_adv_recon, {x:recon_adv})
    #scores1 = cl_model.evaluate(recon_adv, adv_input_y, verbose=1)
    #scores2 = cl_model.evaluate(recon_adv, adv_target_y, verbose = 1)
    #acc_1 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_target_y, adv_input_targets, args=eval_params_cls)
    #acc_2 = model_eval(sess, x, y, pred_adv_recon, x_t, adv_inputs, adv_input_y, adv_input_targets, args=eval_params_cls)
    shape = np.shape(adv_inputs)
    # noise: square root of the per-example mean squared perturbation
    noise = np.sum(np.square(adv-adv_inputs))/(np.shape(adv)[0])
    noise = pow(noise,0.5)
    # d1 / d2: per-example mean squared distance of the adversarial
    # reconstruction to the source image / to the target image
    d1 = np.sum(np.square(recon_adv-adv_inputs))/(np.shape(adv_inputs)[0])
    d2 = np.sum(np.square(recon_adv-adv_input_targets))/(np.shape(adv_inputs)[0])
    # acc_1: fraction classified as the target label; acc_2: as the true label
    acc_1 = (sum(np.argmax(pred_adv_recon, axis=-1)== np.argmax(adv_target_y, axis=-1)))/(np.shape(adv_target_y)[0])
    acc_2 = (sum(np.argmax(pred_adv_recon, axis=-1)== np.argmax(adv_input_y, axis=-1)))/(np.shape(adv_target_y)[0])
    print("noise: ", noise)
    print("d1: ", d1)
    print("d2: ", d2)
    print("classifier acc_target: ", acc_1)
    print("classifier acc_true: ", acc_2)
    #print("recon_adv[0]\n", recon_adv[0,:,:,0])
    curr_class = 0
    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if(i==j):
                        # Diagonal: clean reconstruction / clean input. The
                        # stride 9 is hard-coded (equals nb_classes-1 when
                        # nb_classes is 10).
                        grid_viz_data[i,j] = recon_orig[curr_class*9]
                        grid_viz_data_1[i,j] = adv_inputs[curr_class*9]
                        curr_class = curr_class+1
                    else:
                        # Off-diagonal: the flat batch has no entry for
                        # target == source, so indices past the diagonal are
                        # shifted down by one.
                        if(j>i):
                            grid_viz_data[i,j] = recon_adv[i*(nb_classes-1) + j-1]
                            grid_viz_data_1[i,j] = adv[i*(nb_classes-1)+j-1]
                        else:
                            grid_viz_data[i,j] = recon_adv[i*(nb_classes-1) + j]
                            grid_viz_data_1[i,j] = adv[i*(nb_classes-1)+j]
        #rint(grid_viz_data.shape)
    print('--------------------------------------')
    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                       axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))
    # Finally, save grids of all the reconstructions / adversarial examples
    if viz_enabled:
        plt.ioff()
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')
        # Add the images to the plot
        num_cols = grid_viz_data.shape[0]
        num_rows = grid_viz_data.shape[1]
        num_channels = grid_viz_data.shape[4]
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols))
                plt.axis('off')
                plt.imshow(grid_viz_data[xx, yy, :, :, :])
        # Save the grid of reconstructions
        plt.savefig('cifar10_fgsm_vae_fig1')
        figure = plt.figure()
        figure.canvas.set_window_title('Cleverhans: Grid Visualization')
        for yy in range(num_rows):
            for xx in range(num_cols):
                figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols))
                plt.axis('off')
                plt.imshow(grid_viz_data_1[xx, yy, :, :, :])
        # Save the grid of adversarial inputs
        plt.savefig('cifar10_fgsm_vae_fig2')
    if adversarial_training:
        print("starting adversarial training")
        # Indexing with the full index list yields copies of the training
        # arrays, which are then edited in place below.
        index_shuf = list(range(len(x_train)))
        x_train_target = x_train[index_shuf]
        y_train_target = y_train[index_shuf]
        # Randomly repeat a few training examples each epoch to avoid
        # having a too-small batch
        ''' while len(index_shuf) % batch_size != 0: index_shuf.append(rng.randint(len(x_train))) nb_batches = len(index_shuf) // batch_size rng.shuffle(index_shuf) # Shuffling here versus inside the loop doesn't seem to affect # timing very much, but shuffling here makes the code slightly # easier to read '''
        print("len of x_train_target and x_train: ", len(x_train_target), len(x_train))
        # For every training example, draw random examples until one with a
        # different class is found, and use it as the attack target.
        for ind in range (0, len(x_train)):
            r_ind = -1
            while(np.argmax(y_train_target[ind])==np.argmax(y_train[ind])):
                r_ind = rng.randint(0,len(x_train))
                y_train_target[ind] = y_train[r_ind]
            if r_ind>-1:
                x_train_target[ind] = x_train[r_ind]
        wrap_ae2 = ModelVAE('wrap_ae2')
        fgsm2 = FastGradientMethodAe(wrap_ae2, sess=sess)
        # NOTE(review): the training-set adversarial examples are generated
        # with `fgsm` (first VAE), not `fgsm2` - confirm this is intended.
        adv2 = fgsm.generate(x,x_t, **fgsm_params)
        adv_set = sess.run(adv2, {x: x_train, x_t: x_train_target})
        # Inputs are clean + adversarial images; the reconstruction target is
        # the clean image in both halves.
        x_train_aim = np.append(x_train, x_train, axis = 0)
        x_train_app = np.append(x_train, adv_set, axis = 0)
        loss2 = vae_loss(wrap_ae2)
        train_params = {
            'nb_epochs': 5,
            'batch_size': batch_size,
            'learning_rate': learning_rate}
        train_ae(sess, loss2, x_train_app, x_train_aim, tf.train.AdamOptimizer(learning_rate=0.0002, beta1=0.5), args=train_params, rng=rng, var_list = wrap_ae2.get_params())
        # NOTE(review): evaluate_ae uses `recon`, which was built from the
        # first VAE (wrap_ae), not wrap_ae2.
        evaluate_ae()
        adv3 = fgsm2.generate(x, x_t, **fgsm_params)
        adv3 = sess.run(adv3, {x: adv_inputs, x_t: adv_input_targets})
        recon_orig2 = wrap_ae2.get_layer(x, 'RECON')
        recon_orig2 = sess.run(recon_orig2, feed_dict = {x: adv_inputs})
        recon_adv2 = wrap_ae2.get_layer(x, 'RECON')
        recon_adv2 = sess.run(recon_adv2, feed_dict = {x: adv3})
        pred_adv_recon2 = wrap_cl.get_logits(x)
        pred_adv_recon2 = sess.run(pred_adv_recon2, {x:recon_adv2})
        shape = np.shape(adv_inputs)
        # Same statistics as above, for the adversarially trained VAE
        noise = np.sum(np.square(adv3-adv_inputs))/(np.shape(adv3)[0])
        noise = pow(noise,0.5)
        d1 = np.sum(np.square(recon_adv2-adv_inputs))/(np.shape(adv_inputs)[0])
        d2 = np.sum(np.square(recon_adv2-adv_input_targets))/(np.shape(adv_inputs)[0])
        acc_1 = (sum(np.argmax(pred_adv_recon2, axis=-1)== np.argmax(adv_target_y, axis=-1)))/(np.shape(adv_target_y)[0])
        acc_2 = (sum(np.argmax(pred_adv_recon2, axis=-1)== np.argmax(adv_input_y, axis=-1)))/(np.shape(adv_target_y)[0])
        print("noise: ", noise)
        print("d1: ", d1)
        print("d2: ", d2)
        print("classifier acc_target: ", acc_1)
        print("classifier acc_true: ", acc_2)
        #print("recon_adv[0]\n", recon_adv[0,:,:,0])
        curr_class = 0
        if viz_enabled:
            for j in range(nb_classes):
                if targeted:
                    for i in range(nb_classes):
                        #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                        if(i==j):
                            grid_viz_data[i,j] = recon_orig2[curr_class*9]
                            grid_viz_data_1[i,j] = adv_inputs[curr_class*9]
                            curr_class = curr_class+1
                        else:
                            if(j>i):
                                grid_viz_data[i,j] = recon_adv2[i*(nb_classes-1) + j-1]
                                grid_viz_data_1[i,j] = adv3[i*(nb_classes-1)+j-1]
                            else:
                                grid_viz_data[i,j] = recon_adv2[i*(nb_classes-1) + j]
                                grid_viz_data_1[i,j] = adv3[i*(nb_classes-1)+j]
            #rint(grid_viz_data.shape)
        print('--------------------------------------')
        # Compute the average distortion introduced by the algorithm
        # NOTE(review): this uses `adv` (attack on the first VAE), not `adv3`,
        # so it prints the same value as before - confirm.
        percent_perturbed = np.mean(np.sum((adv - adv_inputs)**2,
                                           axis=(1, 2, 3))**.5)
        print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))
        # Finally, save grids of all the reconstructions / adversarial examples
        if viz_enabled:
            plt.ioff()
            figure = plt.figure()
            figure.canvas.set_window_title('Cleverhans: Grid Visualization')
            # Add the images to the plot
            num_cols = grid_viz_data.shape[0]
            num_rows = grid_viz_data.shape[1]
            num_channels = grid_viz_data.shape[4]
            for yy in range(num_rows):
                for xx in range(num_cols):
                    figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols))
                    plt.axis('off')
                    plt.imshow(grid_viz_data[xx, yy, :, :, :])
            # Save the grid of reconstructions
            plt.savefig('cifar10_vae_fgsm_adv_fig1')
            figure = plt.figure()
            figure.canvas.set_window_title('Cleverhans: Grid Visualization')
            for yy in range(num_rows):
                for xx in range(num_cols):
                    figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols))
                    plt.axis('off')
                    plt.imshow(grid_viz_data_1[xx, yy, :, :, :])
            # Save the grid of adversarial inputs
            plt.savefig('cifar10_vae_fgsm_adv_fig2')
        #return report
    if binarization:
        print("----------------")
        print("BINARIZATION")
        # Threshold the adversarial inputs in place at 0.5
        adv[adv>0.5] = 1.0
        adv[adv<=0.5] = 0.0
        recon_orig = wrap_ae.get_layer(x, 'RECON')
        recon_adv = wrap_ae.get_layer(x, 'RECON')
        #pred_adv = wrap_cl.get_logits(x)
        recon_orig = sess.run(recon_orig, {x: adv_inputs})
        recon_adv = sess.run(recon_adv, {x: adv})
        #pred_adv = sess.run(pred_adv, {x: recon_adv})
        pred_adv_recon = wrap_cl.get_logits(x)
        pred_adv_recon = sess.run(pred_adv_recon, {x:recon_adv})
        eval_params = {'batch_size': 90}
        if targeted:
            noise = np.sum(np.square(adv-adv_inputs))/(np.shape(adv)[0])
            noise = pow(noise,0.5)
            d1 = np.sum(np.square(recon_adv-adv_inputs))/(np.shape(adv_inputs)[0])
            d2 = np.sum(np.square(recon_adv-adv_input_targets))/(np.shape(adv_inputs)[0])
            acc_1 = (sum(np.argmax(pred_adv_recon, axis=-1)== np.argmax(adv_target_y, axis=-1)))/(np.shape(adv_target_y)[0])
            acc_2 = (sum(np.argmax(pred_adv_recon, axis=-1)== np.argmax(adv_input_y, axis=-1)))/(np.shape(adv_target_y)[0])
            print("noise: ", noise)
            print("d1: ", d1)
            print("d2: ", d2)
            print("classifier acc_target: ", acc_1)
            print("classifier acc_true: ", acc_2)
        curr_class = 0
        if viz_enabled:
            for j in range(nb_classes):
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if(i==j):
                        grid_viz_data[i,j] = recon_orig[curr_class*9]
                        grid_viz_data_1[i,j] = adv_inputs[curr_class*9]
                        curr_class = curr_class+1
                    else:
                        if(j>i):
                            grid_viz_data[i,j] = recon_adv[i*(nb_classes-1) + j-1]
                            grid_viz_data_1[i,j] = adv[i*(nb_classes-1)+j-1]
                        else:
                            grid_viz_data[i,j] = recon_adv[i*(nb_classes-1) + j]
                            grid_viz_data_1[i,j] = adv[i*(nb_classes-1)+j]
            plt.ioff()
            figure = plt.figure()
            figure.canvas.set_window_title('Cleverhans: Grid Visualization')
            # Add the images to the plot
            num_cols = grid_viz_data.shape[0]
            num_rows = grid_viz_data.shape[1]
            num_channels = grid_viz_data.shape[4]
            for yy in range(num_rows):
                for xx in range(num_cols):
                    figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy* num_cols))
                    plt.axis('off')
                    # Single-channel images are shown as 2-D arrays
                    if num_channels == 1:
                        plt.imshow(grid_viz_data[xx, yy, :, :, 0])
                    else:
                        plt.imshow(grid_viz_data[xx, yy, :, :, :])
            # Save the grid of reconstructions
            plt.savefig('cifar10_fgsm_vae_fig1_bin')
            figure = plt.figure()
            figure.canvas.set_window_title('Cleverhans: Grid Visualization')
            for yy in range(num_rows):
                for xx in range(num_cols):
                    figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols))
                    plt.axis('off')
                    if num_channels == 1:
                        plt.imshow(grid_viz_data_1[xx, yy, :, :, 0])
                    else:
                        plt.imshow(grid_viz_data_1[xx, yy, :, :, :])
            # Save the grid of adversarial inputs
            plt.savefig('cifar10_fgsm_vae_fig2_bin')
    if(mean_filtering ==True):
        print("----------------")
        print("MEAN FILTERING")
        # NOTE(review): uniform_filter is applied with size 2 over every axis
        # of `adv`, including the batch axis if this is scipy.ndimage's
        # uniform_filter - confirm that is intended.
        adv = uniform_filter(adv, 2)
        recon_orig = wrap_ae.get_layer(x, 'RECON')
        recon_adv = wrap_ae.get_layer(x, 'RECON')
        pred_adv_recon = wrap_cl.get_logits(x)
        recon_orig = sess.run(recon_orig, {x: adv_inputs})
        recon_adv = sess.run(recon_adv, {x: adv})
        pred_adv_recon = sess.run(pred_adv_recon, {x: recon_adv})
        eval_params = {'batch_size': 90}
        noise = np.sum(np.square(adv-adv_inputs))/(np.shape(adv)[0])
        noise = pow(noise,0.5)
        d1 = np.sum(np.square(recon_adv-adv_inputs))/(np.shape(adv_inputs)[0])
        d2 = np.sum(np.square(recon_adv-adv_input_targets))/(np.shape(adv_inputs)[0])
        acc_1 = (sum(np.argmax(pred_adv_recon, axis=-1)== np.argmax(adv_target_y, axis=-1)))/(np.shape(adv_target_y)[0])
        acc_2 = (sum(np.argmax(pred_adv_recon, axis=-1)== np.argmax(adv_input_y, axis=-1)))/(np.shape(adv_target_y)[0])
        print("noise: ", noise)
        print("d1: ", d1)
        print("d2: ", d2)
        print("classifier acc_target: ", acc_1)
        print("classifier acc_true: ", acc_2)
        curr_class = 0
        if viz_enabled:
            for j in range(nb_classes):
                for i in range(nb_classes):
                    #grid_viz_data[i, j] = adv[j * (nb_classes-1) + i]
                    if(i==j):
                        grid_viz_data[i,j] = recon_orig[curr_class*9]
                        grid_viz_data_1[i,j] = adv_inputs[curr_class*9]
                        curr_class = curr_class+1
                    else:
                        if(j>i):
                            grid_viz_data[i,j] = recon_adv[i*(nb_classes-1) + j-1]
                            grid_viz_data_1[i,j] = adv[i*(nb_classes-1)+j-1]
                        else:
                            grid_viz_data[i,j] = recon_adv[i*(nb_classes-1) + j]
                            grid_viz_data_1[i,j] = adv[i*(nb_classes-1)+j]
            plt.ioff()
            figure = plt.figure()
            figure.canvas.set_window_title('Cleverhans: Grid Visualization')
            # Add the images to the plot
            num_cols = grid_viz_data.shape[0]
            num_rows = grid_viz_data.shape[1]
            num_channels = grid_viz_data.shape[4]
            for yy in range(num_rows):
                for xx in range(num_cols):
                    figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy* num_cols))
                    plt.axis('off')
                    if num_channels == 1:
                        plt.imshow(grid_viz_data[xx, yy, :, :, 0])
                    else:
                        plt.imshow(grid_viz_data[xx, yy, :, :, :])
            # Save the grid of reconstructions
            plt.savefig('cifar10_fgsm_vae_fig1_mean')
            figure = plt.figure()
            figure.canvas.set_window_title('Cleverhans: Grid Visualization')
            for yy in range(num_rows):
                for xx in range(num_cols):
                    figure.add_subplot(num_rows, num_cols, (xx + 1) + (yy * num_cols))
                    plt.axis('off')
                    if num_channels == 1:
                        plt.imshow(grid_viz_data_1[xx, yy, :, :, 0])
                    else:
                        plt.imshow(grid_viz_data_1[xx, yy, :, :, :])
            # Save the grid of adversarial inputs
            plt.savefig('cifar10_fgsm_vae_fig2_mean')
def init_defense(sess, x, preds, batch_size, multi_noise=False):
    """
    Build and prime the statistics-based detector for a CIFAR10 model.

    The first 1000 CIFAR10 training images (rescaled by 255) are handed to
    `tf_robustify.collect_statistics`, together with the tensors feeding the
    model's final `MatMul` op. The returned generator is advanced once before
    being handed back to the caller.

    :param sess: TF session the statistics are collected in
    :param x: input placeholder of the model
    :param preds: logits tensor of the model
    :param batch_size: batch size used for the input pipeline and the
                       statistics collection
    :param multi_noise: if True, detect with nine noise settings (each
                        magnitude scaled by 255) instead of the single
                        'n30.0' setting
    :return: the primed predictor generator
    """
    cifar = CIFAR10()
    n_train = cifar.x_train.shape[0]
    # The augmented input pipeline is still built, in the original order,
    # although nothing in this function consumes it.
    train_pipeline = cifar.to_tensorflow()[0]
    train_pipeline = train_pipeline.map(
        lambda image, label: (random_shift(random_horizontal_flip(image)),
                              label),
        4)
    train_pipeline = train_pipeline.batch(batch_size)
    train_pipeline = train_pipeline.prefetch(16)

    images, labels = cifar.get_set('train')
    images *= 255  # in-place rescale of the training images

    sample_count = 1000

    if multi_noise:
        specs = ('n0.003,s0.003,u0.003,n0.005,s0.005,u0.005,'
                 's0.008,n0.008,u0.008').split(',')
        # Keep the leading type letter, scale the magnitude by 255.
        detect_noise = [spec[0] + str(float(spec[1:]) * 255)
                        for spec in specs]
    else:
        detect_noise = 'n30.0'

    # these attack parameters are just for initializing the defense
    eps = 8.0
    step = eps / 5

    # Walk back from the logits until the final MatMul is reached; its two
    # inputs are the latent activations and the weight matrix.
    op = preds.op
    while op.type != 'MatMul':
        op = op.inputs[0].op
    latent, last_weights = op.inputs

    predictor = tf_robustify.collect_statistics(
        images[:sample_count],
        labels[:sample_count],
        x,
        sess,
        logits_tensor=preds,
        latent_x_tensor=latent,
        weights=last_weights,
        nb_classes=labels.shape[1],
        p_ratio_cutoff=.999,
        noise_eps='n30.0',
        noise_eps_detect=detect_noise,
        pgd_eps=eps,
        pgd_lr=step / eps,
        pgd_iters=10,
        save_alignments_dir=None,
        load_alignments_dir=None,
        clip_min=0,
        clip_max=255,
        batch_size=batch_size,
        num_noise_samples=256,
        debug_dict=None,
        debug=False,
        targeted=False,
        pgd_train=None,
        fit_classifier=True,
        clip_alignments=True,
        just_detect=True)
    next(predictor)
    return predictor
def cifar10_tutorial(train_start=0, train_end=60000, test_start=0,
                     test_end=10000, nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE, model_path=MODEL_PATH,
                     learning_rate=LEARNING_RATE, clean_train=CLEAN_TRAIN,
                     testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS, num_threads=None,
                     label_smoothing=0.1):
    """
    CIFAR10 cleverhans tutorial: fine-tune a pre-trained wide ResNet.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param model_path: path the trained model is saved to; its last
                       component is also passed to training as 'filename'
    :param learning_rate: learning rate for training
    :param clean_train: if false, nothing is trained and the empty report
                        is returned
    :param testing: if true, also record the accuracy on the training set
                    in the AccuracyReport
    :param backprop_through_attack: unused in this function
    :param nb_filters: unused in this function
    :param num_threads: if truthy, run the session with a single intra-op
                        thread
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Fixed graph-level seed for reproducibility, verbose logging
    tf.set_random_seed(1234)
    set_log_level(logging.DEBUG)

    # Create TF session
    session_options = (dict(intra_op_parallelism_threads=1)
                       if num_threads else {})
    sess = tf.Session(config=tf.ConfigProto(**session_options))

    # CIFAR10 data plus an augmented (flip + shift) training pipeline
    data = CIFAR10(train_start=train_start, train_end=train_end,
                   test_start=test_start, test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda image, label: (random_shift(random_horizontal_flip(image)),
                              label),
        4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)

    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Image parameters are taken from the test set
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Input placeholders
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }
    eval_params = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """Store the accuracy under `report_key`; print it if `is_adv` is set."""
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            return
        kind = 'adversarial' if is_adv else 'legitimate'
        print('Test accuracy on %s examples: %0.4f' % (kind, acc))

    if not clean_train:
        return report

    print('start')
    model = make_wresnet(scope='model1')
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=label_smoothing)
    # Start from the pre-trained weights stored at this fixed location
    tf_model_load(
        sess, '/nfs/nas4/data-hanwei/data-hanwei/DATA/models/wresnet/cifar1')

    def evaluate():
        do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

    optimizer = tf.train.MomentumOptimizer(learning_rate=0.0008, momentum=0)
    train(sess, x, y, model, None, None,
          dataset_train=dataset_train,
          dataset_size=dataset_size,
          evaluate=evaluate,
          args=train_params,
          rng=rng,
          var_list=model.get_params(),
          optimizer=optimizer)
    tf.train.Saver().save(sess, model_path)

    # Calculate training error
    if testing:
        do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

    return report
def train_deflecting(dataset_name=DATASET, train_start=0, train_end=TRAIN_END,
                     test_start=0, test_end=TEST_END, nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     num_capsules_output=NUM_CAPSULES_OUTPUT,
                     output_atoms = OUTPUT_ATOMS, num_routing = NUM_ROUTING,
                     learning_rate=LEARNING_RATE, nb_filters=NB_FILTERS,
                     num_threads=None):
    """
    SVHN cleverhans tutorial to train a deflecting model based on CapsLayer

    :param dataset_name: "SVHN" or "CIFAR10"
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param num_capsules_output: number of output capsules
    :param output_atoms: size of each capsule vector
    :param num_routing: number of routings in capsule layer
    :param learning_rate: learning rate for training
    :param nb_filters: passed through to the CapsNetRecons constructor
    :param num_threads: if truthy, run the session with a single intra-op
                        thread
    :raises ValueError: if dataset_name is neither "SVHN" nor "CIFAR10"
    """
    # Fail fast on an unsupported dataset. Previously this fell through both
    # if/elif chains and crashed later on the unbound name `data`.
    if dataset_name not in ("SVHN", "CIFAR10"):
        raise ValueError(
            'Unsupported dataset_name %r; expected "SVHN" or "CIFAR10"'
            % (dataset_name,))

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get the requested dataset
    if dataset_name == "SVHN":
        data = SVHN(train_start=train_start, train_end=train_end,
                    test_start=test_start, test_end=test_end)
    else:
        data = CIFAR10(train_start=train_start, train_end=train_end,
                       test_start=test_start, test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    # SVHN images are only shifted; CIFAR10 images are also flipped.
    if dataset_name == "SVHN":
        dataset_train = dataset_train.map(
            lambda x, y: (random_shift((x)), y), 4)
    else:
        dataset_train = dataset_train.map(
            lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)

    # Training consumes dataset_train, so only the test arrays are needed.
    x_test, y_test = data.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    model = CapsNetRecons(dataset_name, nb_classes, nb_filters,
                          input_shape=[batch_size, img_rows, img_cols,
                                       nchannels],
                          num_capsules_output=num_capsules_output,
                          output_atoms=output_atoms,
                          num_routing=num_routing)
    # Only the trainable variables under the `dataset_name` scope are trained.
    var_lists = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  scope=dataset_name)

    preds = model.get_logits(x)
    loss = MarginCycLoss(model)

    def evaluate():
        """Print and return the accuracy on the clean test set."""
        acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
        print('Test accuracy on %s examples: %0.4f' % ("clean", acc))
        return acc

    train(sess, loss, None, None,
          dataset_train=dataset_train,
          dataset_size=dataset_size,
          evaluate=evaluate,
          args=train_params,
          rng=rng,
          var_list=var_lists)
def cifar10_tutorial_bim(train_start=0, train_end=60000, test_start=0,
                         test_end=10000, viz_enabled=VIZ_ENABLED,
                         nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                         source_samples=SOURCE_SAMPLES,
                         learning_rate=LEARNING_RATE,
                         attack_iterations=ATTACK_ITERATIONS,
                         model_path=MODEL_PATH, targeted=TARGETED,
                         noise_output=NOISE_OUTPUT):
    """
    CIFAR10 tutorial for the Basic Iterative Method (BIM) attack

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) save a grid image of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param attack_iterations: currently unused; the attack below runs a
                              fixed number of iterations ('nb_iter': 50)
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :param noise_output: if True, the saved grid shows the perturbation
                         (adversarial minus original) instead of the
                         adversarial image
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get CIFAR10 data
    cifar10 = CIFAR10(train_start=train_start, train_end=train_end,
                      test_start=test_start, test_end=test_end)
    x_train, y_train = cifar10.get_set('train')
    x_test, y_test = cifar10.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelAllConvolutional('model1', nb_classes, nb_filters,
                                  input_shape=[32, 32, 3])
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train a CIFAR10 model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess, loss, x_train, y_train, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, model_path)

    # Evaluate the accuracy of the CIFAR10 model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Basic Iterative Method's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a BIM attack object
    bim = BasicIterativeMethod(model, sess=sess)

    if viz_enabled:
        # The grid shows one example per class, so pick the first test image
        # of each class.
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]

    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 1, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array(
                [[instance] * nb_classes for instance in x_test[idxs]],
                dtype=np.float32)
        else:
            adv_inputs = np.array(
                [[instance] * nb_classes
                 for instance in x_test[:source_samples]],
                dtype=np.float32)

        # Every source image is repeated nb_classes times and paired with
        # each one-hot target label in turn.
        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = x_test[idxs]
        else:
            adv_inputs = x_test[:source_samples]

        adv_ys = None

    bim_params = {
        'eps': 0.3,
        'clip_min': 0.,
        'clip_max': 1.,
        'nb_iter': 50,
        'eps_iter': .01
    }
    if targeted:
        # Without y_target the attack runs untargeted, and the evaluation
        # against adv_ys below would not measure targeted success.
        bim_params['y_target'] = adv_ys

    adv = bim.generate_np(adv_inputs, **bim_params)

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        # Fraction of examples classified as their target class
        adv_accuracy = model_eval(sess, x, y, preds, adv, adv_ys,
                                  args=eval_params)
    else:
        # model_eval returns the accuracy w.r.t. the true labels, so the
        # attack success rate is its complement.
        true_labels = y_test[idxs] if viz_enabled else y_test[:source_samples]
        err = model_eval(sess, x, y, preds, adv, true_labels,
                         args=eval_params)
        adv_accuracy = 1 - err

    if viz_enabled:
        # Targeted runs hold nb_classes consecutive rows per source image;
        # untargeted runs hold exactly one row per source image.
        stride = nb_classes if targeted else 1
        for i in range(nb_classes):
            image = adv[i * stride]
            if noise_output:
                image = image - adv_inputs[i * stride]
            grid_viz_data[i, 0] = image

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    l2_norm = np.mean(np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(l2_norm))

    # Close TF session
    sess.close()

    def save_visual(data, path):
        """
        Modified version of cleverhans.plot.pyplot: draw `data` (indexed as
        [column, row, height, width, channel]) as an image grid, save it to
        `path` and return the figure.
        """
        figure = plt.figure()

        # Add the images to the plot
        num_cols = data.shape[0]
        num_rows = data.shape[1]
        num_channels = data.shape[4]
        for row in range(num_rows):
            for col in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (col + 1) + (row * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(data[col, row, :, :, 0], cmap='gray')
                else:
                    plt.imshow(data[col, row, :, :, :])

        # Draw the plot and return
        plt.savefig(path)
        return figure

    # Finally, save a grid of all the adversarial examples
    if viz_enabled:
        if noise_output:
            image_name = "output/bim_cifar10_noise.png"
        else:
            image_name = "output/bim_cifar10.png"
        _ = save_visual(grid_viz_data, image_name)

    return report
def cifar10_tutorial(train_start=0, train_end=60000, test_start=0,
                     test_end=10000, nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN, testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS, num_threads=None,
                     label_smoothing=0.1, retrain=False):
    """
    CIFAR10 cleverhans tutorial evaluated against colour-shift
    ("semantic") adversarial examples.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: if falsy, nothing is trained or evaluated and the
                        empty report is returned
    :param testing: not used by this variant
    :param backprop_through_attack: not used by this variant
    :param nb_filters: filter count forwarded to ModelAllConvolutional
    :param num_threads: if truthy, TF intra-op parallelism is pinned to a
                        single thread
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :param retrain: forwarded to train_with_noise for the first checkpoint
                    that has to be trained; reset to False afterwards
    :return: an AccuracyReport object
    """
    # NOTE(review): save_dir, filename, save, num_trials, discretizeColor and
    # `type` are not defined inside this function; presumably they are
    # module-level globals -- confirm (if `type` is not one, the builtin
    # `type` is what gets passed around below).

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    config_args = dict(intra_op_parallelism_threads=1) if num_threads else {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data and build the augmented input pipeline
    data = CIFAR10(train_start=train_start, train_end=train_end,
                   test_start=test_start, test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda img, lbl: (random_shift(random_horizontal_flip(img)), lbl), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Training / evaluation settings
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': save_dir,
        'filename': filename,
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 8 / 255, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None, ae=None,
                type=None, datasetName=None, discretizeColor=1):
        """Evaluate, store the accuracy on the report, return both metrics."""
        accuracy, distortion = model_eval(
            sess, x, y, preds, x_set, y_set, args=eval_params,
            is_adv=is_adv, ae=ae, type=type, datasetName=datasetName,
            discretizeColor=discretizeColor)
        setattr(report, report_key, accuracy)
        if is_adv is not None:
            report_text = 'adversarial' if is_adv else 'legitimate'
            print('Test accuracy on %s examples: %0.4f'
                  % (report_text, accuracy))
        return accuracy, distortion

    if clean_train:
        model = ModelAllConvolutional('model1', nb_classes, nb_filters,
                                      input_shape=[32, 32, 3])
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            """Periodic evaluation callback on the clean test set."""
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False,
                    type=type, datasetName="CIFAR10",
                    discretizeColor=discretizeColor)

        # Checkpoint suffixes to visit: multiples of `increment` up to 1000
        checkpoint_steps = []
        step = 0
        increment = 1000
        while step < 1000:
            step = step + increment
            checkpoint_steps.append(step)

        accuracies = []
        distortions = []

        for checkpoint_step in checkpoint_steps:
            model_path = os.path.join(save_dir,
                                      filename + "-" + str(checkpoint_step))

            print("Trying to load trained model from: " + model_path)
            if os.path.exists(model_path + ".meta"):
                tf_model_load(sess, model_path)
                print("Load trained model")
            else:
                train_with_noise(sess, loss, x_train, y_train,
                                 evaluate=evaluate, args=train_params,
                                 rng=rng, var_list=model.get_params(),
                                 save=save, type=type, datasetName="CIFAR10",
                                 retrain=retrain,
                                 discretizeColor=discretizeColor)
                retrain = False

            # Generate semantic adversarial examples and score the model on
            # them; copies keep the original test arrays untouched.
            adv_x, y_test2 = color_shift_attack(
                sess, x, y, np.copy(x_test), np.copy(y_test), preds,
                args=eval_params, num_trials=num_trials)
            x_test2 = adv_x
            accuracy, distortion = do_eval(
                preds, np.copy(x_test2), np.copy(y_test2),
                'clean_train_clean_eval', False, type=type,
                datasetName="CIFAR10", discretizeColor=discretizeColor)

            distortions.append(distortion)
            accuracies.append(accuracy)
            print(str(accuracy))
            print(str(distortion))

        # Summary over all visited checkpoints
        print("accuracy:")
        for value in accuracies:
            print(value)

        print("distortion:")
        for value in distortions:
            print(value)

    return report
def cifar10_tutorial(train_start=0, train_end=60000, test_start=0,
                     test_end=10000, nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN, testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS, num_threads=None,
                     label_smoothing=0.1,
                     adversarial_training=ADVERSARIAL_TRAINING):
    """
    CIFAR10 cleverhans tutorial using the Basic Iterative Method (BIM)

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: filter count forwarded to ModelAllConvolutional
    :param num_threads: if truthy, TF intra-op parallelism is pinned to a
                        single thread
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :param adversarial_training: True means using adversarial training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        # allow ops to be placed on either cpu or gpu
        config_args = dict(allow_soft_placement=True)
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start, train_end=train_end,
                   test_start=test_start, test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Training, evaluation and attack settings
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    bim_params = {
        'eps': 0.5,
        'clip_min': 0.,
        'eps_iter': 0.002,
        'nb_iter': 10,
        'clip_max': 1.,
        'ord': np.inf
    }
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """Evaluate `preds`, store the accuracy under `report_key`."""
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    if clean_train:
        model = ModelAllConvolutional('model1', nb_classes, nb_filters,
                                      input_shape=[32, 32, 3])
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        # `evaluate` is passed to train() as a callback
        train(sess, loss, None, None,
              dataset_train=dataset_train, dataset_size=dataset_size,
              evaluate=evaluate, args=train_params, rng=rng,
              var_list=model.get_params())

        # Calculate training error
        if testing:
            do_eval(preds, x_train, y_train, 'train_clean_train_clean_eval')

        # Sweep the BIM step size over 20 values, starting at the configured
        # eps_iter and growing by 0.002 each round. The sweep works on copies
        # so that `bim_params` (reused for adversarial training below) keeps
        # its declared values.
        bim = BasicIterativeMethod(model, sess=sess)
        eps_iter = bim_params['eps_iter']
        for _ in range(20):
            sweep_params = dict(bim_params, eps_iter=eps_iter)
            adv_x = bim.generate(x, **sweep_params)
            preds_adv = model.get_logits(adv_x)

            # Evaluate the accuracy of the model on adversarial examples
            print("eps:%0.2f" % (eps_iter * sweep_params['nb_iter']))
            do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)
            eps_iter = eps_iter + 0.002

        # Calculate training error (for the last step size of the sweep)
        if testing:
            do_eval(preds_adv, x_train, y_train, 'train_clean_train_adv_eval')

    if not adversarial_training:
        return report

    print('Repeating the process, using adversarial training')

    # Create a new model and train it to be robust to BasicIterativeMethod
    model2 = ModelAllConvolutional('model2', nb_classes, nb_filters,
                                   input_shape=[32, 32, 3])
    bim2 = BasicIterativeMethod(model2, sess=sess)

    def attack(x):
        return bim2.generate(x, **bim_params)

    # add attack to loss
    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # Treat the adversarial examples as constants during training.
        # Backpropagating through the attack increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    def evaluate2():
        # Accuracy of adversarially trained model on legitimate test inputs
        do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
        # Accuracy of the adversarially trained model on adversarial examples
        do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    # Perform and evaluate adversarial training
    train(sess, loss2, None, None,
          dataset_train=dataset_train, dataset_size=dataset_size,
          evaluate=evaluate2, args=train_params, rng=rng,
          var_list=model2.get_params())

    # Calculate training errors
    if testing:
        do_eval(preds2, x_train, y_train, 'train_adv_train_clean_eval')
        do_eval(preds2_adv, x_train, y_train, 'train_adv_train_adv_eval')

    return report
def cifar10_tutorial_jsma(train_start=0, train_end=60000, test_start=0,
                          test_end=10000, viz_enabled=VIZ_ENABLED,
                          nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                          source_samples=SOURCE_SAMPLES,
                          learning_rate=LEARNING_RATE,
                          model_path=MODEL_PATH, noise_output=NOISE_OUTPUT):
    """
    CIFAR10 tutorial for the Jacobian-based saliency map approach (JSMA)

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) save a grid image of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: only its final path component is used, as the
                       'filename' training argument
    :param noise_output: if True, the saved grid shows the perturbation
                         (adversarial minus original) instead of the
                         adversarial image
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get CIFAR10 data
    cifar10 = CIFAR10(train_start=train_start, train_end=train_end,
                      test_start=test_start, test_end=test_end)
    x_train, y_train = cifar10.get_set('train')
    x_test, y_test = cifar10.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                          nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    nb_filters = 64
    # Define TF model graph
    model = ModelAllConvolutional('model1', nb_classes, nb_filters,
                                  input_shape=[32, 32, 3])
    preds = model.get_logits(x)
    loss = CrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train a CIFAR10 model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'filename': os.path.split(model_path)[-1]
    }
    sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])
    train(sess, loss, x_train, y_train, args=train_params, rng=rng)

    # Evaluate the accuracy of the CIFAR10 model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(source_samples) + ' * ' + str(nb_classes - 1) +
          ' adversarial examples')

    # Keep track of success (adversarial example classified in target)
    results = np.zeros((nb_classes, source_samples), dtype='i')

    # Rate of perturbed features for each test set example and target class
    perturbations = np.zeros((nb_classes, source_samples), dtype='f')

    # Initialize our array for grid visualization
    grid_shape = (nb_classes, 1, img_rows, img_cols, nchannels)
    grid_viz_data = np.zeros(grid_shape, dtype='f')

    # Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model, sess=sess)
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    # Most recent adversarial example / source image per *source class*;
    # each new target (and each later sample of the same class) overwrites
    # the slot, and classes never encountered stay all-zero.
    adv_all = np.zeros((nb_classes, img_rows, img_cols, nchannels), dtype='f')
    sample_all = np.zeros((nb_classes, img_rows, img_cols, nchannels),
                          dtype='f')

    # Loop over the samples we want to perturb into adversarial examples.
    # `range` works on both Python 2 and 3 without a six/xrange import.
    for sample_ind in range(0, source_samples):
        print('--------------------------------------')
        print('Attacking input %i/%i' % (sample_ind + 1, source_samples))
        sample = x_test[sample_ind:(sample_ind + 1)]

        # We want to find an adversarial example for each possible target
        # class (i.e. all classes that differ from the label given in the
        # dataset)
        current_class = int(np.argmax(y_test[sample_ind]))
        target_classes = other_classes(nb_classes, current_class)

        # Loop over all target classes
        for target in target_classes:
            print('Generating adv. example for target class %i' % target)

            # This call runs the Jacobian-based saliency map approach
            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(sample, **jsma_params)
            adv_all[current_class] = adv_x
            sample_all[current_class] = sample

            # Check if success was achieved
            res = int(model_argmax(sess, x, preds, adv_x) == target)

            # Compute number of modified features
            adv_x_reshape = adv_x.reshape(-1)
            test_in_reshape = x_test[sample_ind].reshape(-1)
            nb_changed = np.where(adv_x_reshape != test_in_reshape)[0].shape[0]
            percent_perturb = float(nb_changed) / adv_x_reshape.shape[0]

            # Update the arrays for later analysis
            results[target, sample_ind] = res
            perturbations[target, sample_ind] = percent_perturb

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    nb_targets_tried = ((nb_classes - 1) * source_samples)
    succ_rate = float(np.sum(results)) / nb_targets_tried
    print('Avg. rate of successful adv. examples {0:.4f}'.format(succ_rate))
    report.clean_train_adv_eval = 1. - succ_rate

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(perturbations)
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturbed))

    # Compute the average distortion introduced for successful samples only:
    # select the successful entries instead of averaging zeros in for the
    # failed ones, and guard against the no-success case.
    success_mask = results == 1
    if success_mask.any():
        percent_perturb_succ = np.mean(perturbations[success_mask])
    else:
        percent_perturb_succ = 0.0
    print('Avg. rate of perturbed features for successful '
          'adversarial examples {0:.4f}'.format(percent_perturb_succ))

    # Compute the average L2 distortion over the per-class slots
    l2_norm = np.mean(np.sum((adv_all - sample_all)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(l2_norm))

    for i in range(nb_classes):
        if noise_output:
            image = adv_all[i] - sample_all[i]
        else:
            image = adv_all[i]
        grid_viz_data[i, 0] = image

    # Close TF session
    sess.close()

    def save_visual(data, path):
        """
        Modified version of cleverhans.plot.pyplot: draw `data` (indexed as
        [column, row, height, width, channel]) as an image grid and save it
        to `path`.
        """
        import matplotlib.pyplot as plt

        figure = plt.figure()

        # Add the images to the plot
        num_cols = data.shape[0]
        num_rows = data.shape[1]
        num_channels = data.shape[4]
        for row in range(num_rows):
            for col in range(num_cols):
                figure.add_subplot(num_rows, num_cols,
                                   (col + 1) + (row * num_cols))
                plt.axis('off')

                if num_channels == 1:
                    plt.imshow(data[col, row, :, :, 0], cmap='gray')
                else:
                    plt.imshow(data[col, row, :, :, :])

        # Draw the plot and save it
        plt.savefig(path)

    # Finally, save a grid of all the adversarial examples
    if viz_enabled:
        if noise_output:
            image_name = "output/jsma_cifar10_noise.png"
        else:
            image_name = "output/jsma_cifar10.png"
        _ = save_visual(grid_viz_data, image_name)

    return report
def cifar10_tutorial(
    train_start=0,
    train_end=60000,
    test_start=0,
    test_end=10000,
    nb_epochs=NB_EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    clean_train=CLEAN_TRAIN,
    testing=False,
    backprop_through_attack=BACKPROP_THROUGH_ATTACK,
    nb_filters=NB_FILTERS,
    num_threads=None,
    label_smoothing=0.1,
):
    """
    CIFAR10 cleverhans tutorial: clean training followed by FGSM
    adversarial training.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: filter count forwarded to ModelAllConvolutional
    :param num_threads: if truthy, TF intra-op parallelism is pinned to a
                        single thread
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """
    # Collects (and finally returns) the key accuracies
    report = AccuracyReport()

    # Fixed graph-level seed for reproducibility
    tf.set_random_seed(1234)

    # Show debug-level log output
    set_log_level(logging.DEBUG)

    # Session, optionally restricted to one intra-op thread
    session_options = (
        dict(intra_op_parallelism_threads=1) if num_threads else {}
    )
    sess = tf.Session(config=tf.ConfigProto(**session_options))

    # CIFAR10 data plus the augmented tf.data training pipeline
    data = CIFAR10(
        train_start=train_start,
        train_end=train_end,
        test_start=test_start,
        test_end=test_end,
    )
    dataset_size = data.x_train.shape[0]
    dataset_train = (
        data.to_tensorflow()[0]
        .map(
            lambda img, lbl: (random_shift(random_horizontal_flip(img)), lbl),
            4,
        )
        .batch(batch_size)
        .prefetch(16)
    )
    x_train, y_train = data.get_set("train")
    x_test, y_test = data.get_set("test")

    # Image geometry and number of classes come from the test arrays
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Input / label placeholders
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Hyper-parameters shared by both training phases
    train_params = {
        "nb_epochs": nb_epochs,
        "batch_size": batch_size,
        "learning_rate": learning_rate,
    }
    eval_params = {"batch_size": batch_size}
    fgsm_params = {"eps": 0.3, "clip_min": 0.0, "clip_max": 1.0}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """Evaluate `preds`, record the accuracy, optionally print it."""
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            # Training-set evaluations are recorded silently
            return
        kind = "adversarial" if is_adv else "legitimate"
        print("Test accuracy on %s examples: %0.4f" % (kind, acc))

    if clean_train:
        model = ModelAllConvolutional(
            "model1", nb_classes, nb_filters, input_shape=[32, 32, 3]
        )
        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, "clean_train_clean_eval", False)

        train(
            sess,
            loss,
            None,
            None,
            dataset_train=dataset_train,
            dataset_size=dataset_size,
            evaluate=evaluate,
            args=train_params,
            rng=rng,
            var_list=model.get_params(),
        )

        # Training-set accuracy of the clean model
        if testing:
            do_eval(preds, x_train, y_train, "train_clean_train_clean_eval")

        # Build the Fast Gradient Sign Method (FGSM) attack graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_logits(adv_x)

        # Accuracy of the clean model on adversarial examples
        do_eval(preds_adv, x_test, y_test, "clean_train_adv_eval", True)

        # Training-set accuracy under attack
        if testing:
            do_eval(preds_adv, x_train, y_train, "train_clean_train_adv_eval")

        print("Repeating the process, using adversarial training")

    # Second model, trained to be robust to FastGradientMethod
    model2 = ModelAllConvolutional(
        "model2", nb_classes, nb_filters, input_shape=[32, 32, 3]
    )
    fgsm2 = FastGradientMethod(model2, sess=sess)

    def attack(x):
        return fgsm2.generate(x, **fgsm_params)

    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For the fgsm attack used in this tutorial, the attack has zero
        # gradient so enabling this flag does not change the gradient.
        # For some other attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    def evaluate2():
        # Adversarially trained model on legitimate test inputs
        do_eval(preds2, x_test, y_test, "adv_train_clean_eval", False)
        # Adversarially trained model on adversarial examples
        do_eval(preds2_adv, x_test, y_test, "adv_train_adv_eval", True)

    # Perform and evaluate adversarial training
    train(
        sess,
        loss2,
        None,
        None,
        dataset_train=dataset_train,
        dataset_size=dataset_size,
        evaluate=evaluate2,
        args=train_params,
        rng=rng,
        var_list=model2.get_params(),
    )

    # Training-set accuracies of the adversarially trained model
    if testing:
        do_eval(preds2, x_train, y_train, "train_adv_train_clean_eval")
        do_eval(preds2_adv, x_train, y_train, "train_adv_train_adv_eval")

    return report
def cifar10_tutorial(train_start=0, train_end=50000, test_start=0,
                     test_end=10000, nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN, testing=False,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS, num_threads=None,
                     label_smoothing=0.1):
    """
    CIFAR10 cleverhans tutorial with anisotropic-diffusion filtering.

    Builds an all-convolutional model, optionally trains it on clean
    data, crafts FGSM adversarial examples for the test set, filters
    them with anisotropic diffusion and prints the accuracy of the
    model on the filtered examples.

    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples; the
                        adversarial evaluation below only runs when this
                        is true.
    :param testing: if true, load the checkpoint stored at
                    ``./checkpoint_dir/clean_model_100.ckpt`` after
                    training and record its clean test accuracy in the
                    report.
    :param backprop_through_attack: accepted for interface
                                    compatibility; not used by this
                                    function.
    :param nb_filters: filter count forwarded to ModelAllConvolutional
    :param num_threads: if truthy, the TF session is created with
                        ``intra_op_parallelism_threads=1``.
    :param label_smoothing: float, amount of label smoothing for cross
                            entropy
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    # NOTE(review): num_threads only acts as a flag here; the thread count
    # is fixed to 1 regardless of its value -- confirm this is intended.
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start, train_end=train_end,
                   test_start=test_start, test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    # Data augmentation: random horizontal flip followed by random shift
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Training, evaluation and attack parameters for the CIFAR10 model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    fgsm_params = {'eps': 0.13, 'clip_min': 0., 'clip_max': 1.}
    rng = np.random.RandomState([2017, 8, 30])

    def do_eval(preds, x_set, y_set, report_key, is_adv=None):
        """Evaluate `preds` on a set, store the accuracy in the report
        under `report_key` and print it when `is_adv` is not None."""
        acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        setattr(report, report_key, acc)
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples: %0.4f' % (report_text, acc))

    model = ModelAllConvolutional('model1', nb_classes, nb_filters,
                                  input_shape=[32, 32, 3])
    preds = model.get_logits(x)

    if clean_train:
        loss = CrossEntropy(model, smoothing=label_smoothing)

        def evaluate():
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        train(sess, loss, None, None,
              dataset_train=dataset_train, dataset_size=dataset_size,
              evaluate=evaluate, args=train_params, rng=rng,
              var_list=model.get_params())

        # save model
        # saver = tf.train.Saver()
        # saver.save(sess, "./checkpoint_dir/clean_model_100.ckpt")

        # load model and compute testing accuracy
        if testing:
            tf_model_load(sess,
                          file_path="./checkpoint_dir/clean_model_100.ckpt")
            do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

        # Initialize the Fast Gradient Sign Method (FGSM) attack object and
        # graph
        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_logits(adv_x)

        # Evaluate the accuracy of the CIFAR10 model on adversarial examples
        do_eval(preds_adv, x_test, y_test, 'clean_train_adv_eval', True)

        # Materialise the adversarial test examples chunk by chunk. The
        # bounds are derived from the actual test-set size so that any
        # test_start/test_end selection is fully covered (the previous
        # fixed 10 x 1000 loop was only correct for 10000 examples).
        adv_chunk = 1000
        n_test = x_test.shape[0]
        x_test_adv = np.zeros(shape=x_test.shape)
        for start in range(0, n_test, adv_chunk):
            stop = min(start + adv_chunk, n_test)
            x_test_adv[start:stop] = adv_x.eval(
                session=sess, feed_dict={x: x_test[start:stop]})

        # implement anisotropic diffusion on adversarial samples
        x_test_filtered = np.zeros(shape=x_test_adv.shape)
        for idx, adv_image in enumerate(x_test_adv):
            x_test_filtered[idx] = filter.anisotropic_diffusion(adv_image)

        # implement median on adversarial samples
        # x_test_filtered_med = np.zeros(shape=x_test_adv.shape)
        # for i in range(y_test.shape[0]):
        #     x_test_filtered_med[i] = medfilt(x_test_filtered_ad[i],
        #                                      kernel_size=(3,3,1))

        # Accuracy of the clean-trained model on the filtered adversarial
        # examples; printed only, not stored in the report.
        acc = model_eval(sess, x, y, preds, x_test_filtered, y_test,
                         args=eval_params)
        print("acc after anisotropic diffusion is {}".format(acc))

    return report