def test_get_layer_names(self):
    """Check the layer names reported by a wrapped Keras model."""
    expected_names = ['l1', 'l2', 'softmax']
    wrapper = KerasModelWrapper(self.model)
    self.assertEqual(wrapper.get_layer_names(), expected_names)
def test_get_layer_names(self):
    """Check the layer names reported by a wrapped Keras model."""
    expected_names = ["l1", "l2", "softmax"]
    wrapper = KerasModelWrapper(self.model)
    self.assertEqual(wrapper.get_layer_names(), expected_names)
class AdversarialExampleController(
        Observable,
        method='adversarialControllerChanged',
        changes={'busy_changed', 'data_changed', 'parameter_changed'}):
    """Train an MNIST classifier and craft FGSM adversarial examples for it.

    The controller loads MNIST on construction, wraps a Keras classifier
    as a cleverhans model and exposes methods to train, evaluate and
    restore that model and to fetch a sample together with its
    adversarial counterpart.  Observers are notified with
    ``'busy_changed'`` around training.

    Attributes
    ----------
    _model: cleverhans.model.Model
        The cleverhans wrapper around the Keras model
        (set by :py:meth:`init_from_keras_classifier`).
    _loss:
        Cross entropy loss on ``_model`` with label smoothing.
    _input_placeholder, _label_placeholder, _preds:
        Input, label and prediction tensors taken from the classifier.
    _adv_x, _preds_adv:
        Tensor of FGSM adversarial inputs and the model output on them.
    _graph, _sess:
        TensorFlow graph and session taken from the classifier.
    _busy: bool
        ``True`` while :py:meth:`_train_model` is running.
    """

    _nb_epochs = 6
    _batch_size = 128
    _learning_rate = 0.001
    _train_dir = 'train_dir'
    _filename = 'mnist.ckpt'
    _testing = False
    _label_smoothing = 0.1

    # FIXME[todo]: this needs to be initialized ...
    _runner: Runner = None

    def __init__(self):
        super().__init__()
        self._model = None
        self._loss = None
        self._input_placeholder = None
        self._label_placeholder = None
        self._preds = None
        self._graph = None
        self._sess = None
        self._busy = False

        # Attributes assigned later; defined here so that reading them
        # before initialization yields None instead of an AttributeError.
        self._adv_x = None
        self._preds_adv = None
        self._keras_classifier = None
        self._x_sample = None
        self._y_sample = None

        self.load_mnist()  # FIXME[hack]

        # FIXME[old]: check what is still needed from the following code

        # Object used to keep track of (and return) key accuracies
        self._report = AccuracyReport()

        # Set numpy random seed to improve reproducibility
        self._rng = np.random.RandomState([2017, 8, 30])

        # Set TF random seed to improve reproducibility
        tf.set_random_seed(1234)

        self._train_params = {
            'nb_epochs': self._nb_epochs,
            'batch_size': self._batch_size,
            'learning_rate': self._learning_rate,
            'train_dir': self._train_dir,
            'filename': self._filename
        }
        self._eval_params = {'batch_size': self._batch_size}

        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(self._train_dir, exist_ok=True)

        self._ckpt = tf.train.get_checkpoint_state(self._train_dir)
        print(f"train_dir={self._train_dir}, chheckpoint={self._ckpt}")
        # False (not None) marks "no checkpoint found"; see load_model().
        self._ckpt_path = (False if self._ckpt is None
                           else self._ckpt.model_checkpoint_path)

    def init_from_keras_classifier(self, keras_classifier: KerasClassifier):
        """Adopt graph, session and tensors of a Keras classifier and
        build the FGSM attack graph on top of it.

        Parameters
        ----------
        keras_classifier: KerasClassifier
            Provides ``graph``, ``session``, ``input``, ``label``,
            ``predictions`` and ``model``.
        """
        self._graph = keras_classifier.graph
        self._sess = keras_classifier.session
        self._input_placeholder = keras_classifier.input
        self._label_placeholder = keras_classifier.label
        self._preds = keras_classifier.predictions

        self._model = KerasModelWrapper(keras_classifier.model)
        self._loss = CrossEntropy(self._model,
                                  smoothing=self._label_smoothing)

        with self._graph.as_default():
            fgsm = FastGradientMethod(self._model, sess=self._sess)
            fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
            adv_x = fgsm.generate(self._input_placeholder, **fgsm_params)
            # Consider the attack to be constant
            self._adv_x = tf.stop_gradient(adv_x)
            # model predictions for adversarial examples
            self._preds_adv = keras_classifier.model(adv_x)

        # FIXME[hack]: we need to keep a reference to the KerasClassifier
        # to prevent the session from being closed
        self._keras_classifier = keras_classifier

    def create_model(self):
        """Create a new MNIST Keras classifier and initialize from it."""
        keras_classifier = KerasMnistClassifier()  # FIXME[hack]
        self.init_from_keras_classifier(keras_classifier)

    def dump_model(self):
        """Print the layer names and layer tensors of the model."""
        with self._graph.as_default():
            layer_names = self._model.get_layer_names()
            print(f"Model has {len(layer_names)} layers: {layer_names}")
            for name in layer_names:
                print(
                    f"  {name}: "
                    f"{self._model.get_layer(self._input_placeholder, name)}"
                )

            model_layers = self._model.fprop(self._input_placeholder)
            print(f"Model has {len(model_layers)} layers:")
            for name, layer in model_layers.items():
                print(f"  {name}: {layer}")

    def train_model(self):
        """Train the model using the current training data.
        """
        logging.info("Training Cleverhans model from scratch.")
        # FIXME[todo]: self._runner is not initialized yet!
        #self._runner.runTask(self._train_model)
        self._train_model()  # FIXME[hack]

    def _train_model(self):
        """Run cleverhans training, signalling ``'busy_changed'`` before
        and after.  The busy flag is reset even if training raises.
        """
        self._busy = True
        self.change('busy_changed')

        def evaluate():
            self.evaluate_model(self._x_test, self._y_test)

        # now use the cleverhans train method (this will optimize the
        # loss function, and hence the model):
        # FIXME[problem]: there seems to be no way to get some progress
        # report from this train method. The only callback we can
        # register is 'evaluate', which can be used for arbitrary
        # operations, but which is only called after every epoch
        try:
            with self._graph.as_default():
                train(self._sess, self._loss, self._x_train, self._y_train,
                      evaluate=evaluate, args=self._train_params,
                      rng=self._rng)
        finally:
            # Without the finally clause an exception in train() would
            # leave the controller permanently marked as busy.
            self._busy = False
            self.change('busy_changed')

    def evaluate_model(self, data, label):
        """Evaluate the accuracy of the MNIST model.

        Parameters
        ----------
        data:
            Inputs passed to cleverhans' ``model_eval``.
        label:
            Labels passed to cleverhans' ``model_eval``.

        Returns
        -------
        The accuracy as returned by ``model_eval`` (it is also printed).
        """
        # use cleverhans' model_eval function:
        with self._graph.as_default():
            accuracy = model_eval(self._sess, self._input_placeholder,
                                  self._label_placeholder, self._preds,
                                  data, label, args=self._eval_params)
        print(f"MNIST model accurace: {accuracy:0.4f}")
        return accuracy

    def load_model(self):
        """Restore the model from the checkpoint found at construction
        time (if any) and evaluate it on the test data.
        """
        if not self._ckpt_path:
            print("Model was not loaded.")
            return

        with self._graph.as_default():
            saver = tf.train.Saver()
            print(self._ckpt_path)
            saver.restore(self._sess, self._ckpt_path)
        print(f"Model loaded from: {self._ckpt_path}")
        self.evaluate_model(self._x_test, self._y_test)

    def save_model(self):
        """Placeholder: saving is not implemented."""
        print("Model was not saved.")

    def reset_model(self):
        """Placeholder: resetting is not implemented."""
        print("Model was not reset.")

    def load_mnist(self):
        """Load the training data (MNIST).
        """
        # Get MNIST data
        train_start, train_end = 0, 60000
        test_start, test_end = 0, 10000
        mnist = MNIST(train_start=train_start, train_end=train_end,
                      test_start=test_start, test_end=test_end)
        self._x_train, self._y_train = mnist.get_set('train')
        self._x_test, self._y_test = mnist.get_set('test')

        # Use Image Parameters
        self._img_rows, self._img_cols, self._nchannels = \
            self._x_train.shape[1:4]
        self._nb_classes = self._y_train.shape[1]

        print(f"len(train): {len(self._x_train)} / {len(self._y_train)}")
        print(f"len(test):  {len(self._x_test)} / {len(self._y_test)}")
        print(
            f"img_rows x img_cols x nchannels: {self._img_rows} x {self._img_cols} x {self._nchannels}"
        )
        print(f"nb_classes: {self._nb_classes}")

    def get_example(self, index: int = None):
        """Select one sample and compute the model prediction for it.

        The selected sample is remembered and used by
        :py:meth:`get_adversarial_example`.

        Parameters
        ----------
        index: int
            Index of the sample.  If ``None`` a random index is drawn.

        Returns
        -------
        A triple ``(input, label, prediction)`` for the selected sample.
        """
        if index is None:
            # NOTE(review): the random index is bounded by the size of the
            # test set, but the sample is taken from the training set
            # below -- confirm which data set is intended.
            index = np.random.randint(len(self._x_test))
        #batch = np.arange(self._batch_size)
        batch = np.asarray([index])
        self._x_sample = self._x_train[batch]
        self._y_sample = self._y_train[batch]
        with self._graph.as_default():
            feed_dict = {self._input_placeholder: self._x_sample}
            preds_sample = \
                self._preds.eval(feed_dict=feed_dict, session=self._sess)
        return self._x_sample[0], self._y_sample[0], preds_sample[0]

    def get_adversarial_example(self, index: int = None):
        """Compute the FGSM adversarial example for a sample.

        Parameters
        ----------
        index: int
            Index of the sample to attack.  If ``None``, the sample chosen
            by the last call of :py:meth:`get_example` is used; if there
            was no such call, a random sample is selected first.

        Returns
        -------
        A pair ``(adversarial_input, prediction)``.
        """
        # Previously `index` was silently ignored and the method failed
        # when get_example() had not been called before.
        if index is not None or self._x_sample is None:
            self.get_example(index)

        with self._graph.as_default():
            feed_dict = {self._input_placeholder: self._x_sample}
            x_adversarial = \
                self._adv_x.eval(feed_dict=feed_dict, session=self._sess)
            feed_dict = {self._input_placeholder: x_adversarial}
            preds_adversarial = \
                self._preds_adv.eval(feed_dict=feed_dict,
                                     session=self._sess)
        return x_adversarial[0], preds_adversarial[0]

    @property
    def busy(self):
        """``True`` while the model is being trained."""
        return self._busy
def _others_per_class(per_class):
    """Pair each class with the representatives of all other classes.

    ``per_class[k]`` is the representative of class ``k``.  Row ``k`` of
    the result stacks the representatives of every class except ``k`` in
    ascending class order.
    """
    count = len(per_class)
    return np.array([[per_class[other]
                      for other in range(count) if other != current]
                     for current in range(count)])


def _print_attack_metrics(adv, adv_inputs, adv_input_targets, recon_adv,
                          pred_adv_recon, adv_input_y, adv_target_y):
    """Print perturbation size, reconstruction distances and the accuracy
    of the classifier on the reconstructed adversarial examples.
    """
    n_inputs = np.shape(adv_inputs)[0]
    noise = pow(np.sum(np.square(adv - adv_inputs)) / np.shape(adv)[0], 0.5)
    d1 = np.sum(np.square(recon_adv - adv_inputs)) / n_inputs
    d2 = np.sum(np.square(recon_adv - adv_input_targets)) / n_inputs
    predicted = np.argmax(pred_adv_recon, axis=-1)
    acc_1 = np.mean(predicted == np.argmax(adv_target_y, axis=-1))
    acc_2 = np.mean(predicted == np.argmax(adv_input_y, axis=-1))
    print("noise: ", noise)
    print("d1: ", d1)
    print("d2: ", d2)
    print("classifier acc_target: ", acc_1)
    print("classifier acc_true: ", acc_2)


def _fill_viz_grids(grid_recon, grid_adv, recon_orig, recon_adv,
                    adv_inputs, adv, nb_classes):
    """Fill the two ``nb_classes x nb_classes`` visualization grids.

    Diagonal cells show the clean sample of a class (reconstruction in
    ``grid_recon``, input in ``grid_adv``); every other cell ``[i, j]``
    shows the adversarial example of source class ``i`` for target ``j``.
    """
    per_source = nb_classes - 1  # the flat arrays skip the own class
    for i in range(nb_classes):
        for j in range(nb_classes):
            if i == j:
                flat = i * per_source
                grid_recon[i, j] = recon_orig[flat]
                grid_adv[i, j] = adv_inputs[flat]
            else:
                flat = i * per_source + (j - 1 if j > i else j)
                grid_recon[i, j] = recon_adv[flat]
                grid_adv[i, j] = adv[flat]


def _save_image_grid(grid, filename):
    """Plot a 5-D image grid (col, row, height, width, channel) and save
    it to ``filename``.
    """
    figure = plt.figure()
    # figure.canvas.set_window_title() is gone from recent Matplotlib
    # releases; the figure manager offers the same call.
    manager = getattr(figure.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title('Cleverhans: Grid Visualization')

    # Add the images to the plot
    num_cols = grid.shape[0]
    num_rows = grid.shape[1]
    num_channels = grid.shape[4]
    for yy in range(num_rows):
        for xx in range(num_cols):
            figure.add_subplot(num_rows, num_cols,
                               (xx + 1) + (yy * num_cols))
            plt.axis('off')
            if num_channels == 1:
                plt.imshow(grid[xx, yy, :, :, 0])
            else:
                plt.imshow(grid[xx, yy, :, :, :])

    plt.savefig(filename)
    plt.close(figure)  # do not accumulate open figures


def cifar10_cw_recon(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     viz_enabled=VIZ_ENABLED,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     source_samples=SOURCE_SAMPLES,
                     learning_rate=LEARNING_RATE,
                     attack_iterations=ATTACK_ITERATIONS,
                     model_path=MODEL_PATH,
                     model_path_cls=MODEL_PATH,
                     targeted=TARGETED,
                     num_threads=None,
                     label_smoothing=0.1,
                     nb_filters=NB_FILTERS,
                     filename=FILENAME,
                     train_dir_ae=TRAIN_DIR_AE,
                     train_dir_cl=TRAIN_DIR_CL):
    """Attack a CIFAR10 autoencoder with a targeted Carlini-Wagner attack.

    Steps: train or restore an autoencoder and a CNN classifier, craft
    adversarial inputs whose reconstruction should resemble a sample of
    another class, print distance/accuracy metrics and optionally save
    image grids.  Optional follow-up stages are adversarial training of a
    new autoencoder, a binarization defense and a mean-filter defense.

    Besides its parameters the function reads these names from module
    scope: ``clean_train_ae``, ``clean_train_cl``, ``train_further``,
    ``adv_train``, ``binarization_defense``, ``mean_filtering`` and
    ``CW_LEARNING_RATE``.  Checkpoints are written to / read from the
    hard-coded ``train_dir/`` paths used below.

    Parameters
    ----------
    train_start, train_end, test_start, test_end:
        Index ranges passed to ``CIFAR10``.
    viz_enabled:
        If true, save image grids of the results.
    nb_epochs, batch_size, learning_rate, filename:
        Training parameters of the autoencoder (the classifier and the
        "train further" runs use a fixed 100 epochs).
    source_samples:
        Number of source samples; must equal the number of classes.
    attack_iterations:
        ``max_iterations`` of the attack.
    targeted:
        Only the targeted attack is implemented.
    num_threads:
        If truthy, ``intra_op_parallelism_threads`` is set to 1.
    label_smoothing:
        Smoothing of the classifier's cross entropy loss.
    train_dir_ae, train_dir_cl:
        Directories created for / passed to the training routines.
    model_path, model_path_cls, nb_filters:
        Currently unused.

    Returns
    -------
    The ``AccuracyReport`` holding the clean test accuracy of the
    classifier, if it was evaluated.

    Raises
    ------
    NotImplementedError
        If ``targeted`` is false.
    ValueError
        If ``source_samples`` differs from the number of classes.
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    set_log_level(logging.DEBUG)

    # Create exactly one TF session (a second, unconfigured session used
    # to be created first and was leaked).
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))
    print("Created TensorFlow session.")

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start, train_end=train_end,
                   test_start=test_start, test_end=test_end)
    # NOTE(review): this input pipeline is never consumed.  It is kept
    # because it adds ops to the default graph, which presumably shifts
    # the op-level random seeds of everything created later -- confirm
    # before deleting.
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')
    x_test, y_test = data.get_set('test')

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]
    print("img_Rows, img_cols, nchannels: ", img_rows, img_cols, nchannels)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32,
                       shape=(None, img_rows, img_cols, nchannels))
    x_t = tf.placeholder(tf.float32,
                         shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    # Unused, kept for the same reason as the input pipeline above.
    y_t = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # ------------------------------------------------------------------
    # Autoencoder
    # ------------------------------------------------------------------
    model_ae = ae_model(x, img_rows=img_rows, img_cols=img_cols,
                        channels=nchannels)
    recon = model_ae(x)
    print("Defined TensorFlow model graph.")

    def evaluate_ae():
        # Reads `recon` and `x_train` at call time: after adversarial
        # training it therefore evaluates the new autoencoder on the
        # shuffled training data.
        eval_params = {'batch_size': 128}
        _noise, d1, _d2, _dist_diff, _avg_dist_lat = model_eval_ae(
            sess, x, x_t, recon, x_train, x_train, args=eval_params)
        print("reconstruction distance: ", d1)

    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir_ae,
        'filename': filename
    }
    rng = np.random.RandomState([2017, 8, 30])
    os.makedirs(train_dir_ae, exist_ok=True)

    wrap_ae = KerasModelWrapper(model_ae)
    if clean_train_ae:
        print("Training AE")
        loss = SquaredError(wrap_ae)
        train_ae(sess, loss, x_train, x_train, evaluate=evaluate_ae,
                 args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, "train_dir/model_ae.ckpt")
        print("saved model")
    else:
        print("Loading AE")
        saver = tf.train.Saver()
        saver.restore(sess, "train_dir/model_ae.ckpt")
        evaluate_ae()
        if train_further:
            # training with the saved model as starting point
            train_params = dict(train_params, nb_epochs=100)
            loss = SquaredError(wrap_ae)
            train_ae(sess, loss, x_train, x_train, evaluate=evaluate_ae,
                     args=train_params, rng=rng)
            saver = tf.train.Saver()
            saver.save(sess, "train_dir/model_ae_final.ckpt")
            evaluate_ae()
            print("Model loaded and trained for more epochs")

    # ------------------------------------------------------------------
    # Classifier
    # ------------------------------------------------------------------
    cl_model = cnn_cl_model(img_rows=img_rows, img_cols=img_cols,
                            channels=nchannels, nb_filters=64,
                            nb_classes=nb_classes)
    preds_cl = cl_model(x)

    def evaluate():
        # Evaluate the accuracy of the classifier on legitimate test
        # examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_cl, x_t, x_test, y_test, x_test,
                         args=eval_params)
        report.clean_train_clean_eval = acc
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    train_params = {
        'nb_epochs': 100,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir_cl,
        'filename': filename
    }
    rng = np.random.RandomState([2017, 8, 30])
    os.makedirs(train_dir_cl, exist_ok=True)

    wrap_cl = KerasModelWrapper(cl_model)
    if clean_train_cl:
        print("Training CNN Classifier")
        loss_cl = CrossEntropy(wrap_cl, smoothing=label_smoothing)
        train(sess, loss_cl, x_train, y_train, evaluate=evaluate,
              optimizer=tf.train.RMSPropOptimizer(learning_rate=0.0001,
                                                  decay=1e-6),
              args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, "train_dir/model_cnn_cl.ckpt")
        print("saved model at ", "train_dir/model_cnn_cl.ckpt")
    else:
        print("Loading CNN Classifier")
        saver = tf.train.Saver()
        saver.restore(sess, "train_dir/model_cnn_cl.ckpt")
        evaluate()
        if train_further:
            train_params = dict(train_params)
            loss_cl = CrossEntropy(wrap_cl, smoothing=label_smoothing)
            train(sess, loss_cl, x_train, y_train, evaluate=evaluate,
                  optimizer=tf.train.RMSPropOptimizer(learning_rate=0.0001,
                                                      decay=1e-6),
                  args=train_params, rng=rng)
            saver = tf.train.Saver()
            saver.save(sess, "train_dir/model_cnn_cl_final.ckpt")
            print("Model loaded and trained further")
            evaluate()

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerAE(wrap_ae, wrap_cl, sess=sess)

    if not targeted:
        # The untargeted branch was never written; fail with a clear
        # message instead of a NameError further down.
        raise NotImplementedError(
            "cifar10_cw_recon only implements the targeted attack")
    if source_samples != nb_classes:
        raise ValueError(
            "source_samples must equal the number of classes "
            f"({source_samples} != {nb_classes})")

    # First test sample of every class
    idxs = [
        np.where(np.argmax(y_test, axis=1) == i)[0][0]
        for i in range(nb_classes)
    ]

    if viz_enabled:
        # Initialize our arrays for grid visualization
        grid_shape = (nb_classes, nb_classes, img_rows, img_cols, nchannels)
        grid_viz_data = np.zeros(grid_shape, dtype='f')
        grid_viz_data_1 = np.zeros(grid_shape, dtype='f')

    n_targets = nb_classes - 1
    n_pairs = source_samples * n_targets
    image_batch_shape = (n_pairs, img_rows, img_cols, nchannels)

    # Every source sample is repeated once per target class ...
    adv_inputs = np.array([[instance] * n_targets
                           for instance in x_test[idxs]],
                          dtype=np.float32).reshape(image_batch_shape)
    adv_input_y = np.array([[label] * n_targets
                            for label in y_test[idxs]]
                           ).reshape(n_pairs, nb_classes)
    # ... and paired with the sample / label of each other class.
    adv_target_y = _others_per_class(y_test[idxs]).reshape(n_pairs,
                                                           nb_classes)
    adv_input_targets = _others_per_class(x_test[idxs]).reshape(
        image_batch_shape)

    one_hot = np.zeros((nb_classes, nb_classes))
    one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1
    adv_ys = np.array([one_hot] * source_samples,
                      dtype=np.float32).reshape(
                          (source_samples * nb_classes, nb_classes))

    cw_params = {
        'binary_search_steps': 1,
        'y_target': adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': CW_LEARNING_RATE,
        'batch_size': n_pairs,
        'initial_const': 1
    }

    def assess_attack(autoencoder, layer_name, adv_examples, fig_suffix,
                      summarize):
        """Reconstruct, classify, report and (optionally) plot one set of
        adversarial examples."""
        # Two separate get_layer() calls are kept deliberately so the
        # number of graph ops matches the previous implementation.
        recon_orig_tensor = autoencoder.get_layer(x, layer_name)
        recon_adv_tensor = autoencoder.get_layer(x, layer_name)
        logits_tensor = wrap_cl.get_logits(x)
        recon_orig = sess.run(recon_orig_tensor, feed_dict={x: adv_inputs})
        recon_adv = sess.run(recon_adv_tensor, feed_dict={x: adv_examples})
        pred_adv_recon = sess.run(logits_tensor, feed_dict={x: recon_adv})

        _print_attack_metrics(adv_examples, adv_inputs, adv_input_targets,
                              recon_adv, pred_adv_recon, adv_input_y,
                              adv_target_y)

        if viz_enabled:
            _fill_viz_grids(grid_viz_data, grid_viz_data_1, recon_orig,
                            recon_adv, adv_inputs, adv_examples, nb_classes)

        if summarize:
            print('--------------------------------------')
            # Compute the average distortion introduced by the algorithm
            percent_perturbed = np.mean(
                np.sum((adv_examples - adv_inputs)**2, axis=(1, 2, 3))**.5)
            print('Avg. L_2 norm of perturbations {0:.4f}'.format(
                percent_perturbed))

        if viz_enabled:
            plt.ioff()
            _save_image_grid(grid_viz_data, 'cifar10_fig1' + fig_suffix)
            _save_image_grid(grid_viz_data_1, 'cifar10_fig2' + fig_suffix)

    adv = cw.generate_np(adv_inputs, adv_input_targets, **cw_params)
    # 'activation_7' is the name Keras gave the output layer of the first
    # autoencoder; it depends on the order in which models are created.
    assess_attack(wrap_ae, 'activation_7', adv, '', summarize=True)

    # ------------------------------------------------------------------
    # Adversarial training
    # ------------------------------------------------------------------
    if adv_train:
        print("starting adversarial training")
        nb_rounds = 20
        adv_input_set = []
        adv_input_target_set = []
        for _ in range(nb_rounds):
            indices = np.arange(np.shape(x_train)[0])
            np.random.shuffle(indices)
            print("indices: ", indices[1:10])
            x_train = x_train[indices]
            y_train = y_train[indices]
            train_idxs = [
                np.where(np.argmax(y_train, axis=1) == i)[0][0]
                for i in range(nb_classes)
            ]
            adv_inputs_2 = np.array(
                [[instance] * n_targets for instance in x_train[train_idxs]],
                dtype=np.float32).reshape(image_batch_shape)
            adv_input_targets_2 = _others_per_class(
                x_train[train_idxs]).reshape(image_batch_shape)
            adv_input_set.append(adv_inputs_2)
            adv_input_target_set.append(adv_input_targets_2)

        # (A stray trailing comma used to turn adv_input_set into a tuple
        # here, which the subsequent reshape had to work around.)
        adv_input_set = np.array(adv_input_set)
        adv_input_target_set = np.array(adv_input_target_set)
        print("shape of adv_input_set: ", np.shape(adv_input_set))
        print("shape of adv_input_target_set: ",
              np.shape(adv_input_target_set))
        # Merge the (round, pair) axes into a single batch axis.
        adv_input_set = adv_input_set.reshape(
            (-1,) + adv_input_set.shape[2:])
        adv_input_target_set = adv_input_target_set.reshape(
            (-1,) + adv_input_target_set.shape[2:])
        print("generated adversarial training set")

        adv_set = cw.generate_np(adv_input_set, adv_input_target_set,
                                 **cw_params)

        # Inputs are clean + adversarial samples; the reconstruction
        # targets are the corresponding clean samples.
        x_train_aim = np.append(x_train, adv_input_set, axis=0)
        x_train_app = np.append(x_train, adv_set, axis=0)

        model_ae_adv = ae_model(x, img_rows=img_rows, img_cols=img_cols,
                                channels=nchannels)
        recon = model_ae_adv(x)
        wrap_ae_adv = KerasModelWrapper(model_ae_adv)

        # NOTE(review): 'train_dir' is the classifier directory although an
        # autoencoder is trained here -- confirm this is intended.
        train_params = {
            'nb_epochs': 2,
            'batch_size': batch_size,
            'learning_rate': learning_rate,
            'train_dir': train_dir_cl,
            'filename': filename
        }
        print("Training Adversarial AE")
        loss_2 = SquaredError(wrap_ae_adv)
        print("layer names: ", wrap_ae_adv.get_layer_names())
        train_ae(sess, loss_2, x_train_app, x_train_aim,
                 evaluate=evaluate_ae, args=train_params, rng=rng)
        saver = tf.train.Saver()
        saver.save(sess, "train_dir/model_ae_adv.ckpt")
        print("saved model")

        cw2 = CarliniWagnerAE(wrap_ae_adv, wrap_cl, sess=sess)
        adv_2 = cw2.generate_np(adv_inputs, adv_input_targets, **cw_params)

        # All metrics now refer to the new attack `adv_2` (the noise value
        # used to be computed from the first attack by mistake).
        assess_attack(wrap_ae_adv, 'activation_20', adv_2, '_adv_trained',
                      summarize=True)

    # ------------------------------------------------------------------
    # Binarization defense
    # ------------------------------------------------------------------
    if binarization_defense:
        # In-place: a following mean filter sees the binarized images.
        adv[adv > 0.5] = 1.0
        adv[adv <= 0.5] = 0.0
        assess_attack(wrap_ae, 'activation_7', adv, '_bin', summarize=False)

    # ------------------------------------------------------------------
    # Mean filter defense
    # ------------------------------------------------------------------
    if mean_filtering:
        # NOTE(review): a scalar size filters along every axis, including
        # the batch and channel axes -- confirm this is intended.
        adv = uniform_filter(adv, 2)
        assess_attack(wrap_ae, 'activation_7', adv, '_mean', summarize=False)

    return report
def test_get_layer_names(self):
    """Check the layer names reported by a wrapped Keras model."""
    expected_names = ['l1', 'l2', 'softmax']
    wrapper = KerasModelWrapper(self.model)
    self.assertEqual(wrapper.get_layer_names(), expected_names)