def main(_):
    """Craft FGSM adversarial examples for the selected dataset and score them.

    The dataset is chosen through ``FLAGS.dataset`` (``'MNIST'`` or
    ``'Cifar'``).  Adversarial versions of the train and test splits are
    saved under ``adversarial_outputs/`` and the model is evaluated on the
    legitimate and the adversarial data of each split.

    :param _: unused positional argument.
    :raises ValueError: if ``FLAGS.dataset`` is not a supported dataset name.
    """
    dataset = FLAGS.dataset
    if dataset not in ('MNIST', 'Cifar'):
        # Previously an unknown name left `data`, `model` and `x_adv` unbound
        # and surfaced later as a confusing NameError.
        raise ValueError("Unsupported dataset: {!r}".format(dataset))

    with tf.Session() as sess:
        K.set_session(sess)

        if dataset == 'MNIST':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
        else:
            data, model = CIFAR(), CIFARModel("models/cifar", sess)

        def _model_fn(x, logits=False):
            # Adapter around model.predict, which returns a
            # (prediction, logits) pair; logits are returned only on request.
            ybar, logits_ = model.predict(x)
            if logits:
                return ybar, logits_
            return ybar

        # NOTE(review): `x` is not defined inside this function; presumably
        # it is a module-level input placeholder -- confirm.
        if dataset == 'MNIST':
            x_adv = fgsm(_model_fn, x, epochs=9, eps=0.02)
        else:
            x_adv = fgsm(_model_fn, x, epochs=4, eps=0.01)

        X_adv_test = attack(x_adv, data.test_data, data.test_labels, sess)
        X_adv_train = attack(x_adv, data.train_data, data.train_labels, sess)

        tag = dataset.lower()
        np.save('adversarial_outputs/fgsm_train_' + tag + '.npy', X_adv_train)
        np.save('adversarial_outputs/fgsm_test_' + tag + '.npy', X_adv_test)

        print("Legit/Adversarial training set")
        model.evaluate(data.train_data, data.train_labels)
        model.evaluate(X_adv_train, data.train_labels)

        print("Legit/Adversarial test set")
        model.evaluate(data.test_data, data.test_labels)
        model.evaluate(X_adv_test, data.test_labels)
def main():
    """Run a zeroth-order attack on MNIST and save the adversarial images.

    A single perturbation ``delImgAT`` is optimised with the optimizer named
    in ``MGR.parSet['optimizer']`` (``'ZOSVRG'`` or ``'ZOSGD'``).  It is then
    applied to each of the ``MGR.parSet['nFunc']`` attacked images, and the
    resulting images plus the perturbation itself are written to
    ``MGR.parSet['save_path']``.

    If the optimizer name is not recognised, a message is printed, the log
    handler is closed and the function returns without saving anything.
    """
    data, model = MNIST(), MNISTModel(restore="models/mnist", use_log=True)
    origImgs, origLabels, origImgID = util.generate_attack_data_set(data, model, MGR)

    delImgAT_Init = np.zeros(origImgs[0].shape)
    objfunc = ObjectiveFunc.OBJFUNC(MGR, model, origImgs, origLabels)

    # Step size is alpha divided by the number of elements in one image.
    MGR.Add_Parameter('eta', MGR.parSet['alpha']/origImgs[0].size)
    MGR.Log_MetaData()

    optimizer = MGR.parSet['optimizer']
    if optimizer == 'ZOSVRG':
        delImgAT = svrg.ZOSVRG(delImgAT_Init, MGR, objfunc)
    elif optimizer == 'ZOSGD':
        delImgAT = sgd.ZOSGD(delImgAT_Init, MGR, objfunc)
    else:
        print('Please specify a valid optimizer')
        # There is no perturbation to apply; previously execution fell
        # through and failed with a NameError on `delImgAT`, leaving the log
        # handler open.
        sys.stdout.flush()
        MGR.logHandler.close()
        return

    save_path = MGR.parSet['save_path']
    for idx_ImgID in range(MGR.parSet['nFunc']):
        currentID = origImgID[idx_ImgID]
        origImg = origImgs[idx_ImgID]
        orig_prob = model.model.predict(np.expand_dims(origImg, axis=0))
        # The perturbation is added in arctanh space and mapped back with
        # tanh()/2, which keeps the result inside (-0.5, 0.5).
        advImg = np.tanh(np.arctanh(origImg*1.9999999)+delImgAT)/2.0
        adv_prob = model.model.predict(np.expand_dims(advImg, axis=0))

        suffix = "id{}_Orig{}_Adv{}".format(currentID, np.argmax(orig_prob), np.argmax(adv_prob))
        util.save_img(advImg, "{}/Adv_{}.png".format(save_path, suffix))

    util.save_img(np.tanh(delImgAT)/2.0, "{}/Delta.png".format(save_path))

    sys.stdout.flush()
    MGR.logHandler.close()
def prep_bbox(sess, x, y, X_train, Y_train, X_test, Y_test, nb_epochs,
              batch_size, learning_rate):
    """
    Build, train (or load) and evaluate the model that plays the role of the
    "remote" black-box oracle described in the original paper.
    :param sess: the TF session
    :param x: the input placeholder
    :param y: the output placeholder
    :param X_train: the training data for the oracle
    :param Y_train: the training labels for the oracle
    :param X_test: the testing data for the oracle
    :param Y_test: the testing labels for the oracle
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :return: tuple ``(model, predictions, accuracy)`` -- the underlying
             model, its output on ``x`` and the value returned by
             ``model_eval`` on the test data
    """
    # The architecture follows the module-level DATASET switch; anything
    # other than "mnist" selects the CIFAR model.
    model_cls = MNISTModel if DATASET == "mnist" else CIFARModel
    model = model_cls(use_log=True).model
    predictions = model(x)
    print("Defined TensorFlow model graph.")

    # Either train from scratch or restore weights into the session.
    if not FLAGS.load_pretrain:
        model_train(
            sess, x, y, predictions, X_train, Y_train,
            verbose=True, save=True,
            args={
                'nb_epochs': nb_epochs,
                'batch_size': batch_size,
                'learning_rate': learning_rate,
            })
    else:
        tf_model_load(sess)

    # Report accuracy on legitimate test data.
    accuracy = model_eval(sess, x, y, predictions, X_test, Y_test,
                          args={'batch_size': batch_size})
    print('Test accuracy of black-box on legitimate test '
          'examples: ' + str(accuracy))

    return model, predictions, accuracy
def run_pca(Data, num_components=10, invert=False):
    """Attack a PCA-defended MNIST classifier with Carlini-Wagner L2.

    A PCA with ``num_components`` components is fitted on the flattened
    training images.  With ``invert=False`` a dense model loaded from
    ``models/mnist-pca-top-<k>`` is wrapped together with the PCA.  With
    ``invert=True`` the CNN from ``models/mnist-pca-cnn-top-<k>`` is used and
    its ``predict`` is replaced by a version that projects the input onto the
    PCA components and back before classifying.

    Prints clean accuracy, the first adversarial example, the mean L2
    distortion and the fraction of adversarial examples still classified as
    their original label (printed under the key ``'success'``).

    :param Data: zero-argument callable returning the dataset object.
    :param num_components: number of PCA components to keep.
    :param invert: select the "project and reconstruct" CNN defence.
    """
    data = Data()
    sess = K.get_session()
    K.set_learning_phase(False)

    pca = sklearn.decomposition.PCA(n_components=num_components)
    pca.fit(data.train_data.reshape((-1, 784)))

    k = str(num_components)
    if invert:
        model = MNISTModel("models/mnist-pca-cnn-top-" + k)
    else:
        dense = make_model(num_components)
        dense.load_weights("models/mnist-pca-top-" + k)
        model = Wrap(dense, pca)

    tf_mean = tf.constant(pca.mean_, dtype=tf.float32)
    tf_components = tf.constant(pca.components_.T, dtype=tf.float32)

    def new_predict(xs):
        # Project into PCA space ...
        centred = tf.reshape(xs, (-1, 784)) - tf_mean
        projected = tf.matmul(centred, tf_components)
        # ... and reconstruct an image from the kept components.
        restored = tf.matmul(projected, tf.transpose(tf_components)) + tf_mean
        return model.model(tf.reshape(restored, (-1, 28, 28, 1)))

    if invert:
        model.predict = new_predict

    attack = CarliniL2(sess, model, batch_size=100, max_iterations=3000,
                       binary_search_steps=6, targeted=False,
                       initial_const=1)

    sample_count = 100
    clean = data.test_data[:sample_count]
    test_adv = attack.attack(clean, data.test_labels[:sample_count])

    clean_scores = sess.run(
        model.predict(tf.constant(data.test_data, dtype=np.float32)))
    clean_hits = np.argmax(clean_scores, axis=1) == np.argmax(data.test_labels, axis=1)
    print('accuracy', np.mean(clean_hits))

    print(list(test_adv[0].flatten()))

    l2_per_image = np.sum((test_adv - clean)**2, axis=(1, 2, 3))**.5
    print('dist', np.mean(l2_per_image))

    adv_classes = np.argmax(sess.run(model.predict(tf.constant(test_adv))), axis=1)
    true_classes = np.argmax(data.test_labels, axis=1)[:sample_count]
    print('success', np.mean(adv_classes == true_classes))
def compare_baseline():
    """Attack the undefended MNIST model and print the mean L2 distortion.

    Runs an untargeted Carlini-Wagner L2 attack on the first 100 test images
    of the model restored from ``models/mnist``.
    """
    data = MNIST()
    model = MNISTModel("models/mnist")
    sess = K.get_session()

    attack = CarliniL2(
        sess,
        model,
        batch_size=100,
        max_iterations=3000,
        binary_search_steps=4,
        targeted=False,
        initial_const=10,
    )

    sample_count = 100
    clean = data.test_data[:sample_count]
    test_adv = attack.attack(clean, data.test_labels[:sample_count])

    # Per-image Euclidean norm of the perturbation, averaged over the batch.
    l2_per_image = np.sum((test_adv - clean)**2, axis=(1, 2, 3))**.5
    print('dist', np.mean(l2_per_image))
def train(data, file_name, components=100, num_epochs=20, batch_size=256, pca=None, invert=False):
    """
    Train a classifier on PCA-reduced data.

    Every split of ``data`` is flattened, transformed with ``pca`` and
    truncated to the first ``components`` coordinates.  With ``invert=False``
    a model from ``make_model(components)`` is trained directly on those
    coordinates.  With ``invert=True`` the coordinates are mapped back with
    ``pca.inverse_transform``, reshaped to 28x28x1 images and used to train
    an ``MNISTModel``.

    :param data: dataset object exposing ``train_data``/``train_labels``,
        ``validation_data``/``validation_labels`` and
        ``test_data``/``test_labels``.
    :param file_name: path to save the trained model to, or ``None`` to skip
        saving.
    :param components: number of leading PCA coordinates to keep.
    :param num_epochs: number of training epochs.
    :param batch_size: mini-batch size.
    :param pca: fitted object providing ``transform`` and
        ``inverse_transform``.  Required despite the ``None`` default.
    :param invert: train on PCA-reconstructed images instead of coordinates.
    :return: the trained model.
    :raises ValueError: if ``pca`` is not supplied.
    """
    if pca is None:
        # The default used to fail later with an opaque AttributeError on
        # `None.transform`; fail early with a clear message instead.
        raise ValueError("train() requires a fitted `pca` object")

    shape = (-1, data.train_data.shape[1]*data.train_data.shape[2]*data.train_data.shape[3])

    def project(images):
        # Flatten, move to PCA space and keep the leading coordinates.
        return pca.transform(images.reshape(shape))[:, :components]

    train_data = project(data.train_data)
    validation_data = project(data.validation_data)
    test_data = project(data.test_data)

    print(train_data.shape)

    if invert:
        train_data = pca.inverse_transform(train_data).reshape((-1, 28, 28, 1))
        validation_data = pca.inverse_transform(validation_data).reshape((-1, 28, 28, 1))
        test_data = pca.inverse_transform(test_data).reshape((-1, 28, 28, 1))
        model = MNISTModel(None).model
    else:
        model = make_model(components)

    def fn(correct, predicted):
        # The loss takes `predicted` as logits, not probabilities.
        return tf.nn.softmax_cross_entropy_with_logits(labels=correct,
                                                       logits=predicted)

    model.compile(loss=fn,
                  optimizer='adam',
                  metrics=['accuracy'])

    # NOTE(review): `nb_epoch` looks like the older Keras spelling of
    # `epochs`; it is kept as-is -- confirm against the installed version.
    model.fit(train_data, data.train_labels,
              batch_size=batch_size,
              validation_data=(validation_data, data.validation_labels),
              nb_epoch=num_epochs,
              shuffle=True)

    acc = np.mean(np.argmax(model.predict(test_data), axis=1) == np.argmax(data.test_labels, axis=1))
    print("Overall accuracy on test set:", acc)

    if file_name is not None:
        model.save(file_name)

    return model
def main(args):
    """Run a ZOO / AutoZOOM black-box attack and save the results.

    Loads the dataset and classifier named by ``args["dataset"]``, builds the
    attack set, configures the attack chosen by ``args["attack_method"]``
    (``zoo``, ``zoo_ae``, ``autozoom_bilin`` or ``autozoom_ae``) and attacks
    every selected image.  For each image the original, adversarial and
    difference images are written as ``.png`` and ``.npy`` files under
    ``<save_path>/<dataset>/<attack_method>/<attack_type>``, and a running
    success rate and average L2 distortion are printed.

    :param args: dict of run options.  ``args["img_resize"]`` and
        ``args["batch_size"]`` may be overwritten in place with the values
        actually used.
    :raises ValueError: if ``args["dataset"]`` is not a supported dataset.
    :raises Exception: if the dataset holds fewer test labels than
        ``args["num_img"]``.
    """
    with tf.Session() as sess:
        dataset = args["dataset"]
        print("Loading data and classification model: {}".format(dataset))
        if dataset == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist", sess, use_softmax=True)
        elif dataset == "cifar10":
            data, model = CIFAR(), CIFARModel("models/cifar", sess, use_softmax=True)
        elif dataset == "imagenet":
            data, model = ImageNetDataNP(), InceptionModel(sess, use_softmax=True)
        else:
            # Previously an unknown name left `data`/`model` unbound.
            raise ValueError("Unsupported dataset: {!r}".format(dataset))

        if len(data.test_labels) < args["num_img"]:
            raise Exception("No enough data, only have {} but need {}".format(
                len(data.test_labels), args["num_img"]))

        is_imagenet = dataset in ("imagenet", "imagenet_np")
        is_small_dataset = dataset in ("mnist", "cifar10")
        attack_method = args["attack_method"]
        uses_codec = attack_method in ("zoo_ae", "autozoom_ae")
        targeted = args["attack_type"] == "targeted"

        if args["attack_single_img"]:
            # Manually set up an attack set of one image.
            # NOTE(review): the names assigned in this branch are never read
            # by the attack loop below, which uses the `all_*` variables set
            # only in the `else` branch, so single-image mode appears to fail
            # with a NameError.  Left unchanged -- confirm intended wiring.
            orig_img = data.test_data
            orig_labels = data.test_labels
            orig_img_id = np.array([1])
            if targeted:
                target_labels = [
                    np.eye(model.num_labels)[args["single_img_target_label"]]
                ]
            else:
                target_labels = orig_labels
        else:
            # Generate the attack set.
            shift_index = is_imagenet
            extra = {}
            if args["random_target"] and is_imagenet:
                # Random targets among classes that occur in the test labels.
                extra["random_target_class"] = np.unique(
                    np.argmax(data.test_labels, 1))
            elif args["random_target"]:
                # Random targets among all possible classes.
                extra["random_target_class"] = list(
                    range(data.test_labels.shape[1]))
            all_orig_img, all_target_labels, all_orig_labels, all_orig_img_id = util.generate_attack_data_set(
                data,
                args["num_img"],
                args["img_offset"],
                model,
                attack_type=args["attack_type"],
                shift_index=shift_index,
                **extra)

        # Attack related settings.
        if attack_method in ("zoo", "autozoom_bilin"):
            if args["img_resize"] is None:
                args["img_resize"] = model.image_size
                print(
                    "Argument img_resize is not set and not using autoencoder, set to image original size:{}"
                    .format(args["img_resize"]))

        if attack_method in ("zoo", "zoo_ae"):
            if args["batch_size"] is None:
                args["batch_size"] = 128
                print(
                    "Using zoo or zoo_ae attack, and batch_size is not set.\nSet batch_size to {}."
                    .format(args["batch_size"]))
        else:
            # NOTE(review): the flattened source does not show whether the
            # reset to 1 belongs inside this `if`; kept nested -- confirm.
            if args["batch_size"] is not None:
                print("Argument batch_size is not used")
                args["batch_size"] = 1  # force to be 1

        if uses_codec:
            if is_small_dataset:
                codec = CODEC(model.image_size,
                              model.num_channels,
                              args["compress_mode"],
                              use_tanh=False)
            else:
                codec = CODEC(128, model.num_channels, args["compress_mode"])
            print(args["codec_prefix"])
            codec.load_codec(args["codec_prefix"])
            decoder = codec.decoder
            print(decoder.input_shape)
            # The attack works in the decoder's input resolution.
            args["img_resize"] = decoder.input_shape[1]
            print("Using autoencoder, set the attack image size to:{}".format(
                args["img_resize"]))

        # Set up the attack.
        if attack_method == "zoo":
            blackbox_attack = ZOO(sess, model, args)
        elif attack_method == "zoo_ae":
            blackbox_attack = ZOO_AE(sess, model, args, decoder)
        elif attack_method == "autozoom_bilin":
            blackbox_attack = AutoZOOM_BiLIN(sess, model, args)
        elif attack_method == "autozoom_ae":
            blackbox_attack = AutoZOOM_AE(sess, model, args, decoder, codec)

        save_prefix = os.path.join(args["save_path"], dataset,
                                   attack_method, args["attack_type"])
        # Create the output directory without going through a shell: the
        # path comes from user arguments and `mkdir -p` is not portable.
        if not os.path.isdir(save_prefix):
            os.makedirs(save_prefix)

        total_success = 0
        l2_total = 0

        for i in range(all_orig_img_id.size):
            orig_img = all_orig_img[i:i + 1]
            target = all_target_labels[i:i + 1]
            label = all_orig_labels[i:i + 1]

            target_class = np.argmax(target)
            true_class = np.argmax(label)
            test_index = all_orig_img_id[i]

            print(
                "[Info][Start]{}: test_index:{}, true label:{}, target label:{}"
                .format(i, test_index, true_class, target_class))

            if uses_codec:
                # Report how well the autoencoder reconstructs this image.
                if is_small_dataset:
                    temp_img = all_orig_img[i:i + 1]
                else:
                    # Resize to the 128x128 codec input; the +0.5/-0.5 shifts
                    # convert to and from a 0..255 scale around imresize.
                    temp_img = all_orig_img[i]
                    temp_img = (temp_img + 0.5) * 255
                    temp_img = scipy.misc.imresize(temp_img, (128, 128))
                    temp_img = temp_img / 255 - 0.5
                    temp_img = np.expand_dims(temp_img, axis=0)
                encode_img = codec.encoder.predict(temp_img)
                decode_img = codec.decoder.predict(encode_img)
                diff_img = (decode_img - temp_img)
                diff_mse = np.mean(diff_img.reshape(-1)**2)
                print("[Info][AE] MSE:{:.4f}".format(diff_mse))

            adv_img = blackbox_attack.attack(orig_img, target)

            # Restore the batch axis if the attack returned a single image.
            if len(adv_img.shape) == 3:
                adv_img = np.expand_dims(adv_img, axis=0)

            l2_dist = np.sum((adv_img - orig_img)**2)**.5
            adv_class = np.argmax(model.model.predict(adv_img))

            if targeted:
                success = adv_class == target_class
            else:
                success = adv_class != true_class

            if success:
                total_success += 1
                l2_total += l2_dist

            print(
                "[Info][End]{}: test_index:{}, true label:{}, adv label:{}, success:{}, distortion:{:.5f}, success_rate:{:.4f}, l2_avg:{:.4f}"
                .format(i, test_index, true_class, adv_class, success, l2_dist,
                        total_success / (i + 1),
                        0 if total_success == 0 else l2_total / total_success))

            # Save original, adversarial and difference images.
            suffix = "id{}_testIndex{}_true{}_adv{}".format(
                i, test_index, true_class, adv_class)
            diff = adv_img - orig_img
            outputs = (
                ("Orig", orig_img, orig_img),
                ("Adv", adv_img, adv_img),
                # The PNG of the difference is halved; the .npy keeps it raw.
                ("Diff", diff / 2, diff),
            )
            for tag, png_data, npy_data in outputs:
                util.save_img(
                    png_data,
                    os.path.join(save_prefix, "{}_{}.png".format(tag, suffix)))
                np.save(
                    os.path.join(save_prefix, "{}_{}.npy".format(tag, suffix)),
                    npy_data)
def main(): mnist = input_data.read_data_sets('MNIST_data', one_hot=True) train_data = mnist.train.images * 2.0 - 1.0 train_label = mnist.train.labels test_data = mnist.test.images * 2.0 - 1.0 test_label = mnist.test.labels x_dim = train_data.shape[1] y_dim = train_label.shape[1] opt = opts.parse_opt() batch_size = opt.batch_size # Changing the options here. opt.input_data = "MNIST" opt.input_c_dim = 1 opt.output_c_dim = 1 opt.input_dim = x_dim opt.label_dim = y_dim # Running arguments opt.c = 1. opt.ld = 500. opt.H_lambda = 10. opt.cgan_flag = True opt.patch_flag = True opt.G_lambda = 10. opt.s_l = 0 opt.t_l = 1 # batch_size = opt.batch_size # Runnign a session, to load the saved model. with tf.Session() as sess: model_store = opt.model_restore print 'MNIST model is stored at %s' % model_store whitebox_model = MNISTModel(model_store) #initial ADVGAN model = advGAN(whitebox_model, model_store, opt, sess) best_model_path = './GAN/save/best.ckpt' print 'advGAN is stored at %s' % best_model_path model.load(best_model_path) # tvars = tf.trainable_variables() # tvars_vals = sess.run(tvars) # for var, val in zip(tvars, tvars_vals): # if 'generator' not in var.name: # continue # print(var.name, val.shape) # Prints the name of the variable alongside its value. # We have to load a batch of images, then create the fake ones. # They should be identical. num_images = 10 images = train_data[:num_images] fake_images = sess.run([model.fake_images_sample], {model.source: images}) plt.imshow(np.reshape(fake_images[0], [28, 28])) plt.show()
def train():
    """Train the advGAN on MNIST against a "good" and an "evil" classifier.

    Data is read from ``MNIST_data/B.npz``.  For every epoch the training set
    is shuffled and processed in mini-batches, running the generator op
    ``opt.g_train_num`` times and the discriminator op ``opt.d_train_num``
    times per batch and writing summaries to a fresh directory under
    ``logs/MNIST/``.  After each epoch the generator output for the test set
    is scored with both classifiers (accuracy and confusion matrices are
    printed) and a ``snapshot_<epoch>.png`` image is written.  The snapshot
    of the epoch with the highest evil accuracy is finally saved as
    ``best_dn_..._epoch_<n>.png``.

    Raises:
        NotImplementedError: at the end of the run when ``opt.save_data`` is
            set.

    NOTE(review): the source this was reconstructed from had lost its
    indentation; nesting levels marked "confirm" below are a best reading.
    """
    flatten_flag = True  # flatten output of G or not?  (not read again in this function)
    opt = opts.parse_opt()
    opt.input_data = "MNIST"
    # mapping [0,1] -> [-1,1]
    # load data
    # (An earlier variant, now disabled, read the data with
    # input_data.read_data_sets('MNIST_data', one_hot=True) and rescaled the
    # images with `* 2.0 - 1.0`.)
    loaded = np.load('MNIST_data/B.npz')
    train_data, train_label, test_data, test_label = \
        loaded['train_data'], loaded['train_label'], \
        loaded['test_data'], loaded['test_label']

    # We create the label clues here: one (img_dim x img_dim x n_labels)
    # volume per label with only that label's channel set to 1.
    # `label_clue` is only referenced by disabled code in this function.
    if opt.cgan_gen is True:
        label_clue = np.zeros((train_label.shape[1], opt.img_dim, opt.img_dim,
                               train_label.shape[1]))
        for lbl in range(train_label.shape[1]):
            label_clue[lbl, :, :, lbl] = 1

    # Fixed samples used for the per-epoch snapshot images.
    if opt.cgan_gen:
        output_samples, output_labels = output_sample(test_data, test_label,
                                                      True)
    else:
        output_samples = output_sample(test_data, test_label)

    print output_samples.shape
    print 'Shape of data:'
    print '\tTraining data: ' + str(train_data.shape)
    print '\tTraining label: ' + str(train_label.shape)
    print '\tTest data: ' + str(test_data.shape)
    print '\tTest label: ' + str(test_label.shape)

    x_dim = train_data.shape[1]
    y_dim = train_label.shape[1]

    opt.input_c_dim = 1
    opt.output_c_dim = 1
    opt.input_dim = x_dim
    opt.label_dim = y_dim
    batch_size = opt.batch_size

    NUM_THREADS = 2
    tf_config = tf.ConfigProto()
    tf_config.intra_op_parallelism_threads = NUM_THREADS
    tf_config.gpu_options.allow_growth = True

    with tf.Session(config=tf_config) as sess:
        # Build the models and the summary writer.
        global_step = 0
        print '\tRetrieving evil model from "%s"' % opt.evil_model_path
        evil_model = MNISTModel(opt.evil_model_path)
        print '\tRetrieving good model from "%s"' % opt.good_model_path
        good_model = OddEvenMNIST(opt.good_model_path)
        # model = advGAN(whitebox_model, model_store, opt, sess)
        model = advGAN(good_model, evil_model, opt, sess)

        min_adv_accuracy = 10e10  # not read again in this function
        max_accuracy_diff = -np.inf

        # Pick the first "<base>_<n>/" log directory that does not exist yet
        # so that earlier runs are not overwritten.
        summary_dir = "logs/MNIST/dn_%d_gn_%d" % (opt.d_train_num,
                                                  opt.g_train_num)
        duplicate_num = 0
        while os.path.isdir(summary_dir + '_' + str(duplicate_num) + '/'):
            duplicate_num += 1
        summary_dir += '_' + str(duplicate_num) + '/'
        print 'Creating directory %s for logs.' % summary_dir
        os.mkdir(summary_dir)

        writer = tf.summary.FileWriter(summary_dir, sess.graph)
        loader = Dataset2(train_data, train_label)  # not used by the loop below
        print 'Training data loaded.'

        best_evil_accuracy = -1.0
        best_res_epoch = -1
        best_res = None
        for epoch_num in range(opt.max_epoch):
            print 'Epoch %d' % epoch_num

            # Randomly shuffle the data.
            random_indices = np.arange(train_data.shape[0])
            np.random.shuffle(random_indices)
            train_data = train_data[random_indices, :]
            train_label = train_label[random_indices, :]

            # real_buckets[lbl] = indices of all training rows whose one-hot
            # label has a 1 in column `lbl`.
            real_buckets = []
            for lbl in range(train_label.shape[1]):
                real_buckets.append(np.where(train_label[:, lbl] == 1)[0])

            # Mini-batch Gradient Descent.
            batch_no = 0
            while (batch_no * batch_size) < train_data.shape[0]:
                head = batch_no * batch_size
                if head + batch_size <= train_data.shape[0]:
                    tail = head + batch_size
                else:
                    # Last, partial batch: slide the window back so that it
                    # still holds exactly `batch_size` rows.
                    tail = train_data.shape[0]
                    head = train_data.shape[0] - batch_size

                feed_data = train_data[head:tail, :]
                evil_labels = train_label[head:tail, :]
                good_labels = odd_even_labels(evil_labels)

                # Finding randomly sampled real data.
                real_data = np.zeros_like(feed_data)
                # Indices of training batch with specific label.
                # label_indices[i] = indices of feed data, that have evil_label[i] == 1.
                label_indices = [np.where(evil_labels[:, lbl] == 1)[0] \
                                 for lbl in range(evil_labels.shape[1])]

                for lbl in range(evil_labels.shape[1]):
                    # We take a random sample of size |label_indices[lbl]|
                    # from the real bucket of `lbl`.
                    selected_real_data = np.random.choice(
                        real_buckets[lbl], label_indices[lbl].shape[0])
                    # We put this random sample in the same index of their
                    # corresponding batch training data.
                    real_data[label_indices[lbl], :] = train_data[
                        selected_real_data, :]

                feed = {
                    model.source: feed_data,
                    model.target: real_data,
                    model.good_labels: good_labels,
                    model.evil_labels: evil_labels
                }

                # Train G.
                for _ in range(opt.g_train_num):
                    summary_str, G_loss, gan_loss, hinge_loss, l1_loss, l2_loss, \
                        good_fn_loss, evil_fn_loss, adv_loss, total_loss, _ = sess.run([
                            model.total_loss_merge_sum,
                            model.g_loss,
                            model.gan_loss,
                            model.hinge_loss,
                            model.l1_loss,
                            model.l2_loss,
                            model.good_fn_loss,
                            model.evil_fn_loss,
                            model.adv_loss,
                            model.total_loss,
                            model.G_train_op], feed)
                    writer.add_summary(summary_str, global_step)

                # Train D.
                for _ in range(opt.d_train_num):
                    summary_str, D_loss, _ = sess.run([
                        model.total_loss_merge_sum, model.d_loss,
                        model.D_pre_train_op
                    ], feed)
                    writer.add_summary(summary_str, global_step)

                # NOTE(review): both counters are taken to advance once per
                # mini-batch -- confirm the nesting.
                global_step += 1
                batch_no += 1

            # Validation after each training epoch.
            # The losses printed here are those of the last mini-batch.
            print '\tD: %.4f, G: %.4f\n\thinge(%.1f): %.4f, L1(%.1f): %.4f, L2(%.1f): %.4f' % (
                D_loss, G_loss, opt.H_lambda, hinge_loss, opt.L1_lambda,
                l1_loss, opt.L2_lambda, l2_loss)
            print '\t\tGAN total loss: %.4f' % gan_loss
            print '\tGood: %.4f, Evil: %.4f' % (good_fn_loss, evil_fn_loss)
            print '\tAdv: %.4f, Total: %.4f' % (adv_loss, total_loss)

            # Run the generator over the whole test set in fixed-size batches.
            new_pred_data = []
            head = 0
            last_batch = False
            while head < test_data.shape[0]:
                if head + batch_size <= test_data.shape[0]:
                    tail = head + batch_size
                else:
                    # Partial final batch: re-use the last `batch_size` rows.
                    tail = test_data.shape[0]
                    head = test_data.shape[0] - batch_size
                    last_batch = True

                if opt.cgan_gen:
                    cur_data = sess.run(
                        model.fake_images_sample,
                        {model.evil_labels: test_label[head:tail, :]})
                else:
                    cur_data = sess.run(
                        model.fake_images_sample,
                        {model.source: test_data[head:tail, :]})

                if last_batch:
                    # Keep only the rows not already covered by the previous
                    # batch.
                    new_pred_data.append(
                        cur_data[-(test_data.shape[0] % batch_size):, :])
                else:
                    new_pred_data.append(cur_data)
                head += batch_size
            new_pred_data = np.concatenate(new_pred_data)

            good_pred = np.argmax(
                model.good_model.model.predict(new_pred_data), axis=1)
            evil_pred = np.argmax(
                model.evil_model.model.predict(new_pred_data), axis=1)
            evil_true = np.argmax(test_label, axis=1)
            good_true = np.argmax(odd_even_labels(test_label), axis=1)

            good_accuracy = accuracy_score(good_true, good_pred)
            evil_accuracy = accuracy_score(evil_true, evil_pred)
            total_good_confusion = confusion_matrix(good_true, good_pred)
            total_evil_confusion = confusion_matrix(evil_true,
                                                    evil_pred,
                                                    labels=range(
                                                        opt.evil_label_num))

            print '\tGood Accuracy: %.4f, Evil Accuracy: %.4f' % (
                good_accuracy, evil_accuracy)
            print '\tAccuracy diff: %f' % (good_accuracy - evil_accuracy)
            print 'Good confusion matrix:'
            print total_good_confusion
            print 'Evil confusion matrix:'
            print total_evil_confusion

            # Creating snapshots to save.
            if opt.cgan_gen:
                fake_samples = sess.run(model.fake_images_sample,
                                        {model.evil_labels: output_labels})
            else:
                fake_samples, fake_noise = sess.run(
                    [model.fake_images_sample, model.sample_noise],
                    {model.source: output_samples})

            # Overwritten every epoch (no max() is taken despite the name).
            max_accuracy_diff = good_accuracy - evil_accuracy
            fakes = merge(fake_samples[:100, :], [10, 10])
            separator = np.ones((280, 2))
            original = merge(output_samples[:100].reshape(-1, 28, 28, 1),
                             [10, 10])

            if opt.cgan_gen:
                scipy.misc.imsave(
                    'snapshot_%d.png' % epoch_num,
                    np.concatenate([fakes, separator, original], axis=1))
            else:
                noise = merge(fake_noise[:100], [10, 10])
                scipy.misc.imsave(
                    'snapshot_%d.png' % epoch_num,
                    np.concatenate([fakes, noise, original], axis=1))

            # Only for the purpose of finding best D and G training times.
            if evil_accuracy > best_evil_accuracy:
                best_evil_accuracy = evil_accuracy
                best_res_epoch = epoch_num
                if opt.cgan_gen:
                    best_res = np.concatenate([fakes, separator, original],
                                              axis=1)
                else:
                    best_res = np.concatenate([fakes, noise, original],
                                              axis=1)

        # NOTE(review): taken to run once, after the last epoch -- confirm.
        best_image_path = 'best_dn_%d_gn_%d_%d_epoch_%d.png' % \
            (opt.d_train_num, opt.g_train_num, duplicate_num, best_res_epoch)
        scipy.misc.imsave(best_image_path, best_res)

        # A large, fully commented-out iteration-based training/validation
        # loop (driven by `loader.next_batch` and `opt.max_iteration`, with
        # checkpoint saving) used to follow here; it was dead code and has
        # been condensed into this note.

        # We can transform the training and test data given in the beginning here.
        # This is only half the actual data.
        if opt.save_data:
            # if opt.cgan_gen:
            raise NotImplementedError(
                'Saving data for CGAN_GEN is not yet implemented.')
def _perturbation_norms(adv_img, orig_img):
    """Return the (L1, L2, Linf) norms of the perturbation ``adv_img - orig_img``."""
    delta = adv_img - orig_img
    abs_delta = np.abs(delta)
    return np.sum(abs_delta), np.sum(delta**2)**.5, np.amax(abs_delta)


def main(args):
    """Run one attack and print best/average/worst-case distortion statistics.

    ``args`` is a dict. Keys read here: ``dataset``, ``seed_imagenet``,
    ``adversarial``, ``temp``, ``numimg``, ``train``, ``seed``, ``attack``,
    ``batch_size``, ``maxiter``, ``conf``, ``binary_steps``, ``beta``,
    ``abort_early``, ``show`` and ``save``.

    Supported ``args['attack']`` values are ``L2``, ``L1``, ``EN``, ``FGSM``,
    ``FGML1``, ``FGML2``, ``IFGSM``, ``IFGML1`` and ``IFGML2``.

    When ``args['train']`` is truthy the labels and adversarial examples are
    saved to ``.npy`` files and the function returns without evaluating.

    Raises:
        ValueError: if ``args['attack']`` is not one of the supported names.
    """
    with tf.Session() as sess:
        if (args['dataset'] == 'mnist'):
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        if (args['dataset'] == "cifar"):
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        if (args['dataset'] == "imagenet"):
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True

        # Optional model overrides (adversarially trained / distilled).
        if (args['adversarial'] != "none"):
            model = MNISTModel("models/mnist_cw" + str(args['adversarial']), sess)
        if (args['temp'] and args['dataset'] == 'mnist'):
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']), sess)
        if (args['temp'] and args['dataset'] == 'cifar'):
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']), sess)

        inputs, targets, labels, true_ids = generate_data(
            data, model, samples=args['numimg'], inception=inception,
            handpick=handpick, train=args['train'], seed=args['seed'])

        # Attack constructors sharing the optimisation-style signature.
        optimisation_attacks = {'L2': CarliniL2, 'L1': EADL1, 'EN': EADEN}
        # Gradient-sign style attacks: (constructor, norm order).
        gradient_attacks = {
            'FGSM': (FGM, np.inf), 'FGML1': (FGM, 1), 'FGML2': (FGM, 2),
            'IFGSM': (IGM, np.inf), 'IFGML1': (IGM, 1), 'IFGML2': (IGM, 2),
        }

        # The timer deliberately includes attack construction, as before.
        timestart = time.time()
        attack_name = args['attack']
        if attack_name in optimisation_attacks:
            attack = optimisation_attacks[attack_name](
                sess, model,
                batch_size=args['batch_size'],
                max_iterations=args['maxiter'],
                confidence=args['conf'],
                binary_search_steps=args['binary_steps'],
                beta=args['beta'],
                abort_early=args['abort_early'])
        elif attack_name in gradient_attacks:
            attack_cls, norm_ord = gradient_attacks[attack_name]
            attack = attack_cls(sess, model, batch_size=args['batch_size'],
                                ord=norm_ord, inception=inception)
        else:
            # Previously this fell through to an UnboundLocalError on `adv`.
            raise ValueError("unknown attack: {!r}".format(attack_name))
        # If untargeted, pass labels instead of targets.
        adv = attack.attack(inputs, targets)
        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / args['batch_size'], "random instances.")

        if (args['train']):
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)
            return

        r_best = []
        d_best_l1 = []
        d_best_l2 = []
        d_best_linf = []
        r_average = []
        d_average_l1 = []
        d_average_l2 = []
        d_average_linf = []
        r_worst = []
        d_worst_l1 = []
        d_worst_l2 = []
        d_worst_linf = []

        # With a non-zero confidence the examples are scored on this model
        # instead of the one that was attacked.
        if (args['conf'] != 0):
            model = MNISTModel("models/mnist-distilled-100", sess)

        out_dir = None
        if (args['show']):
            out_dir = (str(args['save']) + "/" + str(args['dataset']) + "/" +
                       str(args['attack']))
            if not os.path.exists(out_dir):
                os.makedirs(out_dir)

        for i in range(0, len(inputs), args['batch_size']):
            batch = range(i, i + args['batch_size'])

            pred = []
            for j in batch:
                if inception:
                    pred.append(
                        np.reshape(model.model.predict(adv[j:j + 1]),
                                   (data.test_labels[0:1].shape)))
                else:
                    pred.append(model.model.predict(adv[j:j + 1]))
            # hits[k] is truthy when example k of the batch reaches its target.
            hits = [
                np.argmax(pred[k], 1) == np.argmax(targets[j:j + 1], 1)
                for k, j in enumerate(batch)
            ]

            # Best case: smallest distortion among the successful examples.
            dist_l1 = dist_l2 = dist_linf = 1e10
            found = False
            for k, j in enumerate(batch):
                if hits[k]:
                    l1, l2, linf = _perturbation_norms(adv[j], inputs[j])
                    if l1 < dist_l1:
                        dist_l1 = l1
                        found = True
                    if linf < dist_linf:
                        dist_linf = linf
                    if l2 < dist_l2:
                        dist_l2 = l2
            if found:
                d_best_l2.append(dist_l2)
                d_best_l1.append(dist_l1)
                d_best_linf.append(dist_linf)
                r_best.append(1)
            else:
                r_best.append(0)

            # Average case: one randomly chosen example of the batch.
            rand_int = np.random.randint(i, i + args['batch_size'])
            if inception:
                pred_r = np.reshape(
                    model.model.predict(adv[rand_int:rand_int + 1]),
                    (data.test_labels[0:1].shape))
            else:
                pred_r = model.model.predict(adv[rand_int:rand_int + 1])
            if (np.argmax(pred_r, 1) == np.argmax(targets[rand_int:rand_int + 1], 1)):
                l1, l2, linf = _perturbation_norms(adv[rand_int], inputs[rand_int])
                r_average.append(1)
                d_average_l2.append(l2)
                d_average_l1.append(l1)
                d_average_linf.append(linf)
            else:
                r_average.append(0)

            # Worst case: largest distortion, counted only if *every* example
            # of the batch succeeds.
            dist_l1 = dist_l2 = dist_linf = 0
            found = False
            for k, j in enumerate(batch):
                if not hits[k]:
                    r_worst.append(0)
                    found = False
                    break
                l1, l2, linf = _perturbation_norms(adv[j], inputs[j])
                if l1 > dist_l1:
                    dist_l1 = l1
                    found = True
                if linf > dist_linf:
                    dist_linf = linf
                if l2 > dist_l2:
                    dist_l2 = l2
            if found:
                d_worst_l2.append(dist_l2)
                d_worst_l1.append(dist_l1)
                d_worst_linf.append(dist_linf)
                r_worst.append(1)

            if (args['show']):
                for j in batch:
                    target_id = np.argmax(targets[j:j + 1], 1)
                    label_id = np.argmax(labels[j:j + 1], 1)
                    prev_id = np.argmax(
                        np.reshape(model.model.predict(inputs[j:j + 1]),
                                   (data.test_labels[0:1].shape)), 1)
                    adv_id = np.argmax(
                        np.reshape(model.model.predict(adv[j:j + 1]),
                                   (data.test_labels[0:1].shape)), 1)
                    l1, l2, linf = _perturbation_norms(adv[j], inputs[j])
                    # Use the id of image j itself (was true_ids[i], the id
                    # of the first image in the batch).
                    suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                        true_ids[j], target_id, label_id, prev_id, adv_id,
                        adv_id == target_id, l1, l2, linf)
                    show(inputs[j:j + 1],
                         out_dir + "/original_{}.png".format(suffix))
                    show(adv[j:j + 1],
                         out_dir + "/adversarial_{}.png".format(suffix))

        print('best_case_L1_mean', np.mean(d_best_l1))
        print('best_case_L2_mean', np.mean(d_best_l2))
        print('best_case_Linf_mean', np.mean(d_best_linf))
        print('best_case_prob', np.mean(r_best))
        print('average_case_L1_mean', np.mean(d_average_l1))
        print('average_case_L2_mean', np.mean(d_average_l2))
        print('average_case_Linf_mean', np.mean(d_average_linf))
        print('average_case_prob', np.mean(r_average))
        print('worst_case_L1_mean', np.mean(d_worst_l1))
        print('worst_case_L2_mean', np.mean(d_worst_l2))
        print('worst_case_Linf_mean', np.mean(d_worst_linf))
        print('worst_case_prob', np.mean(r_worst))
def load_model(self,
               dataset="mnist",
               model_name="2-layer",
               activation="relu",
               model=None,
               batch_size=0,
               compute_slope=False,
               order=1):
    """
    Load (or adopt) a model and build the gradient / Hessian-vector graph.

    model: if set to None, then load dataset with model_name. Otherwise use the model directly.
    dataset: mnist, cifar and imagenet. recommend to use mnist and cifar as a starting point.
    model_name: possible options are 2-layer, normal, brelu and distilled; for
        mnist/cifar any other value is parsed as "N,M,opts" (number of layers,
        hidden units, file-name suffix). For imagenet it is passed through to
        ImageNetModel.
    activation: only consulted when model_name is "normal"; anything other
        than "relu" selects the "*_cnn_7layer_<activation>" model file.
    batch_size: when non-zero, overrides the per-dataset default
        (1024 for mnist/cifar, 32 for imagenet).
    compute_slope: stored on self.compute_slope; not otherwise used here.
    order: when equal to 2, additionally builds the Hessian-vector-product
        and eigenvalue-estimation ops.

    Returns the image placeholder and the network output tensor
    (self.img, self.output).

    Raises RuntimeError for an unknown dataset or a malformed "N,M,opts"
    model_name.
    """
    from setup_cifar import CIFAR, CIFARModel, TwoLayerCIFARModel
    from setup_mnist import MNIST, MNISTModel, TwoLayerMNISTModel
    from nlayer_model import NLayerModel
    from setup_imagenet import ImageNet, ImageNetModel

    # if set this to true, we will use the logit layer output instead of probability
    # the logit layer's gradients are usually larger and more stable
    output_logits = True
    self.dataset = dataset
    self.model_name = model_name

    if model is None:
        print('Loading model...')
        if dataset == "mnist":
            self.batch_size = 1024
            if model_name == "2-layer":
                model = TwoLayerMNISTModel("models/mnist_2layer", self.sess,
                                           not output_logits)
            elif model_name == "normal":
                if activation == "relu":
                    model = MNISTModel("models/mnist", self.sess,
                                       not output_logits)
                else:
                    print("actviation = {}".format(activation))
                    model = MNISTModel("models/mnist_cnn_7layer_" + activation,
                                       self.sess,
                                       not output_logits,
                                       activation=activation)
                    # NOTE(review): nesting of this sleep was reconstructed
                    # from flattened source; its purpose is not evident here.
                    time.sleep(5)
            elif model_name == "brelu":
                model = MNISTModel("models/mnist_brelu",
                                   self.sess,
                                   not output_logits,
                                   use_brelu=True)
            elif model_name == "distilled":
                model = MNISTModel("models/mnist-distilled-100", self.sess,
                                   not output_logits)
            else:
                # specify model parameters as N,M,opts
                model_params = model_name.split(",")
                if len(model_params) < 3:
                    raise (RuntimeError("incorrect model option" + model_name))
                numlayer = int(model_params[0])
                nhidden = int(model_params[1])
                modelfile = "models/mnist_{}layer_relu_{}_{}".format(
                    numlayer, nhidden, model_params[2])
                print("loading", modelfile)
                # numlayer - 1 hidden layers of nhidden units each
                model = NLayerModel([nhidden] * (numlayer - 1), modelfile)
        elif dataset == "cifar":
            self.batch_size = 1024
            if model_name == "2-layer":
                model = TwoLayerCIFARModel("models/cifar_2layer", self.sess,
                                           not output_logits)
            elif model_name == "normal":
                if activation == "relu":
                    model = CIFARModel("models/cifar", self.sess,
                                       not output_logits)
                else:
                    model = CIFARModel("models/cifar_cnn_7layer_" + activation,
                                       self.sess,
                                       not output_logits,
                                       activation=activation)
            elif model_name == "brelu":
                model = CIFARModel("models/cifar_brelu",
                                   self.sess,
                                   not output_logits,
                                   use_brelu=True)
            elif model_name == "distilled":
                model = CIFARModel("models/cifar-distilled-100", self.sess,
                                   not output_logits)
            else:
                # specify model parameters as N,M,opts
                model_params = model_name.split(",")
                if len(model_params) < 3:
                    raise (RuntimeError("incorrect model option" + model_name))
                numlayer = int(model_params[0])
                nhidden = int(model_params[1])
                modelfile = "models/cifar_{}layer_relu_{}_{}".format(
                    numlayer, nhidden, model_params[2])
                print("loading", modelfile)
                model = NLayerModel([nhidden] * (numlayer - 1),
                                    modelfile,
                                    image_size=32,
                                    image_channel=3)
        elif dataset == "imagenet":
            self.batch_size = 32
            model = ImageNetModel(self.sess,
                                  use_softmax=not output_logits,
                                  model_name=model_name,
                                  create_prediction=False)
        else:
            raise (RuntimeError("dataset unknown"))

    #print("*** Loaded model successfully")
    self.model = model
    self.compute_slope = compute_slope
    # an explicit batch_size argument wins over the per-dataset default
    if batch_size != 0:
        self.batch_size = batch_size

    ## placeholders: self.img, self.true_label, self.target_label
    # img is the placeholder for image input
    self.img = tf.placeholder(shape=[
        None, model.image_size, model.image_size, model.num_channels
    ],
                              dtype=tf.float32)
    # output is the output tensor of the entire network
    self.output = model.predict(self.img)
    # create the graph to compute gradient
    # get the desired true label and target label
    self.true_label = tf.placeholder(dtype=tf.int32, shape=[])
    self.target_label = tf.placeholder(dtype=tf.int32, shape=[])
    true_output = self.output[:, self.true_label]
    target_output = self.output[:, self.target_label]
    # get the difference
    self.objective = true_output - target_output
    # get the gradient(deprecated arguments)
    self.grad_op = tf.gradients(self.objective, self.img)[0]
    # compute gradient norm: (in computation graph, so is faster)
    # flatten every per-image gradient to one row before taking norms
    grad_op_rs = tf.reshape(self.grad_op, (tf.shape(self.grad_op)[0], -1))
    self.grad_2_norm_op = tf.norm(grad_op_rs, axis=1)
    self.grad_1_norm_op = tf.norm(grad_op_rs, ord=1, axis=1)
    self.grad_inf_norm_op = tf.norm(grad_op_rs, ord=np.inf, axis=1)

    ### Lily: added Hessian-vector product calculation here for 2nd order bound:
    if order == 2:
        ## _hessian_vector_product(ys, xs, v): return a list of tensors containing the product between the Hessian and v
        ## ys: a scalar value or a tensor or a list of tensors to be summed to yield a scalar
        ## xs: a list of tensors that we should construct the Hessian over
        ## v: a list of tensors with the same shape as xs that we want to multiply by the Hessian
        # self.randv: shape = (Nimg,28,28,1) (the v in _hessian_vector_product)
        self.randv = tf.placeholder(shape=[
            None, model.image_size, model.image_size, model.num_channels
        ],
                                    dtype=tf.float32)
        # hv_op_tmp: shape = (Nimg,28,28,1) for mnist, same as self.img (the xs in _hessian_vector_product)
        hv_op_tmp = gradients_impl._hessian_vector_product(
            self.objective, [self.img], [self.randv])[0]
        # hv_op_rs: reshape hv_op_tmp to hv_op_rs whose shape = (Nimg, 784) for mnist
        hv_op_rs = tf.reshape(hv_op_tmp, (tf.shape(hv_op_tmp)[0], -1))
        # self.hv_norm_op: norm of hessian vector product, keep shape = (Nimg,1) using keepdims
        self.hv_norm_op = tf.norm(hv_op_rs, axis=1, keepdims=True)
        # hv_op_rs_normalize: normalize Hv to Hv/||Hv||, shape = (Nimg, 784)
        hv_op_rs_normalize = hv_op_rs / self.hv_norm_op
        # self.hv_op: reshape hv_op_rs_normalize to shape = (Nimg,28,28,1)
        self.hv_op = tf.reshape(hv_op_rs_normalize, tf.shape(hv_op_tmp))

        ## reshape randv and compute its norm
        # shape: (Nimg, 784)
        randv_rs = tf.reshape(self.randv, (tf.shape(self.randv)[0], -1))
        # shape: (Nimg,)
        self.randv_norm_op = tf.norm(randv_rs, axis=1)
        ## compute v'Hv: use un-normalized Hv (hv_op_tmp, hv_op_rs)
        # element-wise multiplication and then sum over axis = 1 (now shape: (Nimg,))
        self.vhv_op = tf.reduce_sum(tf.multiply(randv_rs, hv_op_rs), axis=1)
        ## compute Rayleigh quotient: v'Hv/v'v (estimated largest eigenvalue), shape: (Nimg,)
        # note: self.vhv_op and self.randv_norm_op has to be in the same dimension (either (Nimg,) or (Nimg,1))
        self.eig_est = self.vhv_op / tf.square(self.randv_norm_op)

        ## Lily added the tf.while to compute the eigenvalue in computational graph later
        # cond for computing largest abs/neg eigen-value
        # keep looping while fewer than 500 iterations have run and the
        # estimate still moved by more than 0.001 since the previous step
        def cond(it, randv, eig_est, eig_est_prev, tfconst):
            norm_diff = tf.norm(eig_est - eig_est_prev, axis=0)
            return tf.logical_and(it < 500, norm_diff > 0.001)

        # compute largest abs eigenvalue: tfconst = 0
        # compute largest neg eigenvalue: tfconst = 10
        # one step: apply (H - tfconst*I) to randv, hand the normalized result
        # to the next iteration as the new randv, and update the quotient
        def body(it, randv, eig_est, eig_est_prev, tfconst):
            #hv_op_tmp = gradients_impl._hessian_vector_product(self.objective, [self.img], [randv])[0]-10*randv
            hv_op_tmp = gradients_impl._hessian_vector_product(
                self.objective, [self.img], [randv])[0] - tf.multiply(
                    tfconst, randv)
            hv_op_rs = tf.reshape(hv_op_tmp, (tf.shape(hv_op_tmp)[0], -1))
            hv_norm_op = tf.norm(hv_op_rs, axis=1, keepdims=True)
            hv_op_rs_normalize = hv_op_rs / hv_norm_op
            hv_op = tf.reshape(hv_op_rs_normalize, tf.shape(hv_op_tmp))
            randv_rs = tf.reshape(randv, (tf.shape(randv)[0], -1))
            randv_norm_op = tf.norm(randv_rs, axis=1)
            vhv_op = tf.reduce_sum(tf.multiply(randv_rs, hv_op_rs), axis=1)
            eig_est_prev = eig_est
            eig_est = vhv_op / tf.square(randv_norm_op)
            return (it + 1, hv_op, eig_est, eig_est_prev, tfconst)

        it = tf.constant(0)
        # compute largest abs eigenvalue
        # loop variables start as (0, randv, vhv_op, eig_est, shift=0.0)
        result = tf.while_loop(
            cond, body,
            [it, self.randv, self.vhv_op, self.eig_est,
             tf.constant(0.0)])
        # compute largest neg eigenvalue
        # the shift is fed at run time through this scalar placeholder
        self.shiftconst = tf.placeholder(shape=(), dtype=tf.float32)
        result_1 = tf.while_loop(
            cond, body,
            [it, self.randv, self.vhv_op, self.eig_est, self.shiftconst])

        # computing largest abs eig value and save result
        self.it = result[0]
        self.while_hv_op = result[1]
        self.while_eig = result[2]

        # computing largest neg eig value and save result
        self.it_1 = result_1[0]
        #self.while_eig_1 = tf.add(result_1[2], tfconst)
        # add the shift back to the estimate obtained for the shifted operator
        self.while_eig_1 = tf.add(result_1[2], result_1[4])

        # debugging aid: flip to True to print the tensors defined above
        show_tensor_op = False
        if show_tensor_op:
            print("====================")
            print("Define hessian_vector_product operator: ")
            print("hv_op_tmp = {}".format(hv_op_tmp))
            print("hv_op_rs = {}".format(hv_op_rs))
            print("self.hv_norm_op = {}".format(self.hv_norm_op))
            print("hv_op_rs_normalize = {}".format(hv_op_rs_normalize))
            print("self.hv_op = {}".format(self.hv_op))
            print("self.grad_op = {}".format(self.grad_op))
            print("randv_rs = {}".format(randv_rs))
            print("self.randv_norm_op = {}".format(self.randv_norm_op))
            print("self.vhv_op = {}".format(self.vhv_op))
            print("self.eig_est = {}".format(self.eig_est))
            print("====================")

    return self.img, self.output
def _argmax_hits(scores, onehot_labels):
    """Return a 0/1 list: 1 where argmax(scores[e]) == argmax(onehot_labels[e])."""
    return [
        1 if np.argmax(sc) == np.argmax(onehot_labels[e]) else 0
        for e, sc in enumerate(scores)
    ]


def main(args):
    """Run an ADMM-style attack and report success rates of the modified model.

    ``args`` is a dict. Keys read here: ``dataset``, ``seed_imagenet``,
    ``adversarial``, ``temp``, ``numimg``, ``numimgT``, ``seed``, ``attack``
    (``'L2C'`` or ``'L2LA2'``), ``batch_size``, ``maxiter``, ``conf``,
    ``binary_steps``, ``abort_early``, ``layer_number``, ``use_kernel``,
    ``iteration_steps``, ``ro`` and ``kernel_bias``.

    Raises:
        ValueError: if ``args['attack']`` is not a supported attack name.
        NotImplementedError: if ``args['kernel_bias']`` is falsy; only the
            kernel/bias evaluation path produces the scores reported below.
    """
    with tf.Session() as sess:
        if args['dataset'] == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        if args['dataset'] == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        if args['dataset'] == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True

        if args['adversarial'] != "none":
            model = MNISTModel(
                "models/mnist_cwl2_admm" + str(args['adversarial']), sess)
        if args['temp'] and args['dataset'] == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']), sess)
        if args['temp'] and args['dataset'] == 'cifar':
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']), sess)

        inputs, targets, labels, true_ids = generate_data_ST(
            data, model, samples=args['numimg'], samplesT=args['numimgT'],
            targeted=True, start=0, inception=inception, handpick=handpick,
            seed=args['seed'])

        if args['attack'] == 'L2C':
            attack = CarliniL2(sess, model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])
        elif args['attack'] == 'L2LA2':
            attack = LADMML2re(sess, model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               layernum=args['layer_number'],
                               use_kernel=args['use_kernel'],
                               confidence=args['conf'],
                               binary_search_steps=args['iteration_steps'],
                               ro=args['ro'],
                               abort_early=args['abort_early'])
        else:
            # Previously `attack` stayed unbound and the call below crashed.
            raise ValueError("unknown attack: {!r}".format(args['attack']))

        timestart = time.time()
        adv = attack.attack(inputs, targets)
        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run", len(inputs), "samples.\n")

        if args['conf'] != 0:
            model = MNISTModel("models/mnist-distilled-100", sess)

        if not args['kernel_bias']:
            # The plain evaluate_perturbation path never assigned `scores`,
            # `scores2`, the test scores or `l2`, so the report below always
            # died with UnboundLocalError. Fail early and say why instead.
            raise NotImplementedError(
                "evaluation without args['kernel_bias'] is not implemented")

        # Statement order is kept as-is: the *_restore evaluator runs after
        # the perturbed evaluation and before the second test-set pass.
        EP = evaluate_perturbation_kb(args, sess, model, inputs)
        scores, l2 = EP(inputs, targets, adv)
        EPT = evaluate_perturbation_testset(args, sess, model, data.test_data)
        test_scores = EPT(data.test_data, data.test_labels)
        EP2 = evaluate_perturbation_kb_restore(args, sess, model, inputs)
        scores2 = EP2(inputs, targets, adv)
        EPT2 = evaluate_perturbation_testset(args, sess, model, data.test_data)
        test_scores2 = EPT2(data.test_data, data.test_labels)

        target_hits = _argmax_hits(scores, targets)
        # Same target hits, restricted to entries with index < numimg.
        target_hits_s = [
            hit for e, hit in enumerate(target_hits) if e < args['numimg']
        ]

        l0s = np.count_nonzero(adv)
        successrate = np.mean(target_hits)
        successrate2 = np.mean(_argmax_hits(scores2, labels))
        successrate3 = np.mean(_argmax_hits(scores, labels))
        test_successrate = np.mean(_argmax_hits(test_scores, data.test_labels))
        test_successrate2 = np.mean(_argmax_hits(test_scores2, data.test_labels))

        print('original model, success rate of T images for the original labels:', successrate2)
        print('modified model, success rate of T images for the original labels:', successrate3)
        print('modified model, success rate of T images for the target labels:', successrate)
        print('modified model, success rate of S imges for the target labels:', np.mean(target_hits_s))
        print('modified model, success rate of test set for the original labels:', test_successrate)
        print('original model, success rate of test set for the original labels:', test_successrate2)
        print('l0 distance:', l0s)
        print('l2 distance:', l2)
# Script fragment: read the remaining command-line arguments, set up a
# TensorFlow/Keras session, load the chosen model and report its accuracy.
# NOTE(review): `dataset` and `mode` are used below but defined outside this
# fragment; presumably they come from the earlier argv entries. Confirm.
K = int(sys.argv[3])
bias = float(sys.argv[4])

# Let TensorFlow grow GPU memory on demand and make Keras use that session.
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
set_session(tf.Session(config=config))
sess = Keras.get_session()
Keras.set_learning_phase(False)

# Fixed seeds for NumPy and TensorFlow.
np.random.seed(1)
tf.set_random_seed(1)

# Pick data, model and an input placeholder matching the image shape.
# NOTE(review): any other `dataset` value leaves data/model/x undefined.
if dataset == "MNIST":
    data = MNIST()
    model = MNISTModel("../1-Models/MNIST")
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
elif dataset == "CIFAR":
    data = CIFAR()
    model = CIFARModel("../1-Models/CIFAR")
    x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))

# Accuracy = fraction of samples whose predicted argmax equals the label argmax.
training_accuracy = np.mean(np.argmax(model.model.predict(data.train_data), axis = 1) == np.argmax(data.train_labels, axis = 1))
print("Training Accuracy: " + str(training_accuracy))
testing_accuracy = np.mean(np.argmax(model.model.predict(data.test_data), axis = 1) == np.argmax(data.test_labels, axis = 1))
print("Testing Accuracy: " + str(testing_accuracy))

# Clean training inputs, the stored examples for this dataset/mode, and the
# model's predictions on the clean inputs.
X = data.train_data
X_adv = np.load("../2-AEs/" + dataset + "/train_" + mode + ".npy")
pred_original = model.model.predict(X)
def main(args):
    """Run the selected attack (``'L2C'`` or ``'L2BB'``) and evaluate it.

    ``args`` is a dict of options. The function loads the dataset and model
    named by ``args['dataset']``, applies the optional ``adversarial`` /
    ``temp`` model overrides, generates attack inputs, runs the attack and
    hands the result to ``l1_l2_li_computation`` (targeted) or
    ``l2_computation`` (untargeted). With ``args['train']`` set, labels and
    adversarial examples are also saved as ``.npy`` files.
    """
    with tf.Session() as sess:
        dataset_name = args['dataset']
        if dataset_name == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick, inception = False, False
        elif dataset_name == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick, inception = True, False
        elif dataset_name == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick, inception = True, True

        # Optional replacement of the model under attack.
        if args['adversarial'] != "none":
            model = MNISTModel(
                "models/mnist_cwl2_admm" + str(args['adversarial']), sess)
        temperature = args['temp']
        if temperature:
            if dataset_name == 'mnist':
                model = MNISTModel(
                    "models/mnist-distilled-" + str(temperature), sess)
            elif dataset_name == 'cifar':
                model = CIFARModel(
                    "models/cifar-distilled-" + str(temperature), sess)

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=args['targeted'],
            start=0,
            inception=inception,
            handpick=handpick,
            seed=args['seed'])

        attack_name = args['attack']
        if attack_name in ('L2C', 'L2BB'):
            # Keyword arguments both attack constructors have in common.
            shared = {
                'batch_size': args['batch_size'],
                'max_iterations': args['maxiter'],
                'confidence': args['conf'],
                'targeted': args['targeted'],
                'abort_early': args['abort_early'],
            }
            if attack_name == 'L2C':
                attack = CarliniL2(
                    sess, model,
                    binary_search_steps=args['binary_steps'],
                    **shared)
            else:
                # score-based ZO-ADMM attack
                attack = LADMMBB(
                    sess, model,
                    binary_search_steps=args['iteration_steps'],
                    ro=args['ro'],
                    gama=args['gama'],
                    epi=args['epi'],
                    alpha=args['alpha'],
                    **shared)

        started = time.time()
        adv, querycount, queryl2 = attack.attack(inputs, targets)
        finished = time.time()
        print("Took", finished - started, "seconds to run", len(inputs), "samples.\n")

        if args['train']:
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)

        if args['conf'] != 0:
            model = MNISTModel("models/mnist-distilled-100", sess)

        # Both evaluators take the same argument list.
        evaluate = l1_l2_li_computation if args['targeted'] else l2_computation
        evaluate(args, data, model, adv, inception, inputs, targets, labels,
                 true_ids, querycount, queryl2)
# Script fragment: validate the metric selection, then load MNIST data and
# the pre-trained MNIST model.
# Exactly one of the two flags may be set (exclusive or).
assert args.lsa ^ args.dsa, "Select either 'lsa' or 'dsa'"
print(args)

if args.d == "mnist":
    #(x_train, y_train), (x_test, y_test) = mnist.load_data()
    data = MNIST()
    x_train = data.train_data
    y_train = data.train_labels
    x_test = data.test_data
    y_test = data.test_labels
    # Force both image arrays to shape (N, 28, 28, 1).
    x_train = x_train.reshape(-1, 28, 28, 1)
    x_test = x_test.reshape(-1, 28, 28, 1)

    # Load pre-trained model.
    #model = load_model("./model/model_mnist.h5")
    model = MNISTModel("./models/mnist")
    # Keep only the wrapped `.model` attribute, which provides summary().
    model = model.model
    model.summary()

    # You can select some layers you want to test.
    # layer_names = ["activation_1"]
    # layer_names = ["activation_2"]
    layer_names = ["activation_3"]

    # Load target set.
    #x_target = np.load("./adv/adv_mnist_{}.npy".format(args.target))
    #x_target = []
    #for i in range(1, 10):
    #    target_img = imread("/tmp/adv_result_{}_to_0.jpg".format(i))
    #    x_target.append(target_img)
def _load_labelled_images(npz_path, image_size):
    """Load an ``.npz`` archive and return ``(images, good_labels, evil_labels)``.

    Train and test splits are concatenated. Images are reshaped to
    ``(N, image_size, image_size, -1)``. ``evil_labels`` is the argmax of the
    stored labels; ``good_labels`` is the argmax of ``odd_even_labels``
    applied to them.
    """
    # Close the archive once the arrays have been materialised.
    with np.load(npz_path) as loaded:
        images = np.concatenate((loaded['train_data'], loaded['test_data']))
        onehot = np.concatenate((loaded['train_label'], loaded['test_label']))
    images = images.reshape(images.shape[0], image_size, image_size, -1)
    good_labels = odd_even_labels(onehot).argmax(axis=1)
    evil_labels = np.argmax(onehot, axis=1)
    return images, good_labels, evil_labels


def main():
    """Print original vs. perturbed accuracy for four pre-trained classifiers.

    Loads the original and perturbed data sets named on the command line and
    evaluates the 'A' and 'C' odd/even and digit classifiers found under
    ``--model_path`` on both.

    Raises:
        NotImplementedError: if ``--train_new`` is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model_path', type=str, default="models/",
        help="Path to save trained model.")
    parser.add_argument(
        '--pert_data', type=str, default='./MNIST_data/perturbed.npz',
        help='Path to LFW perturbed data.')
    parser.add_argument(
        '--orig_data', type=str, default='MNIST_data/B.npz',
        help="Path to original data.")
    parser.add_argument(
        '--image_size', type=int, default=28,
        help='Size of input images.')
    parser.add_argument(
        '--num_channels', type=int, default=1,
        help='Number of channels in input images.')
    parser.add_argument(
        '--train_new', dest='train_new', action='store_true',
        help='Train a new classifier.')
    parser.set_defaults(train_new=False)
    args = parser.parse_args()

    pert_data, pert_good_label, pert_evil_label = _load_labelled_images(
        args.pert_data, args.image_size)
    orig_data, orig_good_label, orig_evil_label = _load_labelled_images(
        args.orig_data, args.image_size)

    # Single-argument print(...) calls produce the same text on Python 2 and 3.
    print('Original data shape: %s' % (orig_data.shape,))

    good_used = OddEvenMNIST(args.model_path + 'A_odd_even')
    good_left = OddEvenMNIST(args.model_path + 'C_odd_even')
    evil_used = MNISTModel(args.model_path + 'A_digits')
    evil_left = MNISTModel(args.model_path + 'C_digits')

    evil_pair = (orig_evil_label, pert_evil_label)
    good_pair = (orig_good_label, pert_good_label)
    for model, label_pair, name in zip(
            [evil_used, good_used, evil_left, good_left],
            [evil_pair, good_pair, evil_pair, good_pair],
            ['Used Evil', 'Used Good', 'Left-out Evil', 'Left-out Good']):
        org_true, pert_true = label_pair
        print(name + ':')
        org_pred = np.argmax(model.model.predict(orig_data), axis=1)
        print(org_pred.shape)
        print(org_true.shape)
        org_acc = accuracy_score(org_true, org_pred)
        print('\tOriginal Accuracy: %.4f' % org_acc)
        dst_pred = np.argmax(model.model.predict(pert_data), axis=1)
        dst_acc = accuracy_score(pert_true, dst_pred)
        print('\tPerturbed Accuracy: %.4f' % dst_acc)

    if args.train_new:
        # Train a new classifier with the new training data, test with original test data.
        raise NotImplementedError(
            'Training new classifier is not yet implemented.')
def load_model(self, dataset="mnist", model_name="2-layer", model=None, batch_size=0, compute_slope=False):
    """
    Load (or adopt) a model and build the gradient-norm graph for it.

    model: a ready model object, or None to load one from dataset and model_name.
    dataset: "mnist", "cifar" or "imagenet".
    model_name: for mnist/cifar one of "2-layer", "normal", "brelu",
        "distilled"; for imagenet it is passed through to ImageNetModel.
    batch_size: when non-zero, overrides the per-dataset default.
    compute_slope: stored on self.compute_slope.

    Returns (self.img, self.output). Raises RuntimeError on an unknown
    dataset or model option.
    """
    import tensorflow as tf
    from setup_cifar import CIFAR, CIFARModel, TwoLayerCIFARModel
    from setup_mnist import MNIST, MNISTModel, TwoLayerMNISTModel
    from setup_imagenet import ImageNet, ImageNetModel

    # The networks are asked for logits rather than probabilities: the logit
    # layer's gradients are usually larger and more stable.
    output_logits = True
    use_softmax = not output_logits
    self.dataset = dataset
    self.model_name = model_name

    if model is None:
        print('Loading model...')
        # (dataset, model_name) -> zero-argument constructor call
        builders = {
            ("mnist", "2-layer"): lambda: TwoLayerMNISTModel(
                "models/mnist_2layer", self.sess, use_softmax),
            ("mnist", "normal"): lambda: MNISTModel(
                "models/mnist", self.sess, use_softmax),
            ("mnist", "brelu"): lambda: MNISTModel(
                "models/mnist_brelu", self.sess, use_softmax, use_brelu=True),
            ("mnist", "distilled"): lambda: MNISTModel(
                "models/mnist-distilled-100", self.sess, use_softmax),
            ("cifar", "2-layer"): lambda: TwoLayerCIFARModel(
                "models/cifar_2layer", self.sess, use_softmax),
            ("cifar", "normal"): lambda: CIFARModel(
                "models/cifar", self.sess, use_softmax),
            ("cifar", "brelu"): lambda: CIFARModel(
                "models/cifar_brelu", self.sess, use_softmax, use_brelu=True),
            ("cifar", "distilled"): lambda: CIFARModel(
                "models/cifar-distilled-100", self.sess, use_softmax),
        }
        if dataset == "imagenet":
            self.batch_size = 32
            model = ImageNetModel(self.sess,
                                  use_softmax=use_softmax,
                                  model_name=model_name,
                                  create_prediction=False)
        elif dataset == "mnist" or dataset == "cifar":
            # The default batch size is set before the option is validated.
            self.batch_size = 1024
            build = builders.get((dataset, model_name))
            if build is None:
                raise RuntimeError("incorrect model option")
            model = build()
        else:
            raise RuntimeError("dataset unknown")

    self.model = model
    self.compute_slope = compute_slope
    if batch_size != 0:
        self.batch_size = batch_size

    # Image input placeholder and the network output computed from it.
    input_shape = [None, model.image_size, model.image_size, model.num_channels]
    self.img = tf.placeholder(shape=input_shape, dtype=tf.float32)
    self.output = model.predict(self.img)

    # Scalar placeholders selecting the true and the target output column.
    self.true_label = tf.placeholder(dtype=tf.int32, shape=[])
    self.target_label = tf.placeholder(dtype=tf.int32, shape=[])
    true_column = self.output[:, self.true_label]
    target_column = self.output[:, self.target_label]

    # Objective: difference between the two selected outputs; its gradient
    # is taken with respect to the input image.
    self.objective = true_column - target_column
    self.grad_op = tf.gradients(self.objective, self.img)[0]

    # One flattened gradient row per image, then its 2-, 1- and inf-norm.
    flat_grad = tf.reshape(self.grad_op, (tf.shape(self.grad_op)[0], -1))
    self.grad_2_norm_op = tf.norm(flat_grad, axis=1)
    self.grad_1_norm_op = tf.norm(flat_grad, ord=1, axis=1)
    self.grad_inf_norm_op = tf.norm(flat_grad, ord=np.inf, axis=1)

    return self.img, self.output
if __name__ == "__main__":
    # Generate unsupervised adversarial examples for the MNIST training set
    # and save them together with the second value returned by the attack.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    # the path of storing UAE
    dir_adv = 'unsupervised_attack/'
    # load model of Autoencoder
    dir_model = 'models/MNIST/convAE'

    data = MNIST()
    inputs = data.train_data

    tf.reset_default_graph()
    with tf.Session(config=config) as sess:
        model = MNISTModel(dir_model, sess)
        attack = MINE_unsupervised(sess,
                                   model,
                                   batch_size=1,
                                   max_iterations=40,
                                   confidence=0,
                                   epsilon=1.0,
                                   mine_batch='conv')
        adv, Mi = attack.attack(inputs)
        # Save the adversarial examples that were just computed; the original
        # passed the undefined name `image` here.
        np.save(dir_adv + 'adv.npy', adv)
        np.save(dir_adv + 'mi.npy', Mi)
        tf.get_default_graph().finalize()
def main(args):
    """Attack test images one at a time and report per-image statistics.

    args is a dict of options. Keys read here: 'level', 'use_zvalue',
    'dataset' ("mnist", "cifar10" or "imagenet"), 'numimg', 'firstimg',
    'attack' ("white" selects CarliniL2, anything else BlackBoxL2),
    'untargeted', 'seed', 'save', plus the optimizer options forwarded to
    the attack constructors.

    For every generated sample the encoded clean image is classified;
    misclassified samples are skipped. The remaining ones are attacked, the
    adversarial image is re-encoded channel by channel and classified, and
    the original / adversarial / difference images are written below
    "<save>/<dataset>/".

    Raises ValueError for an unknown dataset.
    """
    temp_encoder = encoder(level=args['level'])
    with tf.Session() as sess:
        use_log = not args['use_zvalue']
        is_inception = args['dataset'] == "imagenet"

        # load network
        print('Loading model', args['dataset'])
        if args['dataset'] == "mnist":
            data, model = MNIST(), MNISTModel("models/mnist", sess, use_log)
        elif args['dataset'] == "cifar10":
            data, model = CIFAR(), CIFAR_WIDE("models/wide_resnet", sess, use_log)
        elif args['dataset'] == "imagenet":
            data, model = ImageNet(), InceptionModel(sess, use_log)
        else:
            # Previously fell through and crashed later on an unbound `data`.
            raise ValueError("unknown dataset: {}".format(args['dataset']))
        print('Done...')

        if args['numimg'] == 0:
            args['numimg'] = len(data.test_labels) - args['firstimg']
        print('Using', args['numimg'], 'test images')

        # load attack module
        if args['attack'] == "white":
            # batch size 1, optimize on 1 image at a time, rather than
            # optimizing images jointly
            attack = CarliniL2(sess,
                               model,
                               batch_size=1,
                               max_iterations=args['maxiter'],
                               print_every=args['print_every'],
                               early_stop_iters=args['early_stop_iters'],
                               confidence=0,
                               learning_rate=args['lr'],
                               initial_const=args['init_const'],
                               binary_search_steps=args['binary_steps'],
                               targeted=not args['untargeted'],
                               use_log=use_log,
                               adam_beta1=args['adam_beta1'],
                               adam_beta2=args['adam_beta2'])
        else:
            # batch size 128, optimize on 128 coordinates of a single image
            attack = BlackBoxL2(sess,
                                model,
                                batch_size=128,
                                max_iterations=args['maxiter'],
                                print_every=args['print_every'],
                                early_stop_iters=args['early_stop_iters'],
                                confidence=0,
                                learning_rate=args['lr'],
                                initial_const=args['init_const'],
                                binary_search_steps=args['binary_steps'],
                                targeted=not args['untargeted'],
                                use_log=use_log,
                                use_tanh=args['use_tanh'],
                                use_resize=args['use_resize'],
                                adam_beta1=args['adam_beta1'],
                                adam_beta2=args['adam_beta2'],
                                reset_adam_after_found=args['reset_adam'],
                                solver=args['solver'],
                                save_ckpts=args['save_ckpts'],
                                load_checkpoint=args['load_ckpt'],
                                start_iter=args['start_iter'],
                                init_size=args['init_size'],
                                use_importance=not args['uniform'])

        random.seed(args['seed'])
        np.random.seed(args['seed'])

        print('Generate data')
        all_inputs, all_targets, all_labels, all_true_ids, encoding_all = generate_data(
            data,
            samples=args['numimg'],
            targeted=not args['untargeted'],
            start=args['firstimg'],
            inception=is_inception)
        print('Done...')

        # Create the output directory without going through a shell: the
        # path is built from user-supplied arguments.
        out_dir = "{}/{}".format(args['save'], args['dataset'])
        os.makedirs(out_dir, exist_ok=True)

        img_no = 0
        total_success = 0
        l2_total = 0.0
        origin_correct = 0
        adv_correct = 0
        for i in range(all_true_ids.size):
            print(' adversarial_image_no: ', i)
            inputs = all_inputs[i:i + 1]
            encoding_inputs = encoding_all[i:i + 1]
            targets = all_targets[i:i + 1]
            labels = all_labels[i:i + 1]
            print("true labels:", np.argmax(labels), labels)
            print("target:", np.argmax(targets), targets)

            # test if the image is correctly classified
            original_predict = np.squeeze(model.model.predict(encoding_inputs))
            original_prob = np.sort(original_predict)
            original_class = np.argsort(original_predict)
            print("original probabilities:", original_prob[-1:-6:-1])
            print("original classification:", original_class[-1:-6:-1])
            print("original probabilities (most unlikely):", original_prob[:6])
            print("original classification (most unlikely):", original_class[:6])
            if original_class[-1] != np.argmax(labels):
                print(
                    "skip wrongly classified image no. {}, original class {}, classified as {}"
                    .format(i, np.argmax(labels), original_class[-1]))
                continue
            # NOTE: argmax over axis 1 yields a length-1 array, so the two
            # accuracy counters are arrays and print as such.
            origin_correct += np.argmax(labels, 1) == original_class[-1]

            img_no += 1
            timestart = time.time()
            adv, const = attack.attack_batch(inputs, targets)
            if isinstance(const, list):
                const = const[0]
            if len(adv.shape) == 3:
                # restore the leading batch axis
                adv = adv.reshape((1, ) + adv.shape)
            timeend = time.time()
            l2_distortion = np.sum((adv - inputs)**2)**.5

            # Re-encode the adversarial image channel by channel; the code
            # reads channels 0, 1 and 2 of the channels-last array.
            encode_adv = np.transpose(adv, axes=(0, 3, 1, 2))
            encoded_channels = [
                temp_encoder.tempencoding(encode_adv[:, channel, :, :])
                for channel in range(3)
            ]
            encode_adv = np.concatenate(encoded_channels, axis=1)
            encode_adv = np.transpose(encode_adv, axes=(0, 2, 3, 1))

            adversarial_predict = np.squeeze(model.model.predict(encode_adv))
            adversarial_prob = np.sort(adversarial_predict)
            adversarial_class = np.argsort(adversarial_predict)
            print("adversarial probabilities:", adversarial_prob[-1:-6:-1])
            print("adversarial classification:", adversarial_class[-1:-6:-1])
            adv_correct += np.argmax(labels, 1) == adversarial_class[-1]

            if args['untargeted']:
                success = adversarial_class[-1] != original_class[-1]
            else:
                success = adversarial_class[-1] == np.argmax(targets)
            # Attacks with an L2 distortion above 20 never count as a success.
            if l2_distortion > 20.0:
                success = False
            success = bool(success)
            if success:
                total_success += 1
                l2_total += l2_distortion

            suffix = "id{}_seq{}_prev{}_adv{}_{}_dist{}".format(
                all_true_ids[i], i, original_class[-1], adversarial_class[-1],
                success, l2_distortion)
            print("Saving to", suffix)
            show(
                inputs,
                "{}/{}/{}_original_{}.png".format(args['save'], args['dataset'],
                                                  img_no, suffix))
            show(
                adv,
                "{}/{}/{}_adversarial_{}.png".format(args['save'],
                                                     args['dataset'], img_no,
                                                     suffix))
            show(
                adv - inputs,
                "{}/{}/{}_diff_{}.png".format(args['save'], args['dataset'],
                                              img_no, suffix))
            print(
                "[STATS][L1] total = {}, seq = {}, id = {}, time = {:.3f}, success = {}, const = {:.6f}, prev_class = {}, new_class = {}, distortion = {:.5f}, success_rate = {:.3f}, l2_avg = {:.5f}"
                .format(img_no, i, all_true_ids[i], timeend - timestart,
                        success, const, original_class[-1],
                        adversarial_class[-1], l2_distortion,
                        total_success / float(img_no),
                        0 if total_success == 0 else l2_total / total_success))
            sys.stdout.flush()

        print(' origin accuracy : ', 100.0 * origin_correct / all_true_ids.size)
        print(' adv accuracy : ', 100.0 * adv_correct / all_true_ids.size)
def main(args):
    """Run a zeroth-order (ZO) attack on a set of correctly classified images.

    args is a dict of options. Keys read here: 'image_number', 'maxiter',
    'init_const', 'kappa', 'q', 'mode', 'save_iteration', 'dataset'
    ("mnist", "cifar10" or "imagenet"), 'targeted_attack', 'constraint'
    ('uncons' selects the tanh-space formulation, anything else the
    box-constrained one), 'mu', 'lr', 'decay_lr', 'print_iteration' and
    'exp_code'.

    For every selected image the per-iteration losses and the first/best
    attack statistics are written with np.savez under "retperimage2/", and
    aggregate statistics are printed at the end.

    Raises ValueError for an unknown dataset.
    """
    with tf.Session() as sess:
        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)

        # Draw three times as many candidates as needed, because
        # misclassified images are skipped.
        image_id_set = np.random.choice(range(1000),
                                        args["image_number"] * 3,
                                        replace=False)

        max_iter = args['maxiter']  # max number of iterations
        const = args['init_const']  # regularization prior to attack loss
        kappa = args['kappa']  # attack confidence level
        q = args['q']  # number of random direction vectors
        arg_mode = args['mode']  # algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        constrained = args["constraint"] != 'uncons'

        # load classifier; for MNIST and CIFAR the pixel range is [-0.5, 0.5]
        if arg_Dataset == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif arg_Dataset == 'cifar10':
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif arg_Dataset == 'imagenet':
            data, model = ImageNet(SEED), InceptionModel(sess, True)
        else:
            # Previously only printed a message and crashed on unbound `data`.
            raise ValueError('Please specify a valid dataset')

        # np.savez does not create missing directories.
        os.makedirs("retperimage2", exist_ok=True)

        succ_count, ii, iii = 0, 0, 0
        final_distortion_count, first_iteration_count, first_distortion_count = [], [], []
        true_label_list = np.argmax(data.test_labels, axis=1)

        while iii < args["image_number"]:
            # NOTE: the index is advanced before use, so candidate 0 is never
            # attacked; kept so the image sequence matches earlier runs.
            ii = ii + 1
            if ii >= len(image_id_set):
                # Previously an IndexError once too many candidates were skipped.
                print("Ran out of candidate images after {} accepted images".format(iii))
                break
            image_id = image_id_set[ii]

            # orig_class: predicted label
            _, orig_class, _ = util.model_prediction(
                model, np.expand_dims(data.test_data[image_id], axis=0))
            if arg_targeted_attack:
                # targeted attack: aim at the next class (modulo 10)
                target_label = np.remainder(orig_class + 1, 10)
            else:
                target_label = orig_class
            orig_img, _ = util.generate_data(data, image_id, target_label)

            true_label = true_label_list[image_id]
            print("Image ID:{}, infer label:{}, true label:{}".format(
                image_id, orig_class, true_label))
            if true_label != orig_class:
                print("True Label is different from the original prediction, pass!")
                continue
            iii = iii + 1
            print('\n', iii, '/', args["image_number"])

            d = orig_img.size  # feature dimension
            print("dimension = ", d)

            # flatten image to a row vector
            orig_img_vec = np.resize(orig_img, (1, d))
            delta_adv = np.zeros((1, d))  # initial adversarial perturbation

            # w-space initialization of the adversarial image
            if not constrained:
                # * 0.999999 keeps arctanh finite at +-0.5
                w_ori_img_vec = np.arctanh(2 * (orig_img_vec) * 0.999999)
                w_img_vec = np.arctanh(
                    2 * (np.clip(orig_img_vec + delta_adv, -0.5, 0.5)) * 0.999999)
            else:
                w_ori_img_vec = orig_img_vec.copy()
                w_img_vec = np.clip(w_ori_img_vec + delta_adv, -0.5, 0.5)

            best_distortion = (0.5 * d)**2  # threshold for best perturbation
            total_loss = np.zeros(max_iter)
            l2s_loss_all = np.zeros(max_iter)
            attack_flag = False
            first_flag = True  # True until the first successful attack

            mu = args["mu"]  # smoothing parameter for ZO gradient estimation
            base_lr = args["lr"]

            if arg_mode == "ZOAdaMM":
                # parameter initialization for AdaMM
                v_init = 1e-7
                v_hat = v_init * np.ones((1, d))
                v = v_init * np.ones((1, d))
                m = np.zeros((1, d))
                # momentum parameters for the first and second order moments
                beta_1 = 0.9
                beta_2 = 0.9
                print(beta_1, beta_2)

            if arg_save_iteration:
                # Was os.system("mkdir Examples"), spawned on every iteration.
                os.makedirs("Examples", exist_ok=True)

            for i in range(max_iter):
                if args["decay_lr"]:
                    base_lr = args["lr"] / np.sqrt(i + 1)

                # total loss evaluation
                evaluate = function_evaluation_cons if constrained else function_evaluation_uncons
                total_loss[i], l2s_loss_all[i] = evaluate(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)

                # gradient estimation w.r.t. w_img_vec
                if arg_mode == "ZOSCD":
                    estimate = grad_coord_estimation
                elif arg_mode == "ZONES":
                    estimate = gradient_estimation_NES
                else:
                    estimate = gradient_estimation_v2
                grad_est = estimate(mu, q, w_img_vec, d, kappa, target_label,
                                    const, model, orig_img, arg_targeted_attack,
                                    args["constraint"])

                # perturbation update; the mode names are mutually exclusive
                if arg_mode in ("ZOSGD", "ZOSCD"):
                    delta_adv = delta_adv - base_lr * grad_est
                elif arg_mode == "ZOsignSGD":
                    delta_adv = delta_adv - base_lr * np.sign(grad_est)
                elif arg_mode == "ZOAdaMM":
                    m = beta_1 * m + (1 - beta_1) * grad_est
                    v = beta_2 * v + (1 - beta_2) * np.square(grad_est)
                    v_hat = np.maximum(v_hat, v)
                    delta_adv = delta_adv - base_lr * m / np.sqrt(v_hat)
                    if args["constraint"] == 'cons':
                        V_temp = np.sqrt(v_hat.reshape(1, -1))
                        delta_adv = projection_box(delta_adv.copy(), orig_img_vec,
                                                   V_temp, -0.5, 0.5)
                elif arg_mode in ("ZOSMD", "ZOPSGD", "ZONES"):
                    if arg_mode == "ZOSMD":
                        delta_adv = delta_adv - 0.5 * base_lr * grad_est
                    elif arg_mode == "ZOPSGD":
                        delta_adv = delta_adv - base_lr * grad_est
                    else:
                        delta_adv = delta_adv - base_lr * np.sign(grad_est)
                    if args["constraint"] == 'cons':
                        V_temp = np.ones_like(orig_img_vec)
                        delta_adv = projection_box(delta_adv, orig_img_vec,
                                                   V_temp, -0.5, 0.5)

                # adversarial example update
                w_img_vec = w_ori_img_vec + delta_adv

                # convert back to an image in [-0.5, 0.5]
                if not constrained:
                    adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999
                else:
                    adv_img_vec = w_img_vec.copy()
                adv_img = np.resize(adv_img_vec, orig_img.shape)

                # probabilities of the target class and of the best other class
                attack_prob, _, _ = util.model_prediction(model, adv_img)
                target_prob = attack_prob[0, target_label]
                attack_prob_tmp = attack_prob.copy()
                attack_prob_tmp[0, target_label] = 0
                other_prob = np.amax(attack_prob_tmp)

                if args["print_iteration"]:
                    if np.remainder(i + 1, 1) == 0:
                        report = (i + 1, image_id, args["lr"],
                                  int(args["decay_lr"]), arg_mode,
                                  args["constraint"], total_loss[i],
                                  l2s_loss_all[i], true_label,
                                  np.argmax(attack_prob))
                        if true_label != np.argmax(attack_prob):
                            print(
                                "Iter %d (Succ): ID = %d, lr = %3.5f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                                % report)
                        else:
                            print(
                                "Iter %d (Fail): ID = %d, lr = %3.6f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %d"
                                % report)

                # NOTE: per-iteration image dumping into "Examples/" is
                # disabled; the util.save_img call was already commented out.

                # Confidence margin in log space: target over other for a
                # targeted attack, other over target for an untargeted one.
                log_margin = np.log(target_prob + 1e-10) - np.log(other_prob + 1e-10)
                if not arg_targeted_attack:
                    log_margin = -log_margin
                if log_margin >= kappa:
                    current_distortion = distortion(adv_img, orig_img)
                    if current_distortion < best_distortion:
                        best_distortion = current_distortion
                        best_iteration = i + 1
                        attack_flag = True
                        if first_flag:
                            # recorded once; later successes do not overwrite it
                            first_flag = False
                            first_distortion = current_distortion
                            first_iteration = i + 1

            # save per-image data
            suffix0 = "retperimage2/id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_per".format(
                image_id, arg_mode, args["constraint"], args["lr"],
                int(args["decay_lr"]), args["exp_code"])
            if attack_flag:
                succ_count = succ_count + 1
                final_distortion_count.append(l2s_loss_all[-1])
                first_distortion_count.append(first_distortion)
                first_iteration_count.append(first_iteration)
                # The misspelled key `best_iteation` is kept: files written by
                # earlier runs, and any readers of them, use that name.
                np.savez("{}".format(suffix0),
                         id=image_id,
                         mode=arg_mode,
                         loss=total_loss,
                         perturbation=l2s_loss_all,
                         best_distortion=best_distortion,
                         first_distortion=first_distortion,
                         first_iteration=first_iteration,
                         best_iteation=best_iteration,
                         learn_rate=args["lr"],
                         decay_lr=args["decay_lr"],
                         attack_flag=attack_flag)
                print("It takes {} iteations to find the first attack".format(
                    first_iteration))
            else:
                np.savez("{}".format(suffix0),
                         id=image_id,
                         mode=arg_mode,
                         loss=total_loss,
                         perturbation=l2s_loss_all,
                         best_distortion=best_distortion,
                         learn_rate=args["lr"],
                         decay_lr=args["decay_lr"],
                         attack_flag=attack_flag)
                print("Attack Fails")
            sys.stdout.flush()

        print('succ rate:', succ_count / args["image_number"])
        if final_distortion_count:
            print('average first success l2', np.mean(first_distortion_count))
            print('average first itrs', np.mean(first_iteration_count))
            print('average l2:', np.mean(final_distortion_count), ' best l2:',
                  np.min(final_distortion_count), ' worst l2:',
                  np.max(final_distortion_count))
        else:
            # np.min / np.max raise ValueError on an empty sequence.
            print('no successful attack; no distortion statistics to report')
def _prediction_hits(pred, target_onehot, untargeted):
    """Return True when a prediction counts as a successful attack.

    Untargeted: the predicted class differs from the class encoded in
    target_onehot. Targeted: the predicted class equals it.
    """
    predicted = np.argmax(pred, 1)
    wanted = np.argmax(target_onehot, 1)
    if untargeted:
        return bool(predicted != wanted)
    return bool(predicted == wanted)


def _distortion_norms(adv_img, clean_img):
    """Return the (L1, L2, Linf) norms of adv_img - clean_img."""
    diff = adv_img - clean_img
    return np.sum(np.abs(diff)), np.sum(diff**2)**.5, np.amax(np.abs(diff))


def main(args):
    """Generate (or restore) adversarial examples and print attack statistics.

    args is a dict of options. Keys read here: 'dataset' ("mnist", "cifar"
    or "imagenet"), 'adversarial', 'temp', 'seed_imagenet', 'numimg',
    'untargeted', 'targetnum', 'train', 'seed', 'restore_np', 'save_np',
    'attack' (L2, L1, EN, FGSM, FGML1, FGML2, IFGSM, IFGML1, IFGML2),
    'batch_size', 'maxiter', 'conf', 'init_const', 'binary_steps', 'beta',
    'abort_early', 'eps', 'targetmodel', 'show' and 'save'.

    Samples are processed in groups of num_targets consecutive entries.
    For each group the best case (smallest distortion among successes),
    the average case (one randomly drawn entry) and the worst case
    (largest distortion, counted only when every entry succeeds) are
    accumulated and their means printed for every evaluated model.

    Raises ValueError for an unknown dataset or attack name.
    """
    with tf.Session() as sess:
        dataset = args['dataset']
        if dataset in ('mnist', 'cifar'):
            data_cls, model_cls = (MNIST, MNISTModel) if dataset == 'mnist' else (CIFAR, CIFARModel)
            data = data_cls()
            inception = False
            if args['adversarial'] != "none":
                model = model_cls("models/" + dataset + "_cw" + str(args['adversarial']), sess)
            elif args['temp']:
                model = model_cls("models/" + dataset + "-distilled-" + str(args['temp']), sess)
            else:
                model = model_cls("models/" + dataset, sess)
        elif dataset == "imagenet":
            data, model = ImageNet(args['seed_imagenet'],
                                   2 * args['numimg']), InceptionModel(sess)
            inception = True
        else:
            # Previously fell through and crashed later on an unbound `data`.
            raise ValueError("unknown dataset: {}".format(dataset))

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=not args['untargeted'],
            target_num=args['targetnum'],
            inception=inception,
            train=args['train'],
            seed=args['seed'])

        timestart = time.time()
        if args['restore_np']:
            if args['train']:
                adv = np.load(str(dataset) + '_' + str(args['attack']) + '_train.npy')
            else:
                adv = np.load(str(dataset) + '_' + str(args['attack']) + '.npy')
        else:
            # Optimization-based attacks share one set of keyword arguments,
            # the gradient-sign family another. Constructors are wrapped in
            # lambdas so only the selected attack is built.
            def _optimizer_attack(attack_cls):
                return attack_cls(sess,
                                  model,
                                  batch_size=args['batch_size'],
                                  max_iterations=args['maxiter'],
                                  confidence=args['conf'],
                                  initial_const=args['init_const'],
                                  binary_search_steps=args['binary_steps'],
                                  targeted=not args['untargeted'],
                                  beta=args['beta'],
                                  abort_early=args['abort_early'])

            def _gradient_attack(attack_cls, norm_order):
                return attack_cls(sess,
                                  model,
                                  batch_size=args['batch_size'],
                                  ord=norm_order,
                                  eps=args['eps'],
                                  inception=inception)

            builders = {
                'L2': lambda: _optimizer_attack(CarliniL2),
                'L1': lambda: _optimizer_attack(EADL1),
                'EN': lambda: _optimizer_attack(EADEN),
                'FGSM': lambda: _gradient_attack(FGM, np.inf),
                'FGML1': lambda: _gradient_attack(FGM, 1),
                'FGML2': lambda: _gradient_attack(FGM, 2),
                'IFGSM': lambda: _gradient_attack(IFGM, np.inf),
                'IFGML1': lambda: _gradient_attack(IFGM, 1),
                'IFGML2': lambda: _gradient_attack(IFGM, 2),
            }
            if args['attack'] not in builders:
                # Previously left `adv` unbound and crashed further down.
                raise ValueError("unknown attack: {}".format(args['attack']))
            attack = builders[args['attack']]()
            # If untargeted, the caller is expected to pass labels as targets.
            adv = attack.attack(inputs, targets)
        timeend = time.time()

        num_targets = 1 if args['untargeted'] else args['targetnum']
        print("Took", timeend - timestart, "seconds to run",
              len(inputs) / num_targets, "random instances.")

        if args['save_np']:
            if args['train']:
                np.save(str(dataset) + '_labels_train.npy', labels)
                np.save(str(dataset) + '_' + str(args['attack']) + '_train.npy', adv)
            else:
                np.save(str(dataset) + '_' + str(args['attack'] + '.npy'), adv)

        # Transferability tests: the source model first, then (optionally)
        # the distilled MNIST model as transfer target.
        model_ = [model]
        if args['targetmodel'] != "same":
            if args['targetmodel'] == "dd_100":
                model_.append(MNISTModel("models/mnist-distilled-100", sess))
        num_models = len(model_)

        show_dir = str(args['save']) + "/" + str(dataset) + "/" + str(args['attack'])
        if args['show']:
            if not os.path.exists(show_dir):
                os.makedirs(show_dir)

        for m, model in enumerate(model_):
            r_best, d_best_l1, d_best_l2, d_best_linf = [], [], [], []
            r_average, d_average_l1, d_average_l2, d_average_linf = [], [], [], []
            r_worst, d_worst_l1, d_worst_l2, d_worst_linf = [], [], [], []

            for i in range(0, len(inputs), num_targets):
                group = range(i, i + num_targets)

                pred = []
                for j in group:
                    out = model.model.predict(adv[j:j + 1])
                    if inception:
                        out = np.reshape(out, (data.test_labels[0:1].shape))
                    pred.append(out)

                hits = [
                    _prediction_hits(pred[k], targets[j:j + 1], args['untargeted'])
                    for k, j in enumerate(group)
                ]
                norms = [_distortion_norms(adv[j], inputs[j]) for j in group]

                # Best case: smallest distortion among successful entries.
                won = [norms[k] for k, hit in enumerate(hits) if hit]
                if won:
                    d_best_l1.append(min(n[0] for n in won))
                    d_best_l2.append(min(n[1] for n in won))
                    d_best_linf.append(min(n[2] for n in won))
                    r_best.append(1)
                else:
                    r_best.append(0)

                # Average case: one randomly drawn entry of the group. Its
                # prediction is already in `pred`, so it is not recomputed.
                rand_int = np.random.randint(i, i + num_targets)
                if hits[rand_int - i]:
                    avg_l1, avg_l2, avg_linf = norms[rand_int - i]
                    r_average.append(1)
                    d_average_l2.append(avg_l2)
                    d_average_l1.append(avg_l1)
                    d_average_linf.append(avg_linf)
                else:
                    r_average.append(0)

                # Worst case: counted only when every entry succeeded. The
                # old sentinel-index bookkeeping recorded nothing at all when
                # all entries succeeded with zero distortion.
                if all(hits):
                    d_worst_l1.append(max(n[0] for n in norms))
                    d_worst_l2.append(max(n[1] for n in norms))
                    d_worst_linf.append(max(n[2] for n in norms))
                    r_worst.append(1)
                else:
                    r_worst.append(0)

                # Images are written for the last evaluated model only.
                if args['show'] and m == (num_models - 1):
                    for j in group:
                        target_id = np.argmax(targets[j:j + 1], 1)
                        label_id = np.argmax(labels[j:j + 1], 1)
                        prev_id = np.argmax(
                            np.reshape(model.model.predict(inputs[j:j + 1]),
                                       (data.test_labels[0:1].shape)), 1)
                        adv_id = np.argmax(
                            np.reshape(model.model.predict(adv[j:j + 1]),
                                       (data.test_labels[0:1].shape)), 1)
                        l1_j, l2_j, linf_j = norms[j - i]
                        suffix = "id{}_seq{}_lbl{}_prev{}_adv{}_{}_l1_{:.3f}_l2_{:.3f}_linf_{:.3f}".format(
                            true_ids[i], target_id, label_id, prev_id, adv_id,
                            adv_id == target_id, l1_j, l2_j, linf_j)
                        show(inputs[j:j + 1],
                             show_dir + "/original_{}.png".format(suffix))
                        show(adv[j:j + 1],
                             show_dir + "/adversarial_{}.png".format(suffix))

            # Every model but the last is a source model; the last is the target.
            if m != (num_models - 1):
                lbl = "Src_"
                if num_models > 2:
                    lbl += str(m) + "_"
            else:
                lbl = "Tgt_"

            if num_targets > 1:
                print(lbl + 'best_case_L1_mean', np.mean(d_best_l1))
                print(lbl + 'best_case_L2_mean', np.mean(d_best_l2))
                print(lbl + 'best_case_Linf_mean', np.mean(d_best_linf))
                print(lbl + 'best_case_prob', np.mean(r_best))
                print(lbl + 'average_case_L1_mean', np.mean(d_average_l1))
                print(lbl + 'average_case_L2_mean', np.mean(d_average_l2))
                print(lbl + 'average_case_Linf_mean', np.mean(d_average_linf))
                print(lbl + 'average_case_prob', np.mean(r_average))
                print(lbl + 'worst_case_L1_mean', np.mean(d_worst_l1))
                print(lbl + 'worst_case_L2_mean', np.mean(d_worst_l2))
                print(lbl + 'worst_case_Linf_mean', np.mean(d_worst_linf))
                print(lbl + 'worst_case_prob', np.mean(r_worst))
            else:
                print(lbl + 'L1_mean', np.mean(d_average_l1))
                print(lbl + 'L2_mean', np.mean(d_average_l2))
                print(lbl + 'Linf_mean', np.mean(d_average_linf))
                print(lbl + 'success_prob', np.mean(r_average))
def main(args):
    """Run a targeted C&W-L2 or ADMM-L0 attack and evaluate the result.

    args is a dict of options. Keys read here: 'dataset' ("mnist", "cifar"
    or "imagenet"), 'seed_imagenet', 'adversarial', 'temp', 'numimg',
    'seed', 'attack' ('L2C' or 'L0A'), 'batch_size', 'maxiter', 'conf',
    'binary_steps', 'iteration_steps', 'ro', 'abort_early' and 'train'.

    With args['train'] set, the labels and adversarial examples are saved
    as .npy files in the working directory. The evaluation is delegated to
    l0_computation for the L0 attack names and to l1_l2_li_computation
    otherwise.

    Raises ValueError for an unknown dataset or attack name.
    """
    with tf.Session() as sess:
        dataset = args['dataset']
        if dataset == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess)
            handpick = False
            inception = False
        elif dataset == "cifar":
            data, model = CIFAR(), CIFARModel("models/cifar", sess)
            handpick = True
            inception = False
        elif dataset == "imagenet":
            data, model = ImageNet(args['seed_imagenet']), InceptionModel(sess)
            handpick = True
            inception = True
        else:
            # Previously fell through and crashed later on an unbound `data`.
            raise ValueError("unknown dataset: {}".format(dataset))

        # Optional model overrides. NOTE: the adversarially trained
        # checkpoint is an MNIST model whatever the dataset is.
        if args['adversarial'] != "none":
            model = MNISTModel(
                "models/mnist_cwl2_admm" + str(args['adversarial']), sess)
        if args['temp'] and dataset == 'mnist':
            model = MNISTModel("models/mnist-distilled-" + str(args['temp']),
                               sess)
        if args['temp'] and dataset == 'cifar':
            model = CIFARModel("models/cifar-distilled-" + str(args['temp']),
                               sess)

        inputs, targets, labels, true_ids = generate_data(
            data,
            model,
            samples=args['numimg'],
            targeted=True,
            start=0,
            inception=inception,
            handpick=handpick,
            seed=args['seed'])

        if args['attack'] == 'L2C':
            attack = CarliniL2(sess,
                               model,
                               batch_size=args['batch_size'],
                               max_iterations=args['maxiter'],
                               confidence=args['conf'],
                               binary_search_steps=args['binary_steps'],
                               abort_early=args['abort_early'])
        elif args['attack'] == 'L0A':
            attack = ADMML0(sess,
                            model,
                            batch_size=args['batch_size'],
                            max_iterations=args['maxiter'],
                            confidence=args['conf'],
                            binary_search_steps=args['iteration_steps'],
                            ro=args['ro'],
                            abort_early=args['abort_early'])
        else:
            # Previously left `attack` unbound and crashed on attack.attack.
            raise ValueError("unknown attack: {}".format(args['attack']))

        timestart = time.time()
        adv = attack.attack(inputs, targets)
        timeend = time.time()
        print("Took", timeend - timestart, "seconds to run", len(inputs),
              "samples.\n")

        if args['train']:
            np.save('labels_train.npy', labels)
            np.save(str(args['attack']) + '_train.npy', adv)

        # With a non-zero confidence the examples are evaluated against the
        # distilled MNIST model instead of the attacked one.
        if args['conf'] != 0:
            model = MNISTModel("models/mnist-distilled-100", sess)

        if args['attack'] not in ('L0A', 'L0AE', 'L0C', 'L0AE2'):
            l1_l2_li_computation(args, data, model, adv, inception, inputs,
                                 targets, labels, true_ids)
        else:
            l0_computation(args, data, model, adv, inception, inputs, targets,
                           labels, true_ids)
def run(args, restrict=True):
    """Run an untargeted SPSA attack against the defended model.

    Args:
        args: positional sequence ``(dataset, epsilon, mode, K)`` where
            ``dataset`` is ``"MNIST"`` or ``"CIFAR"``, ``epsilon`` is the
            attack budget (converted with ``float``), ``mode`` selects which
            hidden-representation files are loaded and ``K`` (converted with
            ``int``) is forwarded to ``DefendedModel``.
        restrict: when true, pin this process to a single GPU derived from
            the numeric suffix of the multiprocessing worker name.

    Side effects: sets ``CUDA_VISIBLE_DEVICES`` (if ``restrict``), resets the
    Keras session, reseeds NumPy/TensorFlow, and writes natural and
    adversarial accuracy to ``<dataset>/<epsilon>_<mode>_<K>.txt``.

    Raises:
        ValueError: if ``dataset`` is neither ``"MNIST"`` nor ``"CIFAR"``.
    """
    if restrict:
        # Restrict the visible GPUs to the one for this subprocess.  Worker
        # names end in "-<n>" with n starting at 1, GPU ids start at 0.
        # (Builtin int replaces the removed NumPy alias np.int.)
        worker_index = int(
            multiprocessing.current_process().name.split("-")[1])
        os.environ["CUDA_VISIBLE_DEVICES"] = str(worker_index - 1)

    # Load Parameters
    dataset = args[0]
    epsilon = float(args[1])
    mode = args[2]
    K = int(args[3])
    fname = dataset + "/" + str(epsilon) + "_" + mode + "_" + str(K)

    # Configure Keras/Tensorflow
    Keras.clear_session()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))
    sess = Keras.get_session()
    Keras.set_learning_phase(False)

    # Fix Random Seeds.  Seeding TensorFlow before Keras.clear_session()
    # was observed to hang, hence the ordering.
    np.random.seed(1)
    tf.set_random_seed(1)

    # Load Model/Data and setup SPSA placeholders
    N = 50
    if dataset == "MNIST":
        base_model = MNISTModel("../1-Models/MNIST")
        data = MNIST()
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        shape_spsa = (1, 28, 28, 1)
    elif dataset == "CIFAR":
        base_model = CIFARModel("../1-Models/CIFAR")
        data = CIFAR()
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        shape_spsa = (1, 32, 32, 3)
    else:
        raise ValueError(
            "unsupported dataset {!r}; expected 'MNIST' or 'CIFAR'".format(
                dataset))
    # SPSA attacks one image at a time, so it gets fixed-shape placeholders.
    x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    y_spsa = tf.placeholder(tf.int32)

    # Load the hidden representations of the real and adversarial examples
    # from the training set
    x_train_real = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_" + mode + ".npy"))
    x_train_adv = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_adv_" + mode +
                ".npy"))
    n_train = x_train_real.shape[0]
    n_train_adv = x_train_adv.shape[0]
    x_train = np.float32(np.vstack((x_train_real, x_train_adv)))
    # Real representations are labelled -1, adversarial ones +1.
    y_train = np.float32(
        np.hstack((-1.0 * np.ones(n_train), np.ones(n_train_adv))))

    # Create the defended model
    model_defended = DefendedModel(base_model, x_train, y_train, K)
    defended_logits = model_defended.get_logits(x)

    # Configure the attack
    attack = SPSA(model_defended, back="tf", sess=sess)
    with tf.name_scope("Attack"):
        gen = attack.generate(x_spsa,
                              y=y_spsa,
                              epsilon=epsilon,
                              is_targeted=False,
                              num_steps=100,
                              batch_size=2048,
                              early_stop_loss_threshold=-5.0)

    # Run the attack.  The context manager closes the report even when the
    # attack raises part-way through.
    with open(fname + ".txt", "w") as f:
        sample = np.random.choice(data.test_data.shape[0], N, replace=False)
        x_sample = data.test_data[sample]
        y_sample = np.argmax(data.test_labels[sample], axis=1)

        logits_nat = sess.run(defended_logits, {x: x_sample})
        f.write("Accuracy on Natural Images: " +
                str(np.mean(np.argmax(logits_nat, axis=1) == y_sample)) +
                "\n")

        pred_adv = -1.0 * np.ones((N))
        for i in range(N):
            x_real = x_sample[i].reshape(shape_spsa)
            x_adv = sess.run(gen, {x_spsa: x_real, y_spsa: y_sample[i]})
            pred_adv[i] = np.argmax(sess.run(defended_logits, {x: x_adv}))

        f.write("Accuracy on Adversarial Images: " +
                str(np.mean(pred_adv == y_sample)))
def train_sub(sess, x, y, bbox_preds, X_sub, Y_sub, nb_classes, nb_epochs_s,
              batch_size, learning_rate, data_aug, lmbda):
    """
    This function creates the substitute by alternatively
    augmenting the training data and training the substitute.
    :param sess: TF session
    :param x: input TF placeholder
    :param y: output TF placeholder
    :param bbox_preds: output of black-box model predictions
    :param X_sub: initial substitute training data
    :param Y_sub: initial substitute training labels
    :param nb_classes: number of output classes
    :param nb_epochs_s: number of epochs to train substitute model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param data_aug: number of times substitute training data is augmented
    :param lmbda: lambda from arxiv.org/abs/1602.02697
    :return: tuple ``(model_sub, preds_sub)``: the substitute model and its
             symbolic predictions on ``x``
    """
    # Define TF model graph for the substitute model
    if DATASET == "mnist":
        model_sub = MNISTModel(use_log=True).model
    else:
        model_sub = CIFARModel(use_log=True).model
    preds_sub = model_sub(x)
    print("Defined TensorFlow model graph for the substitute.")

    # Define the Jacobian symbolically using TensorFlow
    grads = jacobian_graph(preds_sub, x, nb_classes)

    # Train the substitute and augment dataset alternatively.
    # range() works on both Python 2 and 3; xrange needs a six shim on 3.
    for rho in range(data_aug):
        print("Substitute training epoch #" + str(rho))
        train_params = {
            'nb_epochs': nb_epochs_s,
            'batch_size': batch_size,
            'learning_rate': learning_rate
        }
        model_train(sess, x, y, preds_sub, X_sub, to_categorical(Y_sub),
                    init_all=False, verbose=False, args=train_params)

        # Nothing to augment after the last substitute training iteration
        if rho >= data_aug - 1:
            continue

        if FLAGS.cached_aug:
            # Reuse the augmentation saved by an earlier run
            augs = np.load('sub_saved/{}-aug-{}.npz'.format(DATASET, rho))
            X_sub = augs['X_sub']
            Y_sub = augs['Y_sub']
            continue

        print("Augmenting substitute training data.")
        # Perform the Jacobian augmentation
        X_sub = jacobian_augmentation(sess, x, X_sub, Y_sub, grads, lmbda)

        print("Labeling substitute training data.")
        # Label the newly generated synthetic points using the black-box.
        # The second half of X_sub holds the new points; the duplicated
        # labels there are placeholders that get overwritten below.
        Y_sub = np.hstack([Y_sub, Y_sub])
        half = len(X_sub) // 2
        X_sub_prev = X_sub[half:]
        eval_params = {'batch_size': batch_size}
        bbox_val = batch_eval(sess, [x], [bbox_preds], [X_sub_prev],
                              args=eval_params)[0]
        # Note here that we take the argmax because the adversary
        # only has access to the label (not the probabilities) output
        # by the black-box model
        Y_sub[half:] = np.argmax(bbox_val, axis=1)

        # Cache the augmentation (only reached when not loading a cache)
        np.savez('sub_saved/{}-aug-{}.npz'.format(DATASET, rho),
                 X_sub=X_sub, Y_sub=Y_sub)

    return model_sub, preds_sub
def main(args):
    """Attack one MNIST test image with AEADEN and save the resulting images.

    ``args`` must provide ``img_id``, ``maxiter``, ``binary_steps``,
    ``init_const``, ``mode``, ``kappa``, ``beta`` and ``gamma``.  The attack
    target is the class the model itself predicts for the chosen image.

    Side effects: seeds ``random`` and NumPy, prints a one-line ``[INFO]``
    summary, creates ``Results/<mode>_ID<img_id>_Gamma_<gamma>`` via the shell
    and writes the original, adversarial and delta images into it.
    """
    with tf.Session() as sess:
        random.seed(121)
        np.random.seed(1211)

        # Pull out every option up front, in a fixed order.
        image_id = args['img_id']
        max_iterations = args['maxiter']
        search_steps = args['binary_steps']
        initial_const = args['init_const']
        mode = args['mode']
        kappa = args['kappa']
        beta = args['beta']
        gamma = args['gamma']

        autoencoder = util.load_AE("mnist_AE_1")
        data = MNIST()
        model = MNISTModel("models/mnist", sess, False)

        # The model's own prediction on the clean image is the reference label.
        clean_batch = np.expand_dims(data.test_data[image_id], axis=0)
        orig_prob, orig_class, orig_prob_str = util.model_prediction(
            model, clean_batch)
        target_label = orig_class
        print("Image:{}, infer label:{}".format(image_id, target_label))

        orig_img, target = util.generate_data(data, image_id, target_label)

        attack = AEADEN(sess,
                        model,
                        mode=mode,
                        AE=autoencoder,
                        batch_size=1,
                        kappa=kappa,
                        init_learning_rate=1e-2,
                        binary_search_steps=search_steps,
                        max_iterations=max_iterations,
                        initial_const=initial_const,
                        beta=beta,
                        gamma=gamma)
        adv_img = attack.attack(orig_img, target)

        # Classify the adversarial image and the raw perturbation.
        adv_prob, adv_class, adv_prob_str = util.model_prediction(
            model, adv_img)
        perturbation = orig_img - adv_img
        delta_prob, delta_class, delta_prob_str = util.model_prediction(
            model, perturbation)

        print(
            "[INFO]id:{}, kappa:{}, Orig class:{}, Adv class:{}, Delta class: {}, Orig prob:{}, Adv prob:{}, Delta prob:{}"
            .format(image_id, kappa, orig_class, adv_class, delta_class,
                    orig_prob_str, adv_prob_str, delta_prob_str))

        suffix = "id{}_kappa{}_Orig{}_Adv{}_Delta{}".format(
            image_id, kappa, orig_class, adv_class, delta_class)
        save_dir = "{}_ID{}_Gamma_{}".format(mode, image_id, gamma)
        os.system("mkdir -p Results/{}".format(save_dir))

        orig_path = "Results/{}/Orig_original{}.png".format(
            save_dir, orig_class)
        adv_path = "Results/{}/Adv_{}.png".format(save_dir, suffix)
        delta_path = "Results/{}/Delta_{}.png".format(save_dir, suffix)

        util.save_img(orig_img, orig_path)
        util.save_img(adv_img, adv_path)
        # Absolute perturbation, shifted down by 0.5 before saving.
        util.save_img(np.absolute(orig_img - adv_img) - 0.5, delta_path)

        sys.stdout.flush()
def run(args, restrict=True):
    """Run targeted SPSA attacks against the defended model and pickle results.

    Args:
        args: positional sequence ``(dataset, epsilon, mode, K, bias)`` where
            ``dataset`` is ``"MNIST"`` or ``"CIFAR"``; ``epsilon`` and
            ``bias`` are converted with ``float`` and ``K`` with ``int``.
        restrict: when true, pin this process to a single GPU derived from
            the numeric suffix of the multiprocessing worker name.

    For each of the first ``N = 1000`` test images, every class in
    ``range(10)`` other than the undefended prediction is used as an attack
    target.  The defended model's prediction for each attempt is stored in
    ``pred_adv`` (entries stay at -1 where no attack was run).

    Side effects: sets ``CUDA_VISIBLE_DEVICES`` (if ``restrict``), resets the
    Keras session, reseeds NumPy/TensorFlow, pickles a results dict to
    ``<dataset>/<epsilon>_<mode>_<K>_<bias>`` every 10 images and at the end,
    then calls ``analysis`` on that file.

    Raises:
        ValueError: if ``dataset`` is neither ``"MNIST"`` nor ``"CIFAR"``.
    """
    if restrict:
        # Restrict the visible GPUs to the one for this subprocess.
        # (Builtin int replaces the removed NumPy alias np.int.)
        worker_index = int(
            multiprocessing.current_process().name.split("-")[1])
        os.environ["CUDA_VISIBLE_DEVICES"] = str(worker_index - 1)

    # Load Parameters
    dataset = args[0]
    epsilon = float(args[1])
    mode = args[2]
    K = int(args[3])
    bias = float(args[4])
    fname = dataset + "/" + str(epsilon) + "_" + mode + "_" + str(
        K) + "_" + str(bias)

    # Configure Keras/Tensorflow
    Keras.clear_session()
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    set_session(tf.Session(config=config))
    sess = Keras.get_session()
    Keras.set_learning_phase(False)

    # Fix Random Seeds.  Seeding TensorFlow before Keras.clear_session()
    # was observed to hang, hence the ordering.
    np.random.seed(1)
    tf.set_random_seed(1)

    # Load Model/Data and setup SPSA placeholders
    N = 1000
    if dataset == "MNIST":
        base_model = MNISTModel("../1-Models/MNIST")
        data = MNIST()
        x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
        shape_spsa = (1, 28, 28, 1)
    elif dataset == "CIFAR":
        base_model = CIFARModel("../1-Models/CIFAR")
        data = CIFAR()
        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        shape_spsa = (1, 32, 32, 3)
    else:
        raise ValueError(
            "unsupported dataset {!r}; expected 'MNIST' or 'CIFAR'".format(
                dataset))
    # SPSA attacks one image at a time, so it gets fixed-shape placeholders.
    x_spsa = tf.placeholder(tf.float32, shape=shape_spsa)
    y_spsa = tf.placeholder(tf.int32)

    # Load the hidden representations of the real and adversarial examples
    # from the training set
    x_train_real = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_" + mode + ".npy"))
    x_train_adv = np.squeeze(
        np.load("../3-Representation/" + dataset + "/train_adv_" + mode +
                ".npy"))
    n_train = x_train_real.shape[0]
    n_train_adv = x_train_adv.shape[0]
    x_train = np.float32(np.vstack((x_train_real, x_train_adv)))
    # Real representations are labelled -1, adversarial ones +1.
    y_train = np.float32(
        np.hstack((-1.0 * np.ones(n_train), np.ones(n_train_adv))))

    # Create the defended model
    model_defended = DefendedModel(base_model, x_train, y_train, K, bias=bias)
    defended_logits = model_defended.get_logits(x)

    # Get the predictions on the original images
    labels = np.argmax(data.test_labels[:N], axis=1)
    logits_real = sess.run(defended_logits, {x: data.test_data[:N]})
    # Index 10 is treated as the defense's "adversarial" output: a clean
    # image landing there is a false positive of the defense.
    fp = (np.argmax(logits_real, axis=1) == 10)
    # Dropping the last logit column gives the original model prediction.
    pred_undefended = np.argmax(np.delete(logits_real, -1, axis=1), axis=1)

    # Configure the attack
    attack = SPSA(model_defended, back="tf", sess=sess)
    with tf.name_scope("Attack"):
        gen = attack.generate(x_spsa,
                              y_target=y_spsa,
                              epsilon=epsilon,
                              is_targeted=True,
                              num_steps=100,
                              batch_size=2048,
                              early_stop_loss_threshold=-5.0)

    pred_adv = -1.0 * np.ones((N, 10))

    def _dump_results():
        # Snapshot the current results; pred_adv is filled in place, so a
        # mid-run dump contains -1 rows for images not attacked yet.
        out = {
            "FP": fp,
            "Labels": labels,
            "UndefendedPrediction": pred_undefended,
            "AdversarialPredictions": pred_adv,
        }
        with open(fname, "wb") as handle:
            pickle.dump(out, handle)

    # Run the attack
    for i in range(N):
        if i % 10 == 0:
            # Periodic checkpoint so partial results survive a crash.
            print(fname, " ", i)
            _dump_results()

        x_real = data.test_data[i].reshape(shape_spsa)

        # Try a targeted attack for each class other than the original
        # network prediction and the adversarial class
        for target in range(10):
            if target != pred_undefended[i]:
                x_adv = sess.run(gen, {x_spsa: x_real, y_spsa: target})
                pred_adv[i, target] = np.argmax(
                    sess.run(defended_logits, {x: x_adv}))

    _dump_results()

    analysis(fname)
## Copyright (C) 2016, Nicholas Carlini <*****@*****.**>. ## ## This program is licenced under the BSD 2-Clause licence, ## contained in the LICENCE file in this directory. from setup_cifar import CIFAR, CIFARModel from setup_mnist import MNIST, MNISTModel from setup_inception import ImageNet, InceptionModel import tensorflow as tf import numpy as np BATCH_SIZE = 1 with tf.Session() as sess: data, model = MNIST(), MNISTModel("models/mnist", sess) data, model = CIFAR(), CIFARModel("models/cifar", sess) data, model = ImageNet(), InceptionModel(sess) x = tf.placeholder( tf.float32, (None, model.image_size, model.image_size, model.num_channels)) y = model.predict(x) r = [] for i in range(0, len(data.test_data), BATCH_SIZE): pred = sess.run(y, {x: data.test_data[i:i + BATCH_SIZE]}) #print(pred) #print('real',data.test_labels[i],'pred',np.argmax(pred)) r.append( np.argmax(pred, 1) == np.argmax(data.test_labels[i:i +
def expandImage(image_data):
    """Return a new array holding ``(image_data + 0.5) * 255``.

    The input is copied into a NumPy array first, so the caller's data is
    left untouched.  Values in [-0.5, 0.5] map onto [0, 255].
    """
    return (np.array(image_data) + 0.5) * 255


# In[4]:

if __name__ == "__main__":
    # Let TensorFlow grow GPU memory on demand.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        modelPath = '%smodels/mnist' % (nn_robust_attack_root)
        data = MNIST()
        model = MNISTModel(modelPath, sess)

        attack = CarliniLi(sess, model, max_iterations=1000, targeted=False)

        inputs, targets = generate_data(data,
                                        samples=1000,
                                        targeted=False,
                                        start=5500,
                                        inception=False)

        # Bookkeeping counters.
        # number of benign samples that are misclassified
        original_classified_wrong_number = 0
        # number of samples that failed to craft corresponding adversarial
        # samples
        disturbed_failure_number = 0
        # number of adversarial samples that we generate
        test_number = 0
        TTP = TP = FN = 0
def main(args):
    """Run a zeroth-order universal adversarial attack and save its statistics.

    A single perturbation ``delta_adv`` of shape ``(1, d)`` is optimised for a
    mini-batch of test images of class ``args['class_id']`` using only loss
    evaluations (no gradients).  ``args`` must provide: ``class_id``,
    ``target_id``, ``maxiter``, ``init_const``, ``kappa``, ``q``, ``mode``
    (``ZOSGD``, ``ZOsignSGD``, ``ZOSCD``, ``ZOAdaMM``, ``ZOSMD``, ``ZOPSGD``
    or ``ZONES``), ``save_iteration``, ``dataset`` (``mnist``, ``cifar10`` or
    ``imagenet``), ``targeted_attack``, ``mini_batch_sz``, ``constraint``
    (``'uncons'`` or anything else for the box-constrained form), ``mu``,
    ``lr``, ``decay_lr``, ``print_iteration`` and ``exp_code``.

    Side effects: seeds ``random``/NumPy/TensorFlow; saves the selected clean
    images to ``original_imgsID<class_id>.npy``; saves ``delta_adv`` under
    ``retimgs/`` every 1000 iterations; writes a final ``.npz`` summary
    (loss curves, best/first distortion and iteration when the attack
    succeeded); prints progress.

    Raises:
        ValueError: if the dataset name is not recognised, or if fewer than
            ``mini_batch_sz`` correctly-classified images of the class are
            found among the first 30 candidates.
    """
    with tf.Session() as sess:
        random.seed(SEED)
        np.random.seed(SEED)
        tf.set_random_seed(SEED)

        class_id = args['class_id']  # class of the natural examples
        target_id = args['target_id']  # target class (targeted attack only)
        arg_max_iter = args['maxiter']  # max number of iterations
        arg_init_const = args['init_const']  # weight of the attack loss
        arg_kappa = args['kappa']  # attack confidence level
        arg_q = args['q']  # number of random direction vectors
        arg_mode = args['mode']  # algorithm name
        arg_save_iteration = args['save_iteration']
        arg_Dataset = args["dataset"]
        arg_targeted_attack = args["targeted_attack"]
        arg_bsz = args["mini_batch_sz"]
        constraint = args["constraint"]

        # Load the classifier; MNIST/CIFAR pixel values are in [-0.5, 0.5].
        if arg_Dataset == 'mnist':
            data, model = MNIST(), MNISTModel("models/mnist", sess, True)
        elif arg_Dataset == 'cifar10':
            data, model = CIFAR(), CIFARModel("models/cifar", sess, True)
        elif arg_Dataset == 'imagenet':
            data, model = ImageNet_Universal(SEED), InceptionModel(sess, True)
        else:
            # Previously this only printed and then crashed on an unbound
            # ``data`` a few lines later.
            raise ValueError(
                'Please specify a valid dataset (got {!r})'.format(
                    arg_Dataset))

        # All test images whose ground-truth label is class_id.
        orig_img = data.test_data[np.where(
            np.argmax(data.test_labels, 1) == class_id)]

        # Classify at most 30 candidates and keep only those the model
        # already gets right.
        _, orig_class = util.model_prediction_u(model, orig_img[:30])
        orig_img = orig_img[np.where(orig_class == class_id)]
        if orig_img.shape[0] < arg_bsz:
            # The original ``assert '<message>'`` asserted a non-empty
            # string and therefore could never fail.
            raise ValueError(
                'no enough valid inputs: need {} correctly classified images '
                'of class {}, found {}'.format(arg_bsz, class_id,
                                               orig_img.shape[0]))
        orig_img = orig_img[:arg_bsz]

        np.save('original_imgsID' + str(class_id), orig_img)

        true_label = class_id
        # For an untargeted attack the "target" is the true class, which
        # the attack tries to move away from.
        target_label = target_id if arg_targeted_attack else true_label

        # Per-image feature dimension.
        if orig_img.ndim == 3 or orig_img.shape[0] == 1:
            d = orig_img.size
        else:
            d = orig_img[0].size
        print("dimension = ", d)

        q = arg_q + 0
        I = arg_max_iter + 0
        kappa = arg_kappa + 0
        const = arg_init_const + 0

        # Flatten the images to one row vector each.
        orig_img_vec = np.resize(orig_img, (arg_bsz, d))

        # Optimisation variable: tanh-space for the unconstrained form,
        # raw pixel space otherwise.
        if constraint == 'uncons':
            # * 0.999999 keeps arctanh finite at pixel values of +-0.5.
            w_ori_img_vec = np.arctanh(2 * (orig_img_vec) * 0.999999)
        else:
            w_ori_img_vec = orig_img_vec.copy()
        w_img_vec = w_ori_img_vec.copy()

        delta_adv = np.zeros((1, d))  # universal perturbation

        # Best / first successful attack bookkeeping.
        best_distortion = (0.5 * d)**2  # acceptance threshold
        total_loss = np.zeros(I)
        l2s_loss_all = np.zeros(I)
        attack_flag = False
        first_flag = True  # True until the first success is recorded

        mu = args["mu"]  # smoothing parameter for ZO gradient estimation
        base_lr = args["lr"]

        if arg_mode == "ZOAdaMM":
            # Moment estimates for AdaMM.
            v_init = 1e-7
            v_hat = v_init * np.ones((1, d))
            v = v_init * np.ones((1, d))
            m = np.zeros((1, d))
            beta_1 = 0.9
            beta_2 = 0.3  # only used by AMSGrad
            print(beta_1, beta_2)

        if arg_save_iteration:
            # Hoisted out of the loop: the directory only needs creating
            # once.  The per-iteration image export it served is disabled.
            os.system("mkdir Examples")

        for i in range(I):
            if args["decay_lr"]:
                base_lr = args["lr"] / np.sqrt(i + 1)

            # Total loss evaluation at the current iterate.
            if constraint == 'uncons':
                total_loss[i], l2s_loss_all[i] = function_evaluation_uncons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)
            else:
                total_loss[i], l2s_loss_all[i] = function_evaluation_cons(
                    w_img_vec, kappa, target_label, const, model, orig_img,
                    arg_targeted_attack)

            # Gradient estimation w.r.t. w_img_vec.
            if arg_mode == "ZOSCD":
                estimator = grad_coord_estimation
            elif arg_mode == "ZONES":
                estimator = gradient_estimation_NES
            else:
                estimator = gradient_estimation_v2
            grad_est = estimator(mu, q, w_img_vec, d, kappa, target_label,
                                 const, model, orig_img, arg_targeted_attack,
                                 constraint)

            # Perturbation update.  An unrecognised mode leaves delta_adv
            # unchanged, as before.
            if arg_mode in ("ZOSGD", "ZOSCD"):
                delta_adv = delta_adv - base_lr * grad_est
            elif arg_mode == "ZOsignSGD":
                delta_adv = delta_adv - base_lr * np.sign(grad_est)
            elif arg_mode == "ZOAdaMM":
                m = beta_1 * m + (1 - beta_1) * grad_est
                v = beta_2 * v + (1 - beta_2) * np.square(grad_est)
                v_hat = np.maximum(v_hat, v)
                # NOTE(review): the step is scaled by v while the projection
                # below uses v_hat -- confirm this asymmetry is intended.
                delta_adv = delta_adv - base_lr * m / np.sqrt(v)
                if constraint == 'cons':
                    V_temp = np.sqrt(v_hat.reshape(1, -1))
                    delta_adv = projection_box(delta_adv.copy(), orig_img_vec,
                                               V_temp, -0.5, 0.5)
            elif arg_mode in ("ZOSMD", "ZOPSGD", "ZONES"):
                if arg_mode == "ZOSMD":
                    delta_adv = delta_adv - 0.5 * base_lr * grad_est
                elif arg_mode == "ZOPSGD":
                    delta_adv = delta_adv - base_lr * grad_est
                else:
                    delta_adv = delta_adv - base_lr * np.sign(grad_est)
                if constraint == 'cons':
                    V_temp = np.ones((1, d))
                    delta_adv = projection_box(delta_adv, orig_img_vec,
                                               V_temp, -0.5, 0.5)

            # Adversarial example update.
            w_img_vec = w_ori_img_vec + delta_adv

            # Convert back to image space.
            if constraint == 'uncons':
                adv_img_vec = 0.5 * np.tanh((w_img_vec)) / 0.999999
            else:
                adv_img_vec = w_img_vec.copy()
            adv_img = np.resize(adv_img_vec, orig_img.shape)

            # Probability of the target class vs. the best other class.
            attack_prob, _, _ = util.model_prediction(model, adv_img)
            pred_labels = np.argmax(attack_prob, 1)
            target_prob = attack_prob[:, target_label]
            attack_prob_tmp = attack_prob.copy()
            attack_prob_tmp[:, target_label] = 0
            other_prob = np.amax(attack_prob_tmp, 1)

            if i % 1000 == 0 and i != 0:
                if arg_mode == "ZOAdaMM":
                    print(beta_1, beta_2)
                print("save delta_adv")
                np.save(
                    'retimgs/' + str(i) + 'itrs' + str(pred_labels) +
                    arg_mode + str(args["lr"]), delta_adv)

            if args["print_iteration"] and np.remainder(i + 1, 20) == 0:
                if (true_label != pred_labels).all():
                    print(
                        "Iter %d (Succ): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s"
                        % (i + 1, class_id, args["lr"], int(
                            args["decay_lr"]), arg_mode, constraint,
                           total_loss[i], l2s_loss_all[i], true_label,
                           pred_labels))
                else:
                    sr = np.sum(true_label != pred_labels) / arg_bsz
                    print(
                        "Iter %d (Fail): ID = %d, lr = %3.7f, decay = %d, ZO = %s %s, loss = %3.5f, l2sdist = %3.5f, TL = %d, PL = %s, succ rate = %.2f"
                        % (i + 1, class_id, args["lr"], int(
                            args["decay_lr"]), arg_mode, constraint,
                           total_loss[i], l2s_loss_all[i], true_label,
                           pred_labels, sr))

            # Log-probability margin in the direction the attack wants:
            # target over others when targeted, others over the true class
            # when untargeted.
            log_target = np.log(target_prob + 1e-10)
            log_other = np.log(other_prob + 1e-10)
            if arg_targeted_attack:
                margin = log_target - log_other
            else:
                margin = log_other - log_target

            # Success needs every image in the batch to clear kappa.
            if (margin >= kappa).all():
                current_distortion = distortion(adv_img, orig_img)
                if current_distortion < best_distortion:
                    best_distortion = current_distortion
                    best_iteration = i + 1
                    attack_flag = True
                    if first_flag:
                        # Recorded once; later successes only update "best".
                        first_flag = False
                        first_distortion = current_distortion
                        first_iteration = i + 1

        # Persist the run summary.
        suffix0 = "id_{}_Mode_{}_{}_lr_{}_decay_{}_case{}_ini_{}".format(
            class_id, arg_mode, constraint, str(args["lr"]),
            int(args["decay_lr"]), args["exp_code"], args["init_const"])
        summary = dict(id=class_id,
                       mode=arg_mode,
                       loss=total_loss,
                       perturbation=l2s_loss_all,
                       best_distortion=best_distortion,
                       learn_rate=args["lr"],
                       decay_lr=args["decay_lr"],
                       attack_flag=attack_flag)
        if attack_flag:
            # Key spelling ``best_iteation`` is kept for existing readers.
            summary.update(first_distortion=first_distortion,
                           first_iteration=first_iteration,
                           best_iteation=best_iteration)
        np.savez("{}".format(suffix0), **summary)

        if attack_flag:
            print("It takes {} iteations to find the first attack".format(
                first_iteration))
        else:
            print("Attack Fails")

        sys.stdout.flush()
#print(h%10) #print(sess.run(model.predict(new_img)[h])) #print(np.argmax(label_batch,1)[h]) #print(loss_instant[h]) #print(np.argmax(sess.run(model.predict(tf.tanh(img)*0.5+0.5))[h])) #print(sess.run(attack_pixel[h])) #label_acc.append(sum(np.argmax(sess.run(model.predict(new_img)),1)==np.argmax(label_batch,1))/90) #label_disloss.append(sum(lowest_dist[lowest_dist<1000])/len(lowest_dist<1000)) #print(lowest_dist) #print(label_disloss) #print(label_acc) #print(label_disloss) print(sum(const_c) / 90, min(const_c), max(const_c)) #load data train_data, train_label, test_data, test_label = load_data() data_batch, label_batch = batch_loader(train_data, train_label) #load model with tf.Session() as sess: model = MNISTModel('models/mnist', sess) original_img = np.arctanh((data_batch - 0.5) * 2 * 0.999) print(attack2(data_batch, label_batch, 90, model, sess))