def main(src_model_name, eps):
    """Evaluate a model under Gaussian input noise and log robustness statistics.

    For each noise level ``sd`` in (0.4, 0.5, 0.6) the source model is run on
    100 independently noised copies of the test set.  The predicted class of
    every run is tallied per example, the tallies are passed row by row to
    ``isRobust``, and the summary is appended to
    ``bound_<src_model_name>_bound.txt``.

    Args:
        src_model_name: Name/path of the model to load; it is also embedded in
            the log file name.
        eps: Perturbation size forwarded to ``isRobust`` as ``epsilon`` and
            written to the log as ``l``.

    Raises:
        ValueError: If ``args.dataset`` is neither ``"mnist"`` nor
            ``"cifar10"``.
    """
    np.random.seed(0)
    tf.set_random_seed(0)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))

    if args.dataset == "mnist":
        K.set_image_data_format('channels_last')
        set_mnist_flags()
        x = K.placeholder(
            (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))
        y = K.placeholder((None, FLAGS.NUM_CLASSES))
        _, _, X_test, Y_test = data_mnist()
        # source model for crafting adversarial examples
        src_model = load_model_mnist(src_model_name)
    elif args.dataset == "cifar10":
        set_flags(20)
        K.set_image_data_format('channels_first')
        x = K.placeholder(
            (None, FLAGS.NUM_CHANNELS, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS))
        y = K.placeholder((None, FLAGS.NUM_CLASSES))
        _, _, X_test, Y_test = load_data()
        # source model for crafting adversarial examples
        src_model = load_model(src_model_name)
    else:
        # Without this guard an unknown dataset only surfaced later as a
        # NameError on `src_model`.
        raise ValueError(
            "Unsupported dataset {!r}; expected 'mnist' or 'cifar10'".format(
                args.dataset))

    logits = src_model(x)

    # Loop-invariant quantities, derived from the labels instead of being
    # hard-coded (the original assumed 10 classes and 10,000 test examples).
    num_examples = len(Y_test)
    num_classes = Y_test.shape[1]
    example_idx = np.arange(num_examples)
    true_labels = Y_test.argmax(axis=1)

    for sd in [0.4, 0.5, 0.6]:
        # votes[i, c] counts how often example i was classified as c.
        votes = np.zeros(shape=(num_examples, num_classes))
        for _ in range(100):
            noisy = X_test + np.random.normal(scale=sd, size=X_test.shape)
            logits_np = batch_eval([x, y], [logits], [noisy, Y_test])[0]
            votes[example_idx, logits_np.argmax(axis=1)] += 1

        # NOTE(review): `isRobust` is defined elsewhere; from its use below,
        # column 0 of the result is a robust flag and column 1 a bound.
        robust = np.apply_along_axis(func1d=isRobust, axis=1, arr=votes,
                                     sd=sd, epsilon=eps)

        correct = votes.argmax(axis=1) == true_labels
        # `== True` is kept deliberately: `robust` is an ndarray, so this is
        # an element-wise comparison, not a truthiness test.
        is_robust = robust[:, 0] == True  # noqa: E712

        # Percentages over the whole test set.
        total_robust = (100.0 * np.sum(np.logical_and(is_robust, correct))
                        / num_examples)
        accuracy = 100.0 * np.sum(correct) / num_examples

        with open('bound_' + src_model_name + '_bound.txt', 'a') as log:
            log.write("Ave bound is {} at sigma = {}\n".format(
                np.mean(robust[:, 1]), sd))
            log.write("Accuracy: {}, Robust accuracy: {}, l={}\n".format(
                accuracy, total_robust, eps))
def main(attack, src_model_name, target_model_names):
    """Craft adversarial examples on a source model and log transfer accuracy.

    The adversarial examples are generated from the test set using the source
    model, then evaluated on the source model itself (white-box) and on each
    target model (black-box).  Results are appended to ``attacks.txt``.

    Args:
        attack: One of ``"test"`` (only print clean accuracy and return),
            ``"fgs"``, ``"rfgs"``, ``"pgd"`` or ``"so"``.
        src_model_name: Name/path of the model used to craft the examples.
        target_model_names: Names/paths of the models the examples are
            transferred to.

    Raises:
        ValueError: If ``attack`` or ``args.dataset`` is not supported.
    """
    # Fail fast: previously an unknown attack left `adv_x` unbound and was
    # only reported as a NameError after every model had been loaded.
    supported_attacks = ("test", "fgs", "rfgs", "pgd", "so")
    if attack not in supported_attacks:
        raise ValueError("Unknown attack {!r}; expected one of {}".format(
            attack, ", ".join(supported_attacks)))

    np.random.seed(0)
    tf.set_random_seed(0)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))

    if args.dataset == "mnist":
        K.set_image_data_format('channels_last')
        set_mnist_flags()
        x = K.placeholder(
            (None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS))
        y = K.placeholder((None, FLAGS.NUM_CLASSES))
        _, _, X_test, Y_test = data_mnist()
        # source model for crafting adversarial examples
        src_model = load_model_mnist(src_model_name)
        sd = 0.7
    elif args.dataset == "cifar10":
        set_flags(20)
        K.set_image_data_format('channels_first')
        x = K.placeholder(
            (None, FLAGS.NUM_CHANNELS, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS))
        y = K.placeholder((None, FLAGS.NUM_CLASSES))
        _, _, X_test, Y_test = load_data()
        # source model for crafting adversarial examples
        src_model = load_model(src_model_name)
        sd = 100. / 255.
    else:
        raise ValueError(
            "Unsupported dataset {!r}; expected 'mnist' or 'cifar10'".format(
                args.dataset))

    # model(s) to target
    # NOTE(review): targets are always loaded with `load_model`, even when the
    # source model was loaded with `load_model_mnist` -- confirm intended.
    target_models = [load_model(name) for name in target_model_names]

    # simply compute test error
    if attack == "test":
        acc = tf_test_acc(src_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(src_model_name), acc))

        for name, target_model in zip(target_model_names, target_models):
            acc = tf_test_acc(target_model, x, X_test, Y_test)
            print('{}: {:.1f}'.format(basename(name), acc))
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rfgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
            0.0, 1.0)
        # The random step consumes part of the perturbation budget.
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    if attack in ("fgs", "rfgs"):
        # FGSM and RAND+FGSM one-shot attack
        adv_x = symbolic_fgs(x, grad, eps=eps)
    elif attack == "pgd":
        # iterative FGSM
        adv_x = iter_fgs(src_model, x, y, steps=args.steps, eps=eps,
                         alpha=eps / 10.0)
    else:
        # attack == "so" (guaranteed by the validation at the top)
        adv_x = so(src_model, x, y, steps=args.steps, eps=eps,
                   alpha=eps / 10.0, norm=args.norm, sd=sd)

    print('start')
    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
    print('-----done----')

    # white-box attack
    acc = tf_test_acc(src_model, x, X_adv, Y_test, sd=sd, num_iter=10)
    with open('attacks.txt', 'a') as log:
        log.write('{}->{}: {:.1f}, size = {:.4f}\n'.format(
            basename(src_model_name), basename(src_model_name), acc, eps))

    # black-box attack
    for name, target_model in zip(target_model_names, target_models):
        acc = tf_test_acc(target_model, x, X_adv, Y_test, sd=sd, num_iter=10)
        # Re-opened per result so earlier lines are on disk if a later
        # evaluation fails.
        with open('attacks.txt', 'a') as log:
            log.write('{}->{}: {:.1f}, size = {:.4f}\n'.format(
                basename(src_model_name), basename(name), acc, eps))
def train():
    """Train CIFAR-10 for a number of steps.

    Builds the input pipeline, the inference network (VGG16 when
    ``FLAGS.vgg16`` is set, otherwise the default network), the loss and the
    training op, then runs ``FLAGS.max_steps`` training steps.  Progress is
    printed every 10 steps, summaries are written every 100 steps, and a
    checkpoint is saved to ``FLAGS.train_dir`` every 1000 steps and after the
    final step.

    Raises:
        AssertionError: If the loss becomes NaN.
    """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Get images and labels for CIFAR-10.
        images, labels = cifar10.distorted_inputs()

        if FLAGS.vgg16:
            print('Using Vgg16 to extract image features.')
            # The original message was misspelled ("Feeze") and printed the
            # inverse of the flag it named.
            print('Freeze model: %s' % bool(FLAGS.freeze))
            logits = cifar10.inference_vgg(images, trainable=not FLAGS.freeze)
        else:
            print('Using ConV to extract image features.')
            # Build a Graph that computes the logits predictions from the
            # inference model.
            logits = cifar10.inference(images)

        # Calculate loss.
        loss = cifar10.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = cifar10.train(loss, global_step)

        # Create a saver (tf.global_variables replaces the deprecated
        # tf.all_variables).
        saver = tf.train.Saver(tf.global_variables())

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below
        # (replaces the deprecated tf.initialize_all_variables).
        init = tf.global_variables_initializer()

        # Start running operations on the Graph.
        config = tf.ConfigProto(log_device_placement=FLAGS.log_device_placement)
        # Cap this process at half of the GPU memory.
        config.gpu_options.per_process_gpu_memory_fraction = 0.5
        # Alternative (disabled): allocate GPU memory on demand via
        # config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        # `graph=` replaces the deprecated `graph_def=` argument.
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                               graph=sess.graph)

        if FLAGS.vgg16 and FLAGS.pretrained:
            # load VGG model.npz
            print('Transfer learning: load VGG16 model...')
            cifar10.load_model(sess)
        else:
            print('Deep learning...')

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

        # Flush any buffered summaries to disk before returning.
        summary_writer.close()
def _str_to_bool(value):
    """Convert a command-line string such as "true" or "False" to a bool.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is
    ``True`` because any non-empty string is truthy, so ``--shuffle False``
    would silently keep shuffling enabled.

    Raises:
        ValueError: If ``value`` is not a recognised boolean spelling;
            argparse reports this as an invalid argument value.
    """
    normalized = value.strip().lower()
    if normalized in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    # The empty string stays falsy, as it was under ``bool("")``.
    if normalized in ('', '0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise ValueError('expected a boolean value, got %r' % (value,))


parser.add_argument('--test-images', default=False, action='store_true')
parser.add_argument('--incorrect', default=False, action='store_true')
parser.add_argument('--shuffle', type=_str_to_bool, default=True)
args = parser.parse_args()

# Choose the data split, then push the whole set through the generator as a
# single batch so the model sees the generator's output for every image.
images, labels, data_size = (
    load_test_data() if args.test_images else load_eval_data())
data_gen = create_data_gen(images)
data_gen_input = data_gen.flow(images, labels, batch_size=data_size,
                               shuffle=False)
prediction_images = next(data_gen_input)[0]

model = load_model(args.model_path)
probabilities_list = model.predict(prediction_images).tolist()

if args.shuffle:
    # Apply one shared permutation so images, labels and probabilities stay
    # aligned with each other.
    shuffled_indices = list(range(len(images)))
    random.shuffle(shuffled_indices)
    images = [images[i] for i in shuffled_indices]
    labels = [labels[i] for i in shuffled_indices]
    probabilities_list = [probabilities_list[i] for i in shuffled_indices]

# Predicted class = index of the largest probability in each row.
predictions = [
    probabilities.index(max(probabilities))
    for probabilities in probabilities_list
]
def main(attack, src_model_name, target_model_names):
    """Craft adversarial examples on a source model and print transfer accuracy.

    The adversarial examples are generated from the test set using the source
    model, then evaluated on the source model itself (white-box) and on each
    target model (black-box).  Accuracy is printed as ``100 - error rate``.

    Args:
        attack: One of ``"test"`` (only print clean accuracy and return),
            ``"fgs"``, ``"rfgs"``, ``"pgd"`` or ``"mim"``.
        src_model_name: Name/path of the model used to craft the examples.
        target_model_names: Names/paths of the models the examples are
            transferred to.

    Raises:
        ValueError: If ``attack`` is not one of the supported names.
    """
    # Fail fast: previously an unknown attack left `adv_x` unbound and was
    # only reported as a NameError after every model had been loaded.
    supported_attacks = ("test", "fgs", "rfgs", "pgd", "mim")
    if attack not in supported_attacks:
        raise ValueError("Unknown attack {!r}; expected one of {}".format(
            attack, ", ".join(supported_attacks)))

    np.random.seed(0)
    tf.set_random_seed(0)

    set_flags(20)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))

    x = K.placeholder(
        (None, FLAGS.NUM_CHANNELS, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS))
    y = K.placeholder((None, FLAGS.NUM_CLASSES))

    _, _, X_test, Y_test = load_data()

    # source model for crafting adversarial examples
    src_model = load_model(src_model_name)

    # model(s) to target
    target_models = [load_model(name) for name in target_model_names]

    # simply compute test error
    if attack == "test":
        err = tf_test_error_rate(src_model, x, X_test, Y_test)
        print('{}: {:.1f}'.format(basename(src_model_name), 100 - err))

        for name, target_model in zip(target_model_names, target_models):
            err = tf_test_error_rate(target_model, x, X_test, Y_test)
            print('{}: {:.1f}'.format(basename(name), 100 - err))
        return

    eps = args.eps

    # take the random step in the RAND+FGSM
    if attack == "rfgs":
        X_test = np.clip(
            X_test + args.alpha * np.sign(np.random.randn(*X_test.shape)),
            0.0, 1.0)
        # The random step consumes part of the perturbation budget.
        eps -= args.alpha

    logits = src_model(x)
    grad = gen_grad(x, logits, y)

    if attack in ("fgs", "rfgs"):
        # FGSM and RAND+FGSM one-shot attack
        adv_x = symbolic_fgs(x, grad, eps=eps)
    elif attack == "pgd":
        # iterative FGSM
        adv_x = iter_fgs(src_model, x, y, steps=args.steps, eps=args.eps,
                         alpha=args.eps / 10.0)
    else:
        # attack == "mim" (guaranteed by the validation at the top)
        adv_x = momentum_fgs(src_model, x, y, eps=args.eps)

    print('start')
    # compute the adversarial examples and evaluate
    X_adv = batch_eval([x, y], [adv_x], [X_test, Y_test])[0]
    print('-----done----')

    # white-box attack
    err = tf_test_error_rate(src_model, x, X_adv, Y_test)
    print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                  basename(src_model_name), 100 - err))

    # black-box attack
    for name, target_model in zip(target_model_names, target_models):
        err = tf_test_error_rate(target_model, x, X_adv, Y_test)
        print('{}->{}: {:.1f}'.format(basename(src_model_name),
                                      basename(name), 100 - err))