# Imports shared by the attack-script variants below; config, Model, Vgg16,
# Attack, get_dataset_batch and output_img are project-local modules.
import argparse
import os
import sys

import cv2
import numpy as np
import tensorflow as tf
from IPython import embed


def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--continue', dest='continue_path', required=False)
    parser.add_argument('-l', '--loss', default='softmax')
    args = parser.parse_args()

    ## load dataset
    train_batch_gnr, train_set = get_dataset_batch(ds_name='train')
    data = tf.placeholder(tf.float32,
                          shape=(None,) + config.image_shape + (config.nr_channel,),
                          name='data')
    label = tf.placeholder(tf.int32, shape=(None,), name='label')  # placeholder for the targeted label
    gt = tf.placeholder(tf.int32, shape=(None,), name='gt')
    pre_noise = tf.Variable(
        tf.zeros((config.minibatch_size, config.image_shape[0],
                  config.image_shape[1], config.nr_channel), dtype=tf.float32))

    model = Model()
    attack = Attack(model, config.minibatch_size)
    acc, loss, adv, x = attack.generate_graph(pre_noise, data, gt, label)
    acc_gt = attack.evaluate(data, gt)
    placeholders = {
        'data': data,
        'label': label,
        'gt': gt,
    }

    lr = 1e-2
    opt = tf.train.AdamOptimizer(lr)
    grads = opt.compute_gradients(loss, [pre_noise])  # only the noise is optimized
    train = opt.apply_gradients(grads)

    ## init tensorboard
    tf.summary.scalar('loss', loss)
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(
        os.path.join(config.log_dir, 'tf_log', 'train'), tf.get_default_graph())

    ## create a session
    tf.set_random_seed(12345)  # ensure consistent results
    global_cnt = 0
    epoch_start = 0
    succ = 0
    noise_l2 = 0
    with tf.Session() as sess:
        for idx in range(train_set.minibatches):
            global_cnt = 0
            sess.run(tf.global_variables_initializer())  # init all variables
            images, labels = sess.run(train_batch_gnr)
            for epoch in range(epoch_start + 1, config.nr_epoch + 1):
                global_cnt += 1
                feed_dict = {
                    placeholders['data']: images,
                    placeholders['label']: labels,
                    placeholders['gt']: labels,
                }
                _, accuracy, loss_batch, adv_examples, ori_image, summary = sess.run(
                    [train, acc, loss, adv, x, merged], feed_dict=feed_dict)
                if global_cnt % config.show_interval == 0:
                    train_writer.add_summary(summary, global_cnt)
                    print(
                        "e:{}/{}, {}".format(idx, train_set.minibatches, epoch),
                        'loss: {:.3f}'.format(loss_batch),
                        'acc: {:.3f}'.format(accuracy),
                    )
            print('Training for batch {} is done'.format(idx))
            cv2.imwrite('../../ori_image/{}.png'.format(idx),
                        ori_image.astype('uint8').reshape(32, 32, 3))
            cv2.imwrite('../../adv_image/{}.png'.format(idx),
                        adv_examples.astype('uint8').reshape(32, 32, 3))
            cv2.imwrite('../../diff_image/{}.png'.format(idx),
                        abs(ori_image - adv_examples).astype('uint8').reshape(32, 32, 3))
            accuracy_gt = acc_gt.eval(feed_dict={placeholders['data']: adv_examples,
                                                 placeholders['gt']: labels})
            # running success rate of generating adversarial examples that are misclassified
            succ = (idx * succ + 1 - accuracy_gt) / (idx + 1)
            # running per-pixel squared difference between adversarial and original images
            noise_l2 = (idx * noise_l2 + (adv_examples - images) ** 2) / (idx + 1)
            print('Success rate of this attack is {}'.format(succ))
            print('Noise norm of this attack is {}'.format(np.mean(noise_l2)))
    embed()
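
# Hedged sketch: `pre_noise` above is an unconstrained variable, so
# Attack.generate_graph presumably squashes it into a valid perturbation
# before adding it to the input; the tanh change of variable from Carlini &
# Wagner is one common way to do this. Whether this repo's Attack class uses
# exactly this mapping is an assumption; the helper below only illustrates
# the idea.
def bounded_adv_image(pre_noise, data):
    # tanh maps R -> (-1, 1); scaling by 127.5 keeps each pixel of the
    # perturbation within [-127.5, 127.5], and the clip keeps the final
    # image inside the valid [0, 255] range.
    noise = 127.5 * tf.tanh(pre_noise)
    adv = tf.clip_by_value(data + noise, 0.0, 255.0)
    return adv, noise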
def main():
    ## load dataset
    global adv_examples
    train_batch_gnr, train_set = get_dataset_batch(ds_name='train')
    data = tf.placeholder(tf.float32,
                          shape=(config.minibatch_size,) + config.image_shape,
                          name='data')
    label = tf.placeholder(tf.int32, shape=(None,), name='label')  # placeholder for the targeted label
    groundTruth = tf.placeholder(tf.int32, shape=(None,), name='groundTruth')  # true labels of the samples
    pre_noise = tf.Variable(tf.zeros([1, 32, 32, 3], dtype=tf.float32))

    vgg16 = Vgg16()
    attack = Attack(vgg16, config.minibatch_size)  # initialize an attack object
    acc, loss, adv = attack.generate_graph(pre_noise, data, groundTruth, label)
    acc_gt, preds = attack.evaluate(data, groundTruth)
    placeholders = {
        'data': data,
        'label': label,
        'groundTruth': groundTruth,
    }

    lr = 0.01
    opt = tf.train.AdamOptimizer(lr)
    grads = opt.compute_gradients(loss, [pre_noise])  # the optimization variable is the noise
    train = opt.apply_gradients(grads)

    ## create a session
    tf.set_random_seed(12345)  # ensure consistent results
    succ = 0
    noise_l2 = 0
    target = np.array([7])  # horse
    # configTf (e.g. a tf.ConfigProto) is assumed to be defined elsewhere
    with tf.Session(config=configTf) as sess:
        sess.run(tf.global_variables_initializer())  # init all variables
        for idx in range(train_set.minibatches):
            global_cnt = 0
            images, labels = sess.run(train_batch_gnr)
            for epoch in range(1, config.nr_epoch + 1):
                global_cnt += 1
                # targeted attack: push every picture towards the "horse" class
                feed_dict = {
                    placeholders['data']: images,
                    placeholders['label']: target,
                    placeholders['groundTruth']: labels,
                }
                _, accuracy, loss_batch, adv_examples = sess.run(
                    [train, acc, loss, adv], feed_dict=feed_dict)
                if global_cnt % config.show_interval == 0:
                    print(
                        "e:{}/{}, {}".format(idx, train_set.minibatches, epoch),
                        'loss: {:.3f}'.format(loss_batch),
                        'accuracy: {:.3f}'.format(accuracy),
                    )
            print('Training for batch {} is done'.format(idx))
            output_img(x=adv_examples,
                       out_path='./out/adv_examples/{}.png'.format(idx))  # save the adversarial example
            # accuracy of the adversarial examples w.r.t. the ground-truth labels;
            # the original fed `label` here, but acc_gt/preds are built on the
            # groundTruth placeholder, so that feed would fail
            accuracy_gt, predAdv = sess.run(
                [acc_gt, preds],
                feed_dict={placeholders['data']: adv_examples,
                           placeholders['groundTruth']: labels})
            succ = (idx * succ + 1 - accuracy_gt) / (idx + 1)
            noise_l2 = (idx * noise_l2 + (adv_examples - images) ** 2) / (idx + 1)
            print("====> label for adv is:", predAdv)  # ideally the target class, 7 (horse)
            print('Success rate of this attack is {}'.format(succ))
            print('Noise norm of this attack is {}'.format(np.mean(noise_l2)))
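
# `output_img` is defined elsewhere in the project; based on how it is called
# above (x is a (1, 32, 32, 3) float array, out_path a .png path), a compatible
# implementation might look like the following -- a guess, not the repo's
# actual helper.
def output_img(x, out_path):
    out_dir = os.path.dirname(out_path)
    if out_dir and not os.path.exists(out_dir):
        os.makedirs(out_dir)  # make sure ./out/adv_examples/ exists
    img = np.clip(x, 0, 255).astype('uint8').reshape(32, 32, 3)
    cv2.imwrite(out_path, img)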
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--nr_epoch", type=int, default=500,
        help="you may need to increase nr_epoch to 4000 or more "
             "for targeted adversarial attacks")
    parser.add_argument(
        "--alpha", type=float, default=0,
        help="coefficient of either cross entropy loss or C&W attack loss")
    parser.add_argument(
        "--beta", type=float, default=0,
        help="coefficient of lasso regularization")
    parser.add_argument(
        "--gamma", type=float, default=0,
        help="coefficient of ridge regularization")
    parser.add_argument(
        "--CW_kappa", type=float, default=0,
        help="hyperparameter for C&W attack loss")
    parser.add_argument("--use_cross_entropy_loss", action='store_true')
    parser.add_argument("--targeted_attack", action='store_true')
    args = parser.parse_args()

    # load dataset
    train_batch_gnr, train_set = get_dataset_batch(ds_name='train')
    data = tf.placeholder(tf.float32,
                          shape=(None,) + config.image_shape + (config.nr_channel,),
                          name='data')
    label = tf.placeholder(tf.int32, shape=(None,), name='label')  # placeholder for the targeted label
    gt = tf.placeholder(tf.int32, shape=(None,), name='gt')
    pre_noise = tf.Variable(
        tf.zeros((config.minibatch_size, config.image_shape[0],
                  config.image_shape[1], config.nr_channel), dtype=tf.float32))

    model = Model()
    attack = Attack(model, config.minibatch_size, args.alpha, args.beta,
                    args.gamma, args.CW_kappa, args.use_cross_entropy_loss)
    target = label if args.targeted_attack else None
    acc, loss, adv = attack.generate_graph(pre_noise, data, gt, target)
    acc_gt = attack.evaluate(data, gt)
    placeholders = {
        'data': data,
        'label': label,
        'gt': gt,
    }

    lr = 1e-2
    opt = tf.train.AdamOptimizer(lr)
    grads = opt.compute_gradients(loss, [pre_noise])
    train = opt.apply_gradients(grads)

    # init tensorboard
    tf.summary.scalar('loss', loss)
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(
        os.path.join(config.log_dir, 'tf_log', 'train'), tf.get_default_graph())

    # create a session
    tf.set_random_seed(12345)  # ensure consistent results
    succ = 0
    noise_l1 = 0
    noise_l2 = 0
    noise_l_inf = 0
    tot = 0
    with tf.Session() as sess:
        assert train_set.minibatch_size == 1
        for idx in range(train_set.minibatches):
            global_cnt = 0
            sess.run(tf.global_variables_initializer())  # init all variables
            # gt_labels holds the ground-truth labels of this minibatch
            # (renamed from `gt` to avoid shadowing the placeholder)
            images, gt_labels = sess.run(train_batch_gnr)
            # skip images the model already misclassifies
            if acc_gt.eval(feed_dict={placeholders['data']: images,
                                      placeholders['gt']: gt_labels}) < 0.5:
                continue
            tot += 1
            if args.targeted_attack:
                labels = gt_labels + 5
                if labels >= 10:  # minibatch size is 1, so this scalar test is safe
                    labels -= config.nr_class
            else:
                labels = gt_labels
            min_distortion = np.inf
            min_l1 = np.inf
            min_l2 = np.inf
            min_linf = np.inf
            for epoch in range(1, args.nr_epoch + 1):
                global_cnt += 1
                feed_dict = {
                    placeholders['data']: images,
                    placeholders['label']: labels,
                    placeholders['gt']: gt_labels,
                }
                _, accuracy, loss_batch, adv_examples, summary = sess.run(
                    [train, acc, loss, adv, merged], feed_dict=feed_dict)
                if global_cnt % config.show_interval == 0:
                    train_writer.add_summary(summary, global_cnt)
                    print(
                        "e:{}/{}, {}".format(idx, train_set.minibatches, epoch),
                        'loss: {:.3f}'.format(loss_batch),
                        'acc: {:.3f}'.format(accuracy),
                    )
                # a targeted attack succeeds when the adversarial example is
                # classified as the target label; an untargeted one succeeds
                # when it is no longer classified as the ground truth
                if args.targeted_attack:
                    successful = acc_gt.eval(feed_dict={
                        placeholders['data']: adv_examples,
                        placeholders['gt']: labels}) > 0.5
                else:
                    successful = acc_gt.eval(feed_dict={
                        placeholders['data']: adv_examples,
                        placeholders['gt']: gt_labels}) < 0.5
                if successful:
                    l1 = np.sum(np.abs((adv_examples - images) / 255))
                    l2_square = np.sum(((adv_examples - images) / 255) ** 2)
                    distortion = args.beta * l1 + args.gamma * l2_square
                    if distortion < min_distortion:
                        min_distortion = distortion
                        min_l1 = min(min_l1, l1)
                        min_l2 = min(min_l2, np.sqrt(l2_square))
                        # use the absolute value so negative perturbations count
                        min_linf = min(min_linf,
                                       np.max(np.abs((adv_examples - images) / 255)))
            print('Training for batch {} is done'.format(idx))
            sys.stdout.flush()
            if min_distortion != np.inf:
                succ += 1
                noise_l1 += min_l1
                noise_l2 += min_l2
                noise_l_inf += min_linf
    # save_result_dir is assumed to be defined elsewhere (e.g. in config)
    with open(save_result_dir, "a+") as f:
        for attr in dir(args):
            if not attr.startswith("_"):
                print("{}: {}".format(attr, getattr(args, attr)))
                f.write("{}: {}\n".format(attr, getattr(args, attr)))
        print('Success rate: {}'.format(succ / tot))
        print('Noise l1-norm: {}'.format(noise_l1 / tot))
        print('Noise l2-norm: {}'.format(noise_l2 / tot))
        print('Noise l-inf: {}'.format(noise_l_inf / tot))
        f.write('Success rate: {}\n'.format(succ / tot))
        f.write('Noise l1-norm: {}\n'.format(noise_l1 / tot))
        f.write('Noise l2-norm: {}\n'.format(noise_l2 / tot))
        f.write('Noise l-inf: {}\n\n'.format(noise_l_inf / tot))
        sys.stdout.flush()
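
# Hedged sketch of the loss the flags above suggest: an alpha-weighted C&W
# margin term (or cross entropy when --use_cross_entropy_loss is set), plus
# beta * L1 (lasso) and gamma * L2 (ridge) penalties on the perturbation.
# The exact formulation inside Attack is an assumption; this follows the
# standard targeted C&W objective.
def attack_loss_sketch(logits, target_onehot, noise, alpha, beta, gamma, kappa):
    # logit of the target class vs. the best logit among all other classes
    target_logit = tf.reduce_sum(logits * target_onehot, axis=1)
    other_logit = tf.reduce_max(logits - 1e9 * target_onehot, axis=1)
    # the margin term reaches zero once the target beats the runner-up by kappa
    cw = tf.maximum(other_logit - target_logit + kappa, 0.0)
    l1 = tf.reduce_sum(tf.abs(noise))
    l2 = tf.reduce_sum(tf.square(noise))
    return alpha * tf.reduce_mean(cw) + beta * l1 + gamma * l2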
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--continue', dest='continue_path', required=False)
    parser.add_argument('-l', '--loss', default='softmax')
    args = parser.parse_args()

    ## load dataset
    train_batch_gnr, train_set = get_dataset_batch(ds_name='train')
    data = tf.placeholder(tf.float32,
                          shape=(None,) + config.image_shape + (config.nr_channel,),
                          name='data')
    label = tf.placeholder(tf.int32, shape=(None,), name='label')  # placeholder for the targeted label
    gt = tf.placeholder(tf.int32, shape=(None,), name='gt')
    pre_noise = tf.Variable(
        tf.zeros((config.minibatch_size, config.image_shape[0],
                  config.image_shape[1], config.nr_channel), dtype=tf.float32))

    model = Model()
    attack = Attack(model, config.minibatch_size)
    acc, loss, adv, x, logits, noise = attack.generate_graph(
        pre_noise, data, gt, label)
    acc_gt = attack.evaluate(data, gt)
    placeholders = {
        'data': data,
        'label': label,
        'gt': gt,
    }

    lr = 1e-2
    opt = tf.train.AdamOptimizer(lr)
    grads = opt.compute_gradients(loss, [pre_noise])
    train = opt.apply_gradients(grads)

    ## create a session
    tf.set_random_seed(12345)  # ensure consistent results
    global_cnt = 0
    epoch_start = 0
    succ = 0
    noise_l2 = 0
    ta_succ = 0
    aa_succ = 0
    with tf.Session() as sess:
        for idx in range(train_set.minibatches):
            global_cnt = 0
            sess.run(tf.global_variables_initializer())  # init all variables
            images, labels, target = sess.run(train_batch_gnr)
            for epoch in range(epoch_start + 1, config.nr_epoch + 1):
                global_cnt += 1
                feed_dict = {
                    placeholders['data']: images,
                    placeholders['label']: target,
                    placeholders['gt']: labels,
                }
                _, accuracy, loss_batch, adv_examples, ori_image, a, nnoise = sess.run(
                    [train, acc, loss, adv, x, logits, noise],
                    feed_dict=feed_dict)
                if global_cnt % config.show_interval == 0:
                    print(
                        "e:{}/{}, {}".format(idx, train_set.minibatches, epoch),
                        'loss: {:.3f}'.format(loss_batch),
                        'acc: {:.3f}'.format(accuracy),
                        'logits: {:.4f}'.format(np.max(a[0])),
                        'index: {}'.format(np.argmax(a[0])),
                        'target: {}'.format(target))
            print('Training for batch {} is done'.format(idx))
            accuracy_ta = acc_gt.eval(feed_dict={
                placeholders['data']: adv_examples,
                placeholders['gt']: target})
            accuracy_gt = acc_gt.eval(feed_dict={
                placeholders['data']: adv_examples,
                placeholders['gt']: labels})
            # running success rate of hitting the target label
            ta_succ = (idx * ta_succ + accuracy_ta) / (idx + 1)
            # running success rate of adversarial examples being misclassified
            succ = (idx * succ + 1 - accuracy_gt) / (idx + 1)
            # running per-pixel squared difference between adversarial and original images
            noise_l2 = (idx * noise_l2 + (adv_examples - images) ** 2) / (idx + 1)
            if idx == train_set.minibatches - 1:
                # apply the final noise to fresh batches to test whether it transfers
                aa_succ = 0
                for i in range(1000):
                    sess.run(tf.global_variables_initializer())  # init all variables
                    val_images, val_labels, val_target = sess.run(train_batch_gnr)
                    ac_image = nnoise + val_images
                    accuracy_attack = acc_gt.eval(feed_dict={
                        placeholders['data']: ac_image,
                        placeholders['gt']: val_target})
                    aa_succ = (i * aa_succ + accuracy_attack) / (i + 1)
                    print('index:{}'.format(i),
                          'accuracy_attack:{}'.format(accuracy_attack))
                    # accuracy_attack should be high
    print('Success rate of target attack is {}'.format(ta_succ))
    print('Success rate of this attack is {}'.format(succ))
    print('Noise norm of this attack is {}'.format(np.mean(noise_l2)))
    print('Success rate of target attack val image is {}'.format(aa_succ))
    embed()
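
# The validation loop above reuses the final `nnoise` on 1000 fresh batches,
# i.e. it treats the perturbation as universal. The same measurement as a
# standalone helper (names and the running-mean bookkeeping mirror the loop
# above; this is illustrative, not part of the original script):
def eval_universal_noise(sess, acc_gt, placeholders, batch_gnr, noise,
                         n_batches=1000):
    mean_acc = 0.0
    for i in range(n_batches):
        images, _, target = sess.run(batch_gnr)
        # apply the fixed perturbation to unseen images and measure how
        # often the model predicts the target label
        acc = acc_gt.eval(feed_dict={placeholders['data']: images + noise,
                                     placeholders['gt']: target})
        mean_acc = (i * mean_acc + acc) / (i + 1)  # running mean
    return mean_acc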