Example #1
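# assumed imports shared by the four snippets below; config, configTf, Model,
# Vgg16, Attack, get_dataset_batch and output_img are project-local and not shown
import argparse
import os
import sys

import cv2
import numpy as np
import tensorflow as tf
from IPython import embed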
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--continue', dest='continue_path', required=False)
    parser.add_argument('-l', '--loss', default='softmax')
    args = parser.parse_args()

    ## load dataset
    train_batch_gnr, train_set = get_dataset_batch(ds_name='train')

    data = tf.placeholder(tf.float32, shape=(None,) + config.image_shape + (config.nr_channel,), name='data')
    label = tf.placeholder(tf.int32, shape=(None,), name='label')  # placeholder for targeted label
    gt = tf.placeholder(tf.int32, shape=(None,), name='gt')

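    # trainable noise variable, re-initialized for every minibatch in the loop below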
    pre_noise = tf.Variable(
        tf.zeros((config.minibatch_size, config.image_shape[0], config.image_shape[1], config.nr_channel),
                 dtype=tf.float32))
    model = Model()
    attack = Attack(model, config.minibatch_size)
    acc, loss, adv, x = attack.generate_graph(pre_noise, data, gt, label)
    acc_gt = attack.evaluate(data, gt)

    placeholders = {
        'data': data,
        'label': label,
        'gt': gt,
    }

    lr = 1e-2
    opt = tf.train.AdamOptimizer(lr)
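    # gradients w.r.t. pre_noise only: the classifier weights are never updated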
    grads = opt.compute_gradients(loss, [pre_noise])
    train = opt.apply_gradients(grads)
    ## init tensorboard
    tf.summary.scalar('loss', loss)
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(os.path.join(config.log_dir, 'tf_log', 'train'),
                                         tf.get_default_graph())

    ## create a session
    tf.set_random_seed(12345)  # ensure consistent results
    global_cnt = 0
    epoch_start = 0
    succ = 0
    noise_l2 = 0
    with tf.Session() as sess:

        for idx in range(train_set.minibatches):
            global_cnt = 0

            sess.run(tf.global_variables_initializer())  # re-initialize pre_noise (and Adam state) for each new batch
            images, labels = sess.run(train_batch_gnr)

            for epoch in range(epoch_start + 1, config.nr_epoch + 1):
                global_cnt += 1
                feed_dict = {
                    placeholders['data']: images,
                    placeholders['label']: labels,
                    placeholders['gt']: labels,
                }
                _, accuracy, loss_batch, adv_examples, ori_image, summary = sess.run(
                    [train, acc, loss, adv, x, merged], feed_dict=feed_dict)

                if global_cnt % config.show_interval == 0:
                    train_writer.add_summary(summary, global_cnt)
                    print(
                        "e:{}/{}, {}".format(idx, train_set.minibatches, epoch),
                        'loss: {:.3f}'.format(loss_batch),
                        'acc: {:.3f}'.format(accuracy),
                    )

            print('Training for batch {} is done'.format(idx))

            # assumes minibatch_size == 1, hence the reshape to a single 32x32x3 image
            cv2.imwrite('../../ori_image/{}.png'.format(idx), ori_image.astype('uint8').reshape(32, 32, 3))
            cv2.imwrite('../../adv_image/{}.png'.format(idx), adv_examples.astype('uint8').reshape(32, 32, 3))
            cv2.imwrite('../../diff_image/{}.png'.format(idx), np.abs(ori_image - adv_examples).astype('uint8').reshape(32, 32, 3))

            accuracy_gt = acc_gt.eval(feed_dict={placeholders['data']: adv_examples, placeholders['gt']: labels})
            # running success rate: fraction of adversarial examples misclassified w.r.t. the ground truth
            succ = (idx * succ + 1 - accuracy_gt) / (idx + 1)
            # running mean of the squared pixel difference between adversarial and original images
            noise_l2 = (idx * noise_l2 + (adv_examples - images) ** 2) / (idx + 1)

    print('Success rate of this attack is {}'.format(succ))
    print('Noise norm of this attack is {}'.format(np.mean(noise_l2)))
    embed()
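All four snippets depend on project-local code that is not shown (config, get_dataset_batch, Model/Vgg16, output_img, and above all the Attack class). For orientation only, here is a minimal sketch of what Attack.generate_graph plausibly builds in the untargeted case; model.build is an assumed API, and the real method returns extra tensors in some snippets (the perturbed input x, the logits, the raw noise).

class Attack:
    """Sketch under stated assumptions; not the project's actual implementation."""

    def __init__(self, model, batch_size):
        self.model = model
        self.batch_size = batch_size

    def generate_graph(self, pre_noise, data, gt, label):
        # bound the perturbation by squashing the free variable through tanh
        noise = 8.0 * tf.tanh(pre_noise)
        adv = tf.clip_by_value(data + noise, 0.0, 255.0)
        logits = self.model.build(adv)  # frozen classifier; 'build' is an assumed API
        # untargeted variant: maximize the cross entropy on the true labels,
        # hence the minus sign ('label' would drive a targeted variant instead)
        loss = -tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(labels=gt, logits=logits))
        preds = tf.argmax(logits, axis=1, output_type=tf.int32)
        acc = tf.reduce_mean(tf.cast(tf.equal(preds, gt), tf.float32))
        return acc, loss, adv

    def evaluate(self, data, gt):
        # accuracy of the frozen model on whatever images are fed in
        logits = self.model.build(data)
        preds = tf.argmax(logits, axis=1, output_type=tf.int32)
        return tf.reduce_mean(tf.cast(tf.equal(preds, gt), tf.float32))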
Example #2
def main():
    ## load dataset
    global adv_examples
    train_batch_gnr, train_set = get_dataset_batch(ds_name='train')

    data = tf.placeholder(tf.float32, shape=(config.minibatch_size,) + config.image_shape, name='data')
    label = tf.placeholder(tf.int32, shape=(None,), name='label')  # placeholder for targeted label
    groundTruth = tf.placeholder(tf.int32, shape=(None,), name='groundTruth')  # ground-truth labels

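    # a single trainable CIFAR-sized (32x32x3) noise image, shared across the batch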
    pre_noise = tf.Variable(tf.zeros([1, 32, 32, 3], dtype=tf.float32))
    vgg16 = Vgg16()
    attack = Attack(vgg16, config.minibatch_size)  # instantiate the attack object
    acc, loss, adv = attack.generate_graph(pre_noise, data, groundTruth, label)
    acc_gt, preds = attack.evaluate(data, groundTruth)

    placeholders = {
        'data': data,
        'label': label,
        'groundTruth': groundTruth,
    }

    lr = 0.01
    opt = tf.train.AdamOptimizer(lr)
    grads = opt.compute_gradients(loss, [pre_noise])  # the optimization target is the noise, not the network weights
    train = opt.apply_gradients(grads)
    ## create a session
    tf.set_random_seed(12345)  # ensure consistent results
    succ = 0
    noise_l2 = 0
    target = np.array([7])  # horse
    with tf.Session(config=configTf) as sess:
        sess.run(tf.global_variables_initializer())  # init once here: the noise variable persists across batches
        for idx in range(train_set.minibatches):
            global_cnt = 0
            images, labels = sess.run(train_batch_gnr)
            for epoch in range(1, config.nr_epoch + 1):
                global_cnt += 1
                feed_dict = {
                    placeholders['data']: images,
                    placeholders['label']: target,
                    placeholders['groundTruth']: labels,
                }
                # the targeted attack should push every image toward class 7 (horse)
                _, accuracy, loss_batch, adv_examples = sess.run([train, acc, loss, adv],
                                                                 feed_dict=feed_dict)

                if global_cnt % config.show_interval == 0:
                    print(
                        "e:{}/{}, {}".format(idx, train_set.minibatches, epoch),
                        'loss: {:.3f}'.format(loss_batch),
                        'accuracy: {:.3f}'.format(accuracy),
                    )

            print('Training for batch {} is done'.format(idx))
            output_img(x=adv_examples, out_path='./out/adv_examples/{}.png'.format(idx))  # save the adversarial examples
            accuracy_gt, predAdv = sess.run([acc_gt, preds],
                                            feed_dict={placeholders['data']: adv_examples,
                                                       placeholders['groundTruth']: labels})  # accuracy of the adversarial examples w.r.t. the ground truth
            succ = (idx * succ + 1 - accuracy_gt) / (idx + 1)  # running misclassification (attack success) rate
            noise_l2 = (idx * noise_l2 + (adv_examples - images) ** 2) / (idx + 1)  # running mean squared noise
            print("====> label for adv is:", predAdv)  # should match the target class, 7 = horse
            print('Success rate of this attack is {}'.format(succ))
            print('Noise norm of this attack is {}'.format(np.mean(noise_l2)))
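output_img is another project-local helper that is not shown; a minimal stand-in (an assumption, not the original) would drop the batch axis, clip to the valid pixel range, and write the image with cv2:

def output_img(x, out_path):
    # sketch of the assumed helper: drop the batch axis, clip to [0, 255], save to disk
    img = np.clip(np.squeeze(x), 0, 255).astype('uint8')
    cv2.imwrite(out_path, img)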
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--nr_epoch",
        type=int,
        default=500,
        help=
        "you may need to increase nr_epoch to 4000 or more for targeted adversarial attacks"
    )
    parser.add_argument(
        "--alpha",
        type=float,
        default=0,
        help="coefficient of either cross entropy loss or C&W attack loss")
    parser.add_argument("--beta",
                        type=float,
                        default=0,
                        help="coefficient of lasso regularization")
    parser.add_argument("--gamma",
                        type=float,
                        default=0,
                        help="coefficient of ridge regularization")
    parser.add_argument("--CW_kappa",
                        type=float,
                        default=0,
                        help="hyperparameter for C&W attack loss")
    parser.add_argument("--use_cross_entropy_loss", action='store_true')
    parser.add_argument("--targeted_attack", action='store_true')
    args = parser.parse_args()

    # load dataset
    train_batch_gnr, train_set = get_dataset_batch(ds_name='train')

    data = tf.placeholder(tf.float32,
                          shape=(None, ) + config.image_shape +
                          (config.nr_channel, ),
                          name='data')
    label = tf.placeholder(tf.int32, shape=(None, ),
                           name='label')  # placeholder for targeted label
    gt = tf.placeholder(tf.int32, shape=(None, ), name='gt')

    pre_noise = tf.Variable(
        tf.zeros((config.minibatch_size, config.image_shape[0],
                  config.image_shape[1], config.nr_channel),
                 dtype=tf.float32))
    model = Model()
    attack = Attack(model, config.minibatch_size, args.alpha, args.beta,
                    args.gamma, args.CW_kappa, args.use_cross_entropy_loss)
    target = label if args.targeted_attack else None
    acc, loss, adv = attack.generate_graph(pre_noise, data, gt, target)
    acc_gt = attack.evaluate(data, gt)

    placeholders = {
        'data': data,
        'label': label,
        'gt': gt,
    }

    lr = 1e-2
    opt = tf.train.AdamOptimizer(lr)
    grads = opt.compute_gradients(loss, [pre_noise])
    train = opt.apply_gradients(grads)
    # init tensorboard
    tf.summary.scalar('loss', loss)
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(
        os.path.join(config.log_dir, 'tf_log', 'train'),
        tf.get_default_graph())

    # create a session
    tf.set_random_seed(12345)  # ensure consistent results
    succ = 0
    noise_l1 = 0
    noise_l2 = 0
    noise_l_inf = 0
    tot = 0
    with tf.Session() as sess:

        assert train_set.minibatch_size == 1  # the per-image bookkeeping below assumes one image per batch
        for idx in range(train_set.minibatches):
            global_cnt = 0

            sess.run(tf.global_variables_initializer())  # init all variables
            # name the fetched labels gt_labels so the gt placeholder is not shadowed
            images, gt_labels = sess.run(train_batch_gnr)
            # attack only images the clean model classifies correctly; these count toward tot
            if acc_gt.eval(feed_dict={
                    placeholders['data']: images,
                    placeholders['gt']: gt_labels
            }) < 0.5:
                continue
            else:
                tot += 1

            if args.targeted_attack:
                # choose a target 5 classes away from the ground truth, wrapping around
                labels = (gt_labels + 5) % config.nr_class
            else:
                labels = gt_labels

            min_distortion = np.inf
            min_l1 = np.inf
            min_l2 = np.inf
            min_linf = np.inf

            for epoch in range(1, args.nr_epoch + 1):
                global_cnt += 1
                feed_dict = {
                    placeholders['data']: images,
                    placeholders['label']: labels,
                    placeholders['gt']: gt_labels,
                }
                _, accuracy, loss_batch, adv_examples, summary = sess.run(
                    [train, acc, loss, adv, merged], feed_dict=feed_dict)

                if global_cnt % config.show_interval == 0:
                    train_writer.add_summary(summary, global_cnt)
                    print(
                        "e:{}/{}, {}".format(idx, train_set.minibatches,
                                             epoch),
                        'loss: {:.3f}'.format(loss_batch),
                        'acc: {:.3f}'.format(accuracy),
                    )

                if args.targeted_attack:
                    # targeted: success means the adv example is classified as the target
                    successful = acc_gt.eval(feed_dict={
                        placeholders['data']: adv_examples,
                        placeholders['gt']: labels
                    }) > 0.5
                else:
                    # untargeted: success means the adv example no longer matches the ground truth
                    successful = acc_gt.eval(feed_dict={
                        placeholders['data']: adv_examples,
                        placeholders['gt']: gt_labels
                    }) < 0.5

                if successful:
                    l1 = np.sum(np.abs((adv_examples - images) / 255))
                    l2_square = np.sum(((adv_examples - images) / 255)**2)
                    distortion = args.beta * l1 + args.gamma * l2_square
                    if distortion < min_distortion:
                        min_distortion = distortion
                        min_l1 = min(min_l1, l1)
                        min_l2 = min(min_l2, np.sqrt(l2_square))
                        min_linf = min(min_linf,
                                       np.max((adv_examples - images) / 255))

            print('Training for batch {} is done'.format(idx))
            sys.stdout.flush()

            if min_distortion != np.inf:
                succ += 1
                noise_l1 += min_l1
                noise_l2 += min_l2
                noise_l_inf += min_linf
        with open(save_result_dir, "a+") as f:  # save_result_dir is assumed to be defined elsewhere in the project
            for attr in dir(args):
                if not attr.startswith("_"):
                    print("{}: {}".format(attr, getattr(args, attr)))
                    f.write("{}: {}\n".format(attr, getattr(args, attr)))
            print('Success rate: {}'.format(succ / tot))
            print('Noise l1-norm: {}'.format(noise_l1 / tot))
            print('Noise l2-norm: {}'.format(noise_l2 / tot))
            print('Noise l-inf: {}'.format(noise_l_inf / tot))
            f.write('Success rate: {}\n'.format(succ / tot))
            f.write('Noise l1-norm: {}\n'.format(noise_l1 / tot))
            f.write('Noise l2-norm: {}\n'.format(noise_l2 / tot))
            f.write('Noise l-inf: {}\n\n'.format(noise_l_inf / tot))
            sys.stdout.flush()
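The alpha/beta/gamma/CW_kappa arguments above suggest an elastic-net-regularized objective in the spirit of the EAD attack: an attack term (cross entropy or the C&W margin with confidence kappa) plus lasso (L1) and ridge (squared L2) penalties on the perturbation, the same quantities the distortion bookkeeping recomputes in numpy. Since the Attack internals are not shown, this is only a sketch and every name in it is hypothetical:

def elastic_net_attack_loss(logits, target_onehot, delta,
                            alpha, beta, gamma, kappa, use_cross_entropy):
    # C&W margin: push the target logit above the best competing logit by kappa
    target_logit = tf.reduce_sum(target_onehot * logits, axis=1)
    other_logit = tf.reduce_max(
        (1.0 - target_onehot) * logits - 1e9 * target_onehot, axis=1)
    cw = tf.maximum(other_logit - target_logit + kappa, 0.0)
    ce = tf.nn.softmax_cross_entropy_with_logits_v2(
        labels=target_onehot, logits=logits)
    adv_term = ce if use_cross_entropy else cw
    # elastic-net regularization on the pixel perturbation delta (NHWC)
    l1 = tf.reduce_sum(tf.abs(delta), axis=[1, 2, 3])
    l2 = tf.reduce_sum(tf.square(delta), axis=[1, 2, 3])
    return tf.reduce_mean(alpha * adv_term + beta * l1 + gamma * l2)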
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-c',
                        '--continue',
                        dest='continue_path',
                        required=False)
    parser.add_argument('-l', '--loss', default='softmax')
    args = parser.parse_args()

    ## load dataset
    train_batch_gnr, train_set = get_dataset_batch(ds_name='train')

    data = tf.placeholder(tf.float32,
                          shape=(None, ) + config.image_shape +
                          (config.nr_channel, ),
                          name='data')
    label = tf.placeholder(tf.int32, shape=(None, ),
                           name='label')  # placeholder for targeted label
    gt = tf.placeholder(tf.int32, shape=(None, ), name='gt')

    pre_noise = tf.Variable(
        tf.zeros((config.minibatch_size, config.image_shape[0],
                  config.image_shape[1], config.nr_channel),
                 dtype=tf.float32))
    model = Model()
    attack = Attack(model, config.minibatch_size)
    acc, loss, adv, x, logits, noise = attack.generate_graph(
        pre_noise, data, gt, label)
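    # this variant also exposes the perturbed input x, the logits and the raw noise for the transfer test below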
    acc_gt = attack.evaluate(data, gt)

    placeholders = {
        'data': data,
        'label': label,
        'gt': gt,
    }

    lr = 1e-2
    opt = tf.train.AdamOptimizer(lr)
    grads = opt.compute_gradients(loss, [pre_noise])
    train = opt.apply_gradients(grads)

    ## create a session
    tf.set_random_seed(12345)  # ensure consistent results
    global_cnt = 0
    epoch_start = 0
    succ = 0
    noise_l2 = 0
    ta_succ = 0
    with tf.Session() as sess:

        for idx in range(train_set.minibatches):
            global_cnt = 0

            sess.run(tf.global_variables_initializer())  # init all variables
            images, labels, target = sess.run(train_batch_gnr)

            for epoch in range(epoch_start + 1, config.nr_epoch + 1):
                global_cnt += 1
                feed_dict = {
                    placeholders['data']: images,
                    placeholders['label']: target,
                    placeholders['gt']: labels,
                }
                _, accuracy, loss_batch, adv_examples, ori_image, a, nnoise = sess.run(
                    [train, acc, loss, adv, x, logits, noise],
                    feed_dict=feed_dict)

                if global_cnt % config.show_interval == 0:
                    print(
                        "e:{}/{}, {}".format(idx, train_set.minibatches,
                                             epoch),
                        'loss: {:.3f}'.format(loss_batch),
                        'acc: {:.3f}'.format(accuracy),
                        'logits: {:.4f}'.format(np.max(a[0])),
                        'index: {}'.format(np.argmax(a[0])),
                        'target: {}'.format(target))

            print('Training for batch {} is done'.format(idx))

            accuracy_ta = acc_gt.eval(feed_dict={
                placeholders['data']: adv_examples,
                placeholders['gt']: target
            })
            accuracy_gt = acc_gt.eval(feed_dict={
                placeholders['data']: adv_examples,
                placeholders['gt']: labels
            })
            ta_succ = (idx * ta_succ + accuracy_ta) / (idx + 1)  # running success rate of hitting the target class
            # running rate of adversarial examples misclassified w.r.t. the ground truth
            succ = (idx * succ + 1 - accuracy_gt) / (idx + 1)
            # running mean of the squared pixel difference between adversarial and original images
            noise_l2 = (idx * noise_l2 + (adv_examples - images) ** 2) / (idx + 1)

            if idx == train_set.minibatches - 1:
                aa_succ = 0
                for i in range(1000):
                    sess.run(tf.global_variables_initializer())  # init all variables
                    val_images, val_labels, val_target = sess.run(
                        train_batch_gnr)
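                    # apply the learned noise to unseen images: a universal-perturbation style transfer test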
                    ac_image = nnoise + val_images
                    accuracy_attack = acc_gt.eval(
                        feed_dict={
                            placeholders['data']: ac_image,
                            placeholders['gt']: val_target
                        })
                    aa_succ = (i * aa_succ + accuracy_attack) / (i + 1)
                    print('index:{}'.format(i),
                          'accuracy_attack:{}'.format(accuracy_attack))  # accuracy_attack should stay high if the noise transfers

        print('Success rate of target attack is {}'.format(ta_succ))
        print('Success rate of this attack is {}'.format(succ))
        print('Noise norm of this attack is {}'.format(np.mean(noise_l2)))
        print('Success rate of target attack val image is {}'.format(aa_succ))
        embed()