示例#1
0
def save_adv_examples(sess,
                      model,
                      data_iter,
                      save_folder,
                      fgm_settings={np.inf: [0.1]}):
    '''
    Generate and save adversarial examples for every (norm, eps) setting.

    fgm_settings should be a dictionary of the form
    { 'NORM' : 'ARRAY OF VALUES' }, i.e. each norm maps to the eps values to
    attack with. Everything is written through _save_adv_examples into
    `<save_folder>/adv_examples`, which is created when it does not exist.

    All untargeted attacks are produced first, then all targeted ones; the
    data iterator is reset before each individual attack run.
    '''
    out_dir = os.path.join(save_folder, 'adv_examples')
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # Pass 1: untargeted attacks (no target labels are saved).
    for attack_norm, eps_values in fgm_settings.items():
        for eps in eps_values:
            data_iter.reset()
            untargeted_attack = fgm(model.input,
                                    model.output,
                                    eps=eps,
                                    ord=attack_norm)
            adv_examples, labels, _ = gen_adv_examples(
                sess, model, data_iter, untargeted_attack)
            _save_adv_examples(out_dir, adv_examples, labels, eps,
                               attack_norm)

    # Pass 2: targeted attacks (the chosen targets are saved as well).
    for attack_norm, eps_values in fgm_settings.items():
        for eps in eps_values:
            data_iter.reset()
            targeted_attack = fgm_target(model.input,
                                         model.output,
                                         model.label,
                                         eps=eps,
                                         ord=attack_norm)
            adv_examples, labels, targets = gen_adv_examples(
                sess, model, data_iter, targeted_attack)
            _save_adv_examples(out_dir, adv_examples, labels, eps,
                               attack_norm, targets)
示例#2
0
    def __init__(self, sess, resnet):
        '''
        Build the attack graph around `resnet` and restore its weights.

        sess: TensorFlow session; the checkpoint is restored into it and it
            is handed to the CarliniLi attacker.
        resnet: callable mapping an image tensor to logits.

        Attributes created here:
        images: float32 placeholder of shape
            (None, FLAGS.image_size, FLAGS.image_size, FLAGS.channels);
            feed your images into it.
        labels: int64 placeholder with one class index per image.
        logits / softmax: network outputs for `images` (computed on the
            median-filtered images when FLAGS.feature_squeeze is set).
        eps: scalar placeholder for the fgm step size.
        adv_image: fgm output for `images`, clipped to [0, 255].
        index / grad: gradient of logit column `index` w.r.t. `images`.

        Raises AssertionError when reading the checkpoint state fails with
        tf.errors.OutOfRangeError, and FileNotFoundError when no checkpoint
        is found at the weights path.
        '''
        self.sess = sess
        self.images = tf.placeholder(
            tf.float32,
            (None, FLAGS.image_size, FLAGS.image_size, FLAGS.channels),
            name="images")

        # combined attack for feature squeezing!
        # The network then sees median-filtered images, so attack gradients
        # flow through the squeezing step as well.
        if FLAGS.feature_squeeze:
            net_input = median_filtering_2x2(self.images,
                                             dataset=FLAGS.dataset)
        else:
            net_input = self.images

        self.labels = tf.placeholder(tf.int64, (None, ), name="labels")
        self.resnet = resnet
        self.logits = resnet(net_input)
        self.softmax = tf.nn.softmax(self.logits)
        self.eps = tf.placeholder(tf.float32, (), name="fgsm_eps")

        # Lightweight model description consumed by the CarliniLi attacker.
        self.cw_model = types.SimpleNamespace()
        self.cw_model.image_size = FLAGS.image_size
        self.cw_model.num_channels = FLAGS.channels
        self.cw_model.predict = self.resnet
        self.cw_model.num_labels = 10

        labels_onehot = tf.one_hot(self.labels, depth=self.cw_model.num_labels)
        self.adv_image = fgm(self.images,
                             self.softmax,
                             y=labels_onehot,
                             eps=self.eps,
                             clip_min=0.0,
                             clip_max=255.0)

        saver = tf.train.Saver()
        save_path, _ = get_weights_path()
        try:
            ckpt_state = tf.train.get_checkpoint_state(save_path)
        except tf.errors.OutOfRangeError as e:
            # Format the message here: exception constructors do not apply
            # logging-style '%s' arguments.
            raise AssertionError('Cannot restore checkpoint: %s' % e) from e
        if not (ckpt_state and ckpt_state.model_checkpoint_path):
            raise FileNotFoundError('No model to eval yet at %s' % save_path)

        tf.logging.info('Loading checkpoint %s',
                        ckpt_state.model_checkpoint_path)
        saver.restore(sess, ckpt_state.model_checkpoint_path)

        # Built after the restore; these ops add no variables.
        self.index = tf.placeholder(tf.int32, (), name="index")
        self.grad = tf.gradients(self.logits[:, self.index], [self.images],
                                 name="grad")[0]

        # Images in CW attack are rescaled to [-0.5, 0.5]
        self.cw_attacker = CarliniLi(self.sess, self.cw_model, targeted=False)
示例#3
0
def fgs_eval(sess, model, data_iter, fgm_eps, norm=np.inf, logger=None):
    '''
    Measure model accuracy under untargeted and targeted fgm attacks.

    sess: session used to run the attack and prediction ops.
    model: object exposing the `input`, `output` and `label` tensors.
    data_iter: iterable of batches, each a mapping with "img" and "label".
    fgm_eps: attack strength passed to fgm / fgm_target as `eps`.
    norm: norm passed to fgm / fgm_target as `ord`.
    logger: optional; when given, its log_adv_stats(norm, fgm_eps, ...) is
        called with the three rates below. Skipped when None.

    Returns (untargeted_fgs_acc, targeted_fgs_acc, targeted_atk_success_rate)
    where the two accuracies are measured against the true labels and the
    success rate against the permuted target labels.

    Raises ZeroDivisionError if data_iter yields no examples.
    '''
    untarget_num_correct = 0.0
    target_num_correct = 0.0
    target_atk_success = 0.0
    total_count = 0
    iter_ = tqdm(data_iter)
    # Both attack graphs are built once and reused for every batch.
    fgm_attack = fgm(model.input, model.output, eps=fgm_eps, ord=norm)
    targeted_fgm_attack = fgm_target(model.input,
                                     model.output,
                                     model.label,
                                     eps=fgm_eps,
                                     ord=norm)

    for batch in iter_:
        target_labels = permute_labels(batch["label"])
        # Try to perturb with cleverhans (untargeted)
        perturbed_imgs_fgm = sess.run(fgm_attack, {model.input: batch["img"]})
        targeted_fgm_imgs = sess.run(targeted_fgm_attack, {
            model.input: batch["img"],
            model.label: target_labels
        })
        y_untarget = sess.run(model.output, {model.input: perturbed_imgs_fgm})
        y_targeted = sess.run(model.output, {model.input: targeted_fgm_imgs})

        untarget_pred_label = np.argmax(y_untarget, axis=1)
        target_pred_label = np.argmax(y_targeted, axis=1)
        untarget_num_correct += np.sum(
            np.equal(untarget_pred_label, batch["label"]).astype(float))
        target_num_correct += np.sum(
            np.equal(target_pred_label, batch["label"]).astype(float))
        # The targeted attack succeeds when the prediction lands on the
        # target label rather than the true one.
        target_atk_success += np.sum(
            np.equal(target_pred_label, target_labels).astype(float))
        total_count += untarget_pred_label.size
    untargeted_pred_acc = (untarget_num_correct / total_count)
    targeted_pred_acc = (target_num_correct / total_count)
    targeted_success_rate = (target_atk_success / total_count)

    # logger is optional (defaults to None), so only report when provided.
    if logger is not None:
        logger.log_adv_stats(norm, fgm_eps, untargeted_pred_acc,
                             targeted_pred_acc, targeted_success_rate)
    return (untargeted_pred_acc, targeted_pred_acc, targeted_success_rate)
 def __init__(self, sess, model_input, model_result, global_threshold, ord):
     """Build a targeted fgm attack graph over a thresholded model output.

     The model output is turned into a two-part score by concatenating
     (global_threshold - model_result) and (model_result - global_threshold)
     along axis 1, and fgm is run against that score with inputs clipped to
     [0, 255]. `eps` and `labels` are placeholders to be fed at run time.
     """
     self.sess = sess

     # Signed margins on either side of the threshold.
     # presumably model_result is a (batch, 1) score -- TODO confirm
     margin_below = global_threshold - model_result
     margin_above = model_result - global_threshold
     self.model_result_2classes = tf.concat((margin_below, margin_above),
                                            axis=1)

     self.model_input = model_input
     self.eps = tf.placeholder(dtype=tf.float32)
     self.labels = tf.placeholder(dtype=tf.float32)

     attack_options = dict(eps=self.eps,
                           clip_min=0,
                           clip_max=255,
                           ord=ord,
                           targeted=True)
     self.adv = fgm(self.model_input, self.model_result_2classes,
                    self.labels, **attack_options)
示例#5
0
 def test_fgm_gradient_max(self):
     """Check the gradient of one fgm output entry w.r.t. the input.

     A random entry of the adversarial batch is differentiated w.r.t. `x`;
     the result must exist and equal a one-hot matrix at that entry.
     """
     n_examples, n_features, n_classes = 4, 2, 3
     rng = np.random.RandomState([2017, 8, 23])

     x = tf.placeholder(tf.float32, [n_examples, n_features])
     weights = tf.placeholder(tf.float32, [n_features, n_classes])
     probs = tf.nn.softmax(tf.matmul(x, weights))
     adv_x = fgm(x, probs)

     # Draw order matters: example index first, then feature index.
     row = rng.randint(n_examples)
     col = rng.randint(n_features)
     picked = tf.slice(adv_x, [row, col], [1, 1])
     [dx] = tf.gradients(picked, x)
     # The following line catches GitHub issue #243
     self.assertIsNotNone(dx)

     feed = random_feed_dict(rng, [x, weights])
     dx = self.sess.run(dx, feed_dict=feed)

     expected = np.zeros((n_examples, n_features))
     expected[row, col] = 1.0
     self.assertClose(dx, expected)
示例#6
0
def test_fgm_gradient_max():
    """Check the gradient of one fgm output entry w.r.t. the input.

    Builds a small softmax model, runs fgm with the cross-entropy loss type,
    differentiates one randomly chosen entry of the adversarial batch w.r.t.
    `x`, and asserts the gradient exists and is one-hot at that entry.
    """
    input_dim = 2
    num_classes = 3
    batch_size = 4
    loss_type = KEYWORDS.CE
    rng = np.random.RandomState([2017, 8, 23])
    x = tf.placeholder(tf.float32, [batch_size, input_dim])
    weights = tf.placeholder(tf.float32, [input_dim, num_classes])
    logits = tf.matmul(x, weights)
    probs = tf.nn.softmax(logits)
    adv_x = fgm(x, probs, loss_type=loss_type)
    random_example = rng.randint(batch_size)
    random_feature = rng.randint(input_dim)
    output = tf.slice(adv_x, [random_example, random_feature], [1, 1])
    dx, = tf.gradients(output, x)
    # The following line catches GitHub issue #243
    assert dx is not None
    # Use the session as a context manager so it is closed even when the
    # run or the assertion below fails.
    with tf.Session() as sess:
        dx = sess.run(dx, feed_dict=random_feed_dict(rng, [x, weights]))
    ground_truth = np.zeros((batch_size, input_dim))
    ground_truth[random_example, random_feature] = 1.
    assert np.allclose(dx, ground_truth), (dx, ground_truth)
示例#7
0
                        [mc_preds_tensor, entropy_mean_tensor, bald_tensor])

# plot entropy and MI as eps increases
# Sweep over the attack strengths in `eps`: for each one, build an fgm attack
# against the averaged prediction tensor and collect per-batch statistics on
# the adversarial inputs.

# Sweep-level accumulators. Nothing is appended to them in the lines visible
# here -- presumably filled further down, one entry per epsilon; TODO confirm.
entropies = []
balds = []
accs = []

# Average of mc_preds_tensor over axis 0 (presumably the MC-sample axis, as in
# the per-batch mean below -- TODO confirm).
preds_tensor = K.mean(mc_preds_tensor, axis=0)

for i, ep in enumerate(eps):

    print("iteration", i, "of", len(eps), "epsilon", ep)
    sys.stdout.flush()

    # NOTE: fgm is called inside the loop, so a new attack tensor is built
    # for every epsilon value.
    adv_tensor = fgm(x, preds_tensor, eps=ep, clip_min=0, clip_max=1, ord=norm)
    # Per-batch statistics for this epsilon.
    b_entropies = []
    b_balds = []
    b_accs = []  # not appended to within the lines visible here

    batches = U.batches_generator(tst, tsty, batch_size=500)
    for j, (bx, by) in enumerate(batches):

        print('    batch', j)
        sys.stdout.flush()  # in case we are writing to a log file not stdout

        # Materialise the adversarial inputs for this batch, then feed them
        # to get_output (defined outside this view) for the three outputs.
        adv = adv_tensor.eval(session=K.get_session(), feed_dict={x: bx})
        mc_samples, e_adv, b_adv = get_output([adv])
        b_entropies.append(e_adv.mean())  # mean across the batch
        b_balds.append(b_adv.mean())  # ditto
        preds = mc_samples.mean(axis=0)  # mean across the mc samples per point
示例#8
0
def evaluate(hps, data_X, data_y, eval_once=True):
    """Eval loop.

    Restores the latest checkpoint from the weights path and evaluates it on
    (data_X, data_y) in batches of args.batch_size; a trailing partial batch
    is dropped.

    When args.top5 is false, each batch is additionally attacked with an
    iterated fgm step (100 steps of size args.eps / 10, projected back into
    the args.eps ball around the clean batch and into [0, 1]). Clean
    precision, precision under attack and mean confidence are printed, and
    the adversarial images, their predictions and the true labels are saved
    to an .npz file. Up to ten misclassified clean images are written to
    tmp/bad_images.png.

    When args.top5 is true, only loss and top-5 accuracy are computed; no
    attack is run and no images are dumped.

    Args:
        hps: hyper-parameters forwarded to the resnet / vgg templates.
        data_X: array of input images, sliced per batch.
        data_y: array of one-hot labels, sliced per batch.
        eval_once: evaluate a single time when true; otherwise re-evaluate
            the latest checkpoint every 60 seconds until none can be read.
    """
    images = tf.placeholder(tf.float32,
                            shape=(None, args.image_size, args.image_size,
                                   args.channels))

    labels_onehot = tf.placeholder(tf.int32, shape=(None, args.num_classes))
    labels = tf.argmax(labels_onehot, axis=1)

    if args.classifier == "madry":
        net = tf.make_template('net', madry_template)
    elif args.classifier == 'aditi':
        net = tf.make_template('net', aditi_template)
    elif args.classifier == 'zico':
        net = tf.make_template('net', zico_template)
    elif args.classifier == 'resnet':
        net = tf.make_template('net', resnet_template, hps=hps)
    else:
        net = tf.make_template('net', vgg_template, hps=hps)
    logits = net(images, training=False)

    pred = tf.argmax(logits, axis=1)
    probs = tf.nn.softmax(logits)

    # Per-example loss (not reduced), so loss.shape[0] is the batch size.
    cost = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                   labels=labels_onehot)
    # One attack step; iterated from Python below.
    adv_image = fgm(images,
                    tf.nn.softmax(logits),
                    y=labels_onehot,
                    eps=args.eps / 10,
                    clip_min=0.0,
                    clip_max=1.0)
    top_5 = tf.nn.in_top_k(predictions=logits, targets=labels, k=5)

    if args.classifier == 'madry' and not args.trained:
        # Checkpoint keys are the variable names with the first 4 and last 2
        # characters removed (presumably the 'net/' scope prefix and the
        # ':0' suffix).
        saver = tf.train.Saver({
            v.name[4:-2]: v
            for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       scope="net")
        })
    else:
        saver = tf.train.Saver(
            tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='net'))

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True

    pgd_steps = 100
    best_precision = 0.0
    save_path, save_path_ckpt = get_weights_path(args)
    # The context manager closes the session on every exit path.
    with tf.Session(config=config) as sess:
        while True:
            try:
                ckpt_state = tf.train.get_checkpoint_state(save_path)
            except tf.errors.OutOfRangeError as e:
                print('[!] Cannot restore checkpoint: %s' % e)
                break
            if not (ckpt_state and ckpt_state.model_checkpoint_path):
                print('[!] No model to eval yet at %s' % save_path)
                break
            print('[*] Loading checkpoint %s' %
                  ckpt_state.model_checkpoint_path)
            saver.restore(sess, ckpt_state.model_checkpoint_path)

            total_prediction, correct_prediction = 0, 0
            adv_prediction = 0
            total_loss = 0
            batch_size = args.batch_size
            num_batch = len(data_X) // batch_size
            bad_images = []
            bad_labels = []
            confidences = []
            adv_images = []
            cls_preds = []
            true_labels = []
            for batch in range(num_batch):
                x = data_X[batch * batch_size:(batch + 1) * batch_size]
                x = x.astype(np.float32)
                y = data_y[batch * batch_size:(batch + 1) * batch_size]
                y = y.astype(np.int32)
                feed = {images: x, labels_onehot: y}
                if not args.top5:
                    (loss, predictions, conf) = sess.run([cost, pred, probs],
                                                         feed_dict=feed)
                    # Confidence = probability of the predicted class.
                    confidences.extend(
                        conf[np.arange(conf.shape[0]), predictions])

                    img_np = np.copy(x)
                    for _ in range(pgd_steps):
                        img_np = sess.run(adv_image,
                                          feed_dict={
                                              images: img_np,
                                              labels_onehot: y
                                          })
                        # Project back into the eps ball around the clean
                        # batch, then into the valid pixel range.
                        img_np = np.clip(img_np, x - args.eps, x + args.eps)
                        img_np = np.clip(img_np, 0.0, 1.0)
                    adv_images.extend(img_np)

                    adv_pred_np = pred.eval(session=sess,
                                            feed_dict={
                                                images: img_np,
                                                labels_onehot: y
                                            })
                    cls_preds.extend(adv_pred_np)

                    y_idx = np.argmax(y, axis=1)
                    true_labels.extend(y_idx)

                    # This bookkeeping needs `predictions` and `adv_pred_np`,
                    # which only exist on this branch.
                    wrong = y_idx != predictions
                    correct_prediction += np.sum(y_idx == predictions)
                    bad_images.extend(x[wrong])
                    bad_labels.extend(predictions[wrong])
                    adv_prediction += np.sum(y_idx == adv_pred_np)
                else:
                    (loss, in_top5) = sess.run([cost, top_5], feed_dict=feed)
                    correct_prediction += np.sum(in_top5)
                total_loss += np.sum(loss)
                total_prediction += loss.shape[0]

            precision = 1.0 * correct_prediction / total_prediction
            mean_loss = 1.0 * total_loss / total_prediction
            best_precision = max(precision, best_precision)

            if not args.top5:
                average_conf = np.mean(np.asarray(confidences))
                print(
                    '[*] loss: %.6f, precision: %.6f, PGD precision: %.6f, Confidence: %.6f'
                    % (mean_loss, precision,
                       adv_prediction / total_prediction, average_conf))
                # NOTE(review): hard-coded, machine-specific output location.
                folder_format = '/atlas/u/yangsong/generative_adversary/{}_{}_pgd/'
                out_dir = check_folder(
                    folder_format.format(args.dataset, args.classifier))
                np.savez(os.path.join(out_dir,
                                      'eps_{:.3f}.npz'.format(args.eps)),
                         adv_images=np.asarray(adv_images),
                         cls_preds=np.asarray(cls_preds),
                         true_labels=np.asarray(true_labels))
            else:
                print(
                    '[*] loss: %.6f, top 5 accuracy: %.6f, best top 5 accuracy: %.6f'
                    % (mean_loss, precision, best_precision))

            # Keep at most ten misclassified examples for inspection.
            bad_labels = np.asarray(bad_labels).astype(np.uint8)[:10]
            if bad_images:
                bad_images = np.asarray(bad_images)
                bad_images = (255. * bad_images).astype(np.uint8)[:10]
                # Stack the images vertically into a single strip.
                bad_images = np.reshape(
                    bad_images, (len(bad_images) * args.image_size,
                                 args.image_size, args.channels))
                bad_images = np.squeeze(bad_images)
                imsave(os.path.join(check_folder('tmp'), 'bad_images.png'),
                       bad_images)
            print("bad_labels:\n{}".format(bad_labels))

            if eval_once:
                break

            time.sleep(60)
示例#9
0
# Build the symbolic single-step attack used by the evaluation loop below.
ds_test, _ = get_data('test', args)

# Untargeted BIM
from tensorpack.dataflow import dataset

# Per-pixel mean as returned by the Cifar10 'train' dataset object.
pp_mean = dataset.Cifar10('train').get_per_pixel_mean()

# Scalar placeholder used both as the fgm eps and as the clipping radius
# around the original input.
stepsize_ph = tf.placeholder(tf.float32, [])
# Placeholder with the same static shape as image_ph; the perturbed input is
# clipped around whatever is fed here.
orig_input_ph = tf.placeholder(tf.float32, image_ph.get_shape().as_list())
# adv_inp = fgm(image_ph, probs, y=tf.one_hot(label_ph, depth=10), eps=tf.to_float(1))
# Add a leading axis to pp_mean and repeat it along that axis to match the
# dynamic first dimension of image_ph.
pp_mean_sym = tf.tile(tf.constant(pp_mean[None]),
                      [tf.shape(image_ph)[0], 1, 1, 1])
# adv_inp = tf.clip_by_value(adv_inp, -pp_mean_sym, 255 - pp_mean_sym)
# adv_inp = tf.clip_by_value(adv_inp, orig_input_ph - stepsize_ph, orig_input_ph + stepsize_ph)
# No `y` is passed to fgm here (the labelled variant is commented out).
# NOTE: stepsize_ph is already float32, so tf.to_float is redundant.
adv_inp = fgm(
    image_ph,
    probs,  #y=tf.one_hot(label_ph, depth=10)
    eps=tf.to_float(stepsize_ph))
# Clip to [-pp_mean, 255 - pp_mean] -- presumably the valid pixel range after
# mean subtraction; TODO confirm against the input pipeline.
adv_inp = tf.clip_by_value(adv_inp, -pp_mean_sym, 255 - pp_mean_sym)
# Then keep the result within stepsize_ph of the fed original input.
adv_inp = tf.clip_by_value(adv_inp, orig_input_ph - stepsize_ph,
                           orig_input_ph + stepsize_ph)

for EPSILON in [0, 1, 2, 4, 8, 16]:
    ds_test.reset_state()
    crts, ents = [], []
    with tqdm(total=10000) as pbar:
        for i, (img, lbl) in tqdm(enumerate(ds_test.get_data())):
            if img.shape[0] != 128:
                break
            fd = {
                image_ph: img,
                label_ph: lbl,
示例#10
0
def train(hps, data):
    """Training loop.

    Builds the classifier graph (resnet when FLAGS.model == 'resnet', vgg
    otherwise) and trains it for FLAGS.maxiter iterations with momentum SGD
    on a stepped learning-rate schedule, saving a checkpoint every 5000
    iterations.

    Three modes, selected by flags:
      * neither FLAGS.adversarial nor FLAGS.adversarial_BIM: plain training
        on randomly flipped images.
      * FLAGS.adversarial: one-step adversarial images are generated per
        batch; the second part of the batch (from index
        hps.batch_size // 2) is replaced by them before the weighted
        combined loss is minimised.
      * FLAGS.adversarial_BIM (only when FLAGS.adversarial is off): the same,
        but the adversarial images come from iterating a fixed-size step.

    If a checkpoint already exists the user is asked whether to delete it and
    retrain; otherwise the existing weights are restored first.

    Args:
        hps: hyper-parameters; `num_classes`, `batch_size` and `_asdict()`
            are used here, and the object is forwarded to the network
            template.
        data: batch source; `data.next(batch_size)` must return (x, y).
    """
    images = tf.placeholder(tf.float32, shape=(None, FLAGS.image_size, FLAGS.image_size, FLAGS.channels), name="images")
    # NOTE: shape=(None) is plain None (fully unknown shape), not a 1-tuple.
    labels = tf.placeholder(tf.int64, shape=(None), name="labels")
    labels_onehot = tf.one_hot(labels, depth=hps.num_classes, dtype=tf.float32, name="labels_onehot")
    if FLAGS.label_smooth:
        labels_onehot = label_smooth(labels_onehot)

    lrn_rate = tf.placeholder(tf.float32, shape=(), name="lrn_rate")
    tf.logging.info(json.dumps(vars(FLAGS)))
    tf.logging.info(json.dumps(hps._asdict()))

    flipped_images = random_flip_left_right(images)

    net = tf.make_template('net', resnet_template, hps=hps) if FLAGS.model == 'resnet' else \
        tf.make_template('net', vgg_template, hps=hps)

    truth = labels
    # In the adversarial modes this first forward pass runs with
    # training=False; the training-mode pass is `com_logits` below.
    if FLAGS.adversarial or FLAGS.adversarial_BIM:
        logits = net(flipped_images, training=False)
    else:
        logits = net(flipped_images, training=True)
    probs = tf.nn.softmax(logits)

    predictions = tf.argmax(logits, axis=1)
    precision = tf.reduce_mean(tf.to_float(tf.equal(predictions, truth)))

    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels_onehot))

    # L2 weight decay over every trainable variable of the network.
    weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='net')
    weight_norm = tf.add_n([tf.nn.l2_loss(v) for v in weights])
    cost = cost + 0.0005 * weight_norm

    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        train_op = tf.train.MomentumOptimizer(learning_rate=lrn_rate, momentum=0.9).minimize(cost)

    if FLAGS.adversarial or FLAGS.adversarial_BIM:
        # Per-example attack strength: |truncated normal| with std FLAGS.adv_std,
        # reshaped so it broadcasts over the image dimensions.
        eps = tf.abs(tf.truncated_normal(shape=(tf.shape(images)[0],), mean=0, stddev=FLAGS.adv_std))
        eps = eps[:, None, None, None]
        adv_x = fgsm(flipped_images, probs, eps=eps, clip_min=0.0, clip_max=255.0)
        # Fixed step of size 1 using the true labels; iterated from Python in
        # the BIM branch of the training loop.
        adv_x_leak = fgm(flipped_images, probs, y=labels_onehot, eps=np.asarray([1])[:, None, None, None],
                         clip_min=0.0, clip_max=255.0)

        adv_logits = net(adv_x, training=False)
        adv_pred = tf.argmax(adv_logits, axis=1)
        adv_precision = tf.reduce_mean(tf.to_float(tf.equal(adv_pred, truth)))

        adv_logits_leak = net(adv_x_leak, training=False)
        adv_pred_leak = tf.argmax(adv_logits_leak, axis=1)
        adv_precision_leak = tf.reduce_mean(tf.to_float(tf.equal(adv_pred_leak, truth)))

        # The first num_normal examples go through the random flip; the rest
        # are taken from `images` unflipped.
        num_normal = hps.batch_size // 2
        combined_images = tf.concat([flipped_images[:num_normal], images[num_normal:]], axis=0)
        com_logits = net(combined_images, training=True)

        normal_cost = 2.0 / 1.3 * tf.nn.softmax_cross_entropy_with_logits(logits=com_logits[:num_normal],
                                                                          labels=labels_onehot[:num_normal])
        adv_cost = 0.6 / 1.3 * tf.nn.softmax_cross_entropy_with_logits(logits=com_logits[num_normal:],
                                                                       labels=labels_onehot[num_normal:])

        combined_cost = tf.reduce_mean(tf.concat([normal_cost, adv_cost], axis=0)) + 0.0005 * weight_norm

        with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
            train_op_adv = tf.train.MomentumOptimizer(learning_rate=lrn_rate, momentum=0.9).minimize(combined_cost)

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(max_to_keep=3)
        save_path, save_path_ckpt = get_weights_path()
        state = tf.train.get_checkpoint_state(save_path)
        if state and state.model_checkpoint_path:
            ans = verify("Warning: model already trained. Delete files and re-train? (y/n)")
            if ans:
                shutil.rmtree(save_path)
                os.makedirs(save_path)
            else:
                # Continue from the existing weights.
                saver.restore(sess, state.model_checkpoint_path)
        else:
            os.makedirs(save_path, exist_ok=True)

        # Record the configuration next to the weights.
        hps_path = os.path.join(save_path, 'hps.txt')
        with open(hps_path, 'w') as fout:
            fout.write(json.dumps(vars(FLAGS)))
            fout.write(json.dumps(hps._asdict()))

        for step in range(FLAGS.maxiter):
            try:
                x, y = data.next(hps.batch_size)
            except StopIteration:
                # NOTE(review): no new batch is fetched here, so this step
                # reuses the previous (x, y) -- and fails with NameError if it
                # happens on the very first step. Confirm the contract of
                # data.next before changing this.
                tf.logging.info("New epoch!")

            # Stepped learning-rate schedule.
            if step < 40000:
                lr = 0.1
            elif step < 60000:
                lr = 0.01
            elif step < 80000:
                lr = 0.001
            else:
                lr = 0.0001

            if not FLAGS.adversarial and not FLAGS.adversarial_BIM:
                _, acc = sess.run([train_op, precision], feed_dict={
                    images: x,
                    labels: y,
                    lrn_rate: lr
                })
                tf.logging.info("Iter: {}, Precision: {:.6f}".format(step + 1, acc))
            elif FLAGS.adversarial:
                adv_images, acc, acc_adv = sess.run([adv_x, precision, adv_precision], feed_dict={
                    images: x,
                    labels: y,
                })
                combined_batch = np.concatenate([x[:num_normal], adv_images[num_normal:]], axis=0)
                _, com_loss = sess.run([train_op_adv, combined_cost], feed_dict={
                    images: combined_batch,
                    labels: y,
                    lrn_rate: lr
                })
                tf.logging.info("Iter: {}, Precision: {:.6f}, Adv precision: {:.6f}, Combined loss: {:.6f}"
                                .format(step + 1, acc, acc_adv, com_loss))

            elif FLAGS.adversarial_BIM:
                BIM_eps = np.abs(truncnorm.rvs(a=-2., b=2.) * FLAGS.adv_std)
                attack_iter = int(min(BIM_eps + 4, 1.25 * BIM_eps))
                adv_images = np.copy(x)
                # attack_iter is 0 whenever BIM_eps < 0.8; the loop below then
                # never runs, so report NaN instead of undefined or stale
                # values from an earlier step.
                acc = acc_adv = float('nan')
                for _ in range(attack_iter):
                    adv_images, acc, acc_adv = sess.run([adv_x_leak, precision, adv_precision_leak], feed_dict={
                        images: adv_images,
                        labels: y,
                    })

                combined_batch = np.concatenate([x[:num_normal], adv_images[num_normal:]], axis=0)
                _, com_loss = sess.run([train_op_adv, combined_cost], feed_dict={
                    images: combined_batch,
                    labels: y,
                    lrn_rate: lr
                })
                tf.logging.info("Iter: {}, Precision: {:.6f}, Adv precision: {:.6f}, Combined loss: {:.6f}"
                                .format(step + 1, acc, acc_adv, com_loss))

            if (step + 1) % 5000 == 0:
                saver.save(sess, save_path_ckpt, global_step=step + 1)
                tf.logging.info("Model saved! Path: " + save_path)