Example #1
class BIMAttack(AdversarialAttack):
    def __init__(self,
                 model,
                 step_size_iter=0.05,
                 max_perturbation=0.3,
                 n_iterations=10,
                 targeted=False,
                 norm_order=np.inf,
                 rand_init=None,
                 rand_minmax=0.3,
                 clip_min=None,
                 clip_max=None,
                 sanity_checks=True):
        super().__init__(model=model, clip_min=clip_min, clip_max=clip_max)
        self._targeted = targeted
        self._step_size_iter = step_size_iter
        self._max_perturbation = max_perturbation
        self._n_iterations = n_iterations
        self._norm_order = norm_order
        self._rand_init = rand_init
        self._rand_minmax = rand_minmax
        self._sanity_checks = sanity_checks

        with self.graph.as_default():
            self._method = BasicIterativeMethod(
                self._model,
                sess=self.session,
                eps=self._max_perturbation,
                eps_iter=self._step_size_iter,
                nb_iter=self._n_iterations,
                ord=self._norm_order,
                clip_min=self._clip_min,
                clip_max=self._clip_max,
                rand_init=self._rand_init,
                sanity_checks=self._sanity_checks)
Example #2
def main(_):
    # Images for inception classifier are normalized to be in [-1, 1] interval,
    # eps is a difference between pixels so it should be in [0, 2] interval.
    # Renormalizing epsilon from [0, 255] to [0, 2].
    eps = 2.0 * FLAGS.max_epsilon / 255.0
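    # For example (illustrative value, not fixed by this script): with
    # FLAGS.max_epsilon = 16 on the [0, 255] scale, eps = 2.0 * 16 / 255 ≈ 0.125
    # on the [-1, 1] scale used by the Inception inputs.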
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001

    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)

        model = InceptionModel(num_classes)

        bim = BasicIterativeMethod(model)
        x_adv = bim.generate(x_input, eps=eps, clip_min=-1., clip_max=1.)

        # Run computation
        saver = tf.train.Saver(slim.get_model_variables())
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=tf.train.Scaffold(saver=saver),
            checkpoint_filename_with_path=FLAGS.checkpoint_path,
            master=FLAGS.master)

        with tf.train.MonitoredSession(
                session_creator=session_creator) as sess:
            for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                adv_images = sess.run(x_adv, feed_dict={x_input: images})
                save_images(adv_images, filenames, FLAGS.output_dir)
Example #3
def backtracking(sess, x, y, model, x_test, y_test, params, batch_size=128):
    tf.set_random_seed(1822)
    set_log_level(logging.DEBUG)
    from cleverhans.attacks import BasicIterativeMethod
    method = BasicIterativeMethod(model, sess=sess)
    adv_x = method.generate(x, **params)
    num_batch = x_test.shape[0] // batch_size
    adv_imgs = []
    for i in range(num_batch):
        if i + 1 == num_batch:
            # Fold any leftover samples into the final batch so none are dropped.
            x_feed = x_test[i * batch_size:]
            y_feed = y_test[i * batch_size:]
        else:
            x_feed = x_test[i * batch_size:(i + 1) * batch_size]
            y_feed = y_test[i * batch_size:(i + 1) * batch_size]
        adv_imgs.append(sess.run(adv_x, feed_dict={x: x_feed, y: y_feed}))
    adv_imgs = np.concatenate(adv_imgs, axis=0)

    return adv_imgs
Example #4
        def fgsm_combo():
            acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_par)
            print('Test accuracy on legitimate examples: %0.4f\n' % acc)

            fgsm = FastGradientMethod(model, sess=sess)
            #initialize_uninitialized_global_variables(sess)
            adv_x = fgsm.generate(x, **fgsm_params)

            preds_adv = model.get_probs(adv_x)
            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv,
                             X_test,
                             Y_test,
                             args=eval_par)

            print(
                'Test accuracy on adversarial examples generated by fgsm: %0.4f\n'
                % acc)
            bim = BasicIterativeMethod(model, sess=sess)
            adv_x = bim.generate(x)
            preds_adv = model.get_probs(adv_x)

            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv,
                             X_test,
                             Y_test,
                             args=eval_par)
            print(
                'Test accuracy on adversarial examples generated by IterativeMethod: %0.4f\n'
                % acc)
Example #5
class TestBasicIterativeMethod(TestFastGradientMethod):
    def setUp(self):
        super(TestBasicIterativeMethod, self).setUp()
        import tensorflow as tf

        # The world's simplest neural network
        def my_model(x):
            W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
            h1 = tf.nn.sigmoid(tf.matmul(x, W1))
            W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
            res = tf.nn.softmax(tf.matmul(h1, W2))
            return res

        self.sess = tf.Session()
        self.model = my_model
        self.attack = BasicIterativeMethod(self.model, sess=self.sess)

    def test_generate_np_does_not_cache_graph_computation_for_nb_iter(self):
        import tensorflow as tf

        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val,
                                        eps=1.0,
                                        ord=np.inf,
                                        clip_min=-5.0,
                                        clip_max=5.0,
                                        nb_iter=10)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

        ok = [False]
        old_grads = tf.gradients

        def fn(*x, **y):
            ok[0] = True
            return old_grads(*x, **y)

        tf.gradients = fn

        x_adv = self.attack.generate_np(x_val,
                                        eps=1.0,
                                        ord=np.inf,
                                        clip_min=-5.0,
                                        clip_max=5.0,
                                        nb_iter=11)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

        tf.gradients = old_grads

        self.assertTrue(ok[0])
Example #6
class TestBasicIterativeMethod(TestFastGradientMethod):
    def setUp(self):
        super(TestBasicIterativeMethod, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = BasicIterativeMethod(self.model, sess=self.sess)

    def test_attack_strength(self):
        """
        If clipping is not done at each iteration (not passing clip_min and
        clip_max to fgm), this attack fails by
        np.mean(orig_labels == new_labels) == .39.
        """
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                        clip_min=0.5, clip_max=0.7,
                                        nb_iter=5)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_np_does_not_cache_graph_computation_for_nb_iter(self):

        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                        clip_min=-5.0, clip_max=5.0,
                                        nb_iter=10)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

        ok = [False]
        old_grads = tf.gradients

        def fn(*x, **y):
            ok[0] = True
            return old_grads(*x, **y)
        tf.gradients = fn

        x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                        clip_min=-5.0, clip_max=5.0,
                                        nb_iter=11)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

        tf.gradients = old_grads

        self.assertTrue(ok[0])
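The docstring of test_attack_strength above highlights the defining detail of the Basic Iterative Method: the perturbation is re-clipped on every iteration, both to the eps-ball around the clean input and to the valid input range. As a rough illustration only, here is a minimal NumPy sketch of the untargeted L-infinity update (bim_linf and its stand-in gradient function are hypothetical helpers, not CleverHans APIs):

import numpy as np

def bim_linf(x, grad_fn, eps=1.0, eps_iter=0.05, nb_iter=10,
             clip_min=-5.0, clip_max=5.0):
    """Untargeted L-inf BIM: step along sign(grad), then project."""
    x_adv = x.astype(np.float32)
    for _ in range(nb_iter):
        x_adv = x_adv + eps_iter * np.sign(grad_fn(x_adv))
        # Project back into the eps-ball around the clean input ...
        x_adv = np.clip(x_adv, x - eps, x + eps)
        # ... and into [clip_min, clip_max], at every iteration.
        x_adv = np.clip(x_adv, clip_min, clip_max)
    return x_adv

# Toy usage: maximise 0.5 * ||z||^2, whose gradient is simply z.
x0 = np.full((4, 2), 0.1, dtype=np.float32)
x_adv = bim_linf(x0, grad_fn=lambda z: z, eps=0.3, eps_iter=0.05, nb_iter=10)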
Example #7
def main(argv):
    checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)

    if checkpoint is None:
        raise ValueError("Couldn't find latest checkpoint in " +
                         FLAGS.checkpoint_dir)

    train_start = 0
    train_end = 60000
    test_start = 0
    test_end = 10000
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    assert Y_train.shape[1] == 10

    # NOTE: for compatibility with Madry Lab downloadable checkpoints,
    # we cannot enclose this in a scope or do anything else that would
    # change the automatic naming of the variables.
    model = MadryMNIST()

    x_input = tf.placeholder(tf.float32, shape=[None, 784])
    x_image = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    y = tf.placeholder(tf.float32, shape=[None, 10])

    if FLAGS.attack_type == 'fgsm':
        fgsm = FastGradientMethod(model)
        fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
        adv_x = fgsm.generate(x_image, **fgsm_params)
    elif FLAGS.attack_type == 'bim':
        bim = BasicIterativeMethod(model)
        bim_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.,
                      'nb_iter': 50,
                      'eps_iter': .01}
        adv_x = bim.generate(x_image, **bim_params)
    else:
        raise ValueError(FLAGS.attack_type)
    preds_adv = model.get_probs(adv_x)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Restore the checkpoint
        saver.restore(sess, checkpoint)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': FLAGS.batch_size}
        t1 = time.time()
        acc = model_eval(
            sess, x_image, y, preds_adv, X_test, Y_test, args=eval_par)
        t2 = time.time()
        print("Took", t2 - t1, "seconds")
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
Example #8
def main(argv):
    checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)

    if checkpoint is None:
        raise ValueError("Couldn't load checkpoint")

    train_start = 0
    train_end = 60000
    test_start = 0
    test_end = 10000
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    assert Y_train.shape[1] == 10

    # NOTE: for compatibility with Madry Lab downloadable checkpoints,
    # we cannot enclose this in a scope or do anything else that would
    # change the automatic naming of the variables.
    model = MadryMNIST()

    x_input = tf.placeholder(tf.float32, shape=[None, 784])
    x_image = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
    y = tf.placeholder(tf.float32, shape=[None, 10])

    if FLAGS.attack_type == 'fgsm':
        fgsm = FastGradientMethod(model)
        fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
        adv_x = fgsm.generate(x_image, **fgsm_params)
    elif FLAGS.attack_type == 'bim':
        bim = BasicIterativeMethod(model)
        bim_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.,
                      'nb_iter': 50,
                      'eps_iter': .01}
        adv_x = bim.generate(x_image, **bim_params)
    else:
        raise ValueError(FLAGS.attack_type)
    preds_adv = model.get_probs(adv_x)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Restore the checkpoint
        saver.restore(sess, checkpoint)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': FLAGS.batch_size}
        t1 = time.time()
        acc = model_eval(
            sess, x_image, y, preds_adv, X_test, Y_test, args=eval_par)
        t2 = time.time()
        print("Took", t2 - t1, "seconds")
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
Example #9
def bim_attack(train_data, model, sess):
    wrap = KerasModelWrapper(model)
    bim = BasicIterativeMethod(wrap, sess=sess)
    bim_params = {
        'eps_iter': 0.01,
        'nb_iter': 10,
        'clip_min': 0.,
        'clip_max': 1.
    }
    adv_x = bim.generate_np(train_data, **bim_params)
    return adv_x
Example #10
def generate_bim_examples(sess, model, x, y, X, Y, attack_params, verbose,
                          attack_log_fpath):
    """
    Untargeted attack. Y is not needed.
    """
    bim = BasicIterativeMethod(model, back='tf', sess=sess)
    bim_params = {'eps': 0.1,
                  'eps_iter': 0.05,
                  'nb_iter': 10,
                  # Pass y (the true labels) for an untargeted attack; pass the
                  # target labels Y instead for a targeted one.
                  'y': y,
                  'ord': np.inf,
                  'clip_min': 0,
                  'clip_max': 1}
    bim_params = override_params(bim_params, attack_params)

    X_adv = bim.generate_np(X, **bim_params)
    return X_adv
Example #11
File: util.py Project: charmse/cse478
def bim_attack(train_data, model, sess):
    wrap = KerasModelWrapper(model)
    bim = BasicIterativeMethod(wrap, sess=sess)
    bim_params = {'eps_iter': 0.01,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.}
    # Generate adversarial examples in chunks of 100 and stitch them together.
    adv_batches = []
    for i in range(0, train_data.shape[0], 100):
        adv_batches.append(
            bim.generate_np(train_data[i:i + 100], **bim_params))
    adv_x = np.concatenate(adv_batches, axis=0)
    return adv_x
Example #12
    def setUp(self):
        super(TestBasicIterativeMethod, self).setUp()
        import tensorflow as tf

        # The world's simplest neural network
        def my_model(x):
            W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
            h1 = tf.nn.sigmoid(tf.matmul(x, W1))
            W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
            res = tf.nn.softmax(tf.matmul(h1, W2))
            return res

        self.sess = tf.Session()
        self.model = my_model
        self.attack = BasicIterativeMethod(self.model, sess=self.sess)
Example #13
class BIMAttack(AdversarialAttack):
    def __init__(self,
                 model,
                 step_size_iter=0.05,
                 max_perturbation=0.3,
                 n_iterations=10,
                 targeted=False,
                 norm_order=np.inf,
                 rand_init=None,
                 rand_minmax=0.3,
                 clip_min=None,
                 clip_max=None,
                 sanity_checks=True):
        super().__init__(model=model, clip_min=clip_min, clip_max=clip_max)
        self._targeted = targeted
        self._step_size_iter = step_size_iter
        self._max_perturbation = max_perturbation
        self._n_iterations = n_iterations
        self._norm_order = norm_order
        self._rand_init = rand_init
        self._rand_minmax = rand_minmax
        self._sanity_checks = sanity_checks

        with self.graph.as_default():
            self._method = BasicIterativeMethod(
                self._model,
                sess=self.session,
                eps=self._max_perturbation,
                eps_iter=self._step_size_iter,
                nb_iter=self._n_iterations,
                ord=self._norm_order,
                clip_min=self._clip_min,
                clip_max=self._clip_max,
                rand_init=self._rand_init,
                sanity_checks=self._sanity_checks)

    def attack_method(self, labels):
        if labels is not None:
            if self._targeted:
                return self._method.generate(x=self._x_clean,
                                             y_target=labels,
                                             rand_minmax=self._rand_minmax)
            else:
                return self._method.generate(x=self._x_clean,
                                             y=labels,
                                             rand_minmax=self._rand_minmax)
        return self._method.generate(x=self._x_clean,
                                     rand_minmax=self._rand_minmax)
Example #14
def get_BIM_samples(loaded_model, samples, nb_iter):

    sess = backend.get_session()
    wrap = KerasModelWrapper(loaded_model)

    bim = BasicIterativeMethod(wrap, sess=sess)
    bim_params = {
        'eps_iter': 0.05,
        'nb_iter': nb_iter,
        'clip_min': 0.,
        'clip_max': 1.
    }

    adv_x = bim.generate_np(samples, **bim_params)

    return adv_x
Example #15
def generate_bim_examples(sess, model, x, y, X, Y, attack_params, verbose,
                          attack_log_fpath):
    """
    Untargeted attack. Y is not needed.
    """
    bim = BasicIterativeMethod(KerasModelWrapper(model), back='tf', sess=sess)
    bim_params = {
        'eps': 0.1,
        'eps_iter': 0.05,
        'nb_iter': 10,
        'clip_min': 0,
        'clip_max': 1
    }

    X_adv = bim.generate_np(X, **bim_params)
    return X_adv
Example #16
def backtracking(sess, x, model, x_test, params, batch_size=128):
    from cleverhans.attacks import BasicIterativeMethod
    method = BasicIterativeMethod(model, sess=sess)

    adv_x = method.generate(x, **params)
    num_batch = x_test.shape[0] // batch_size
    adv_imgs = []
    for i in range(num_batch):
        if i + 1 == num_batch:
            x_feed = x_test[i*batch_size:]
        else:
            x_feed = x_test[i*batch_size:(i+1)*batch_size]
        adv_img = sess.run(adv_x, feed_dict={x: x_feed})
        adv_imgs.append(adv_img)

    adv_imgs = np.concatenate(adv_imgs, axis=0)
    return adv_imgs
Example #17
def evaluate_checkpoint(filename):
    if attack_method == 'BIM':
        bim = BasicIterativeMethod(model)
        bim_params = {
            'eps': 0.3,
            'clip_min': 0.,
            'clip_max': 1.,
            'nb_iter': 50,
            'eps_iter': .01
        }
        adv_x = bim.generate(x_image, **bim_params)
    elif attack_method == 'FGM':
        FGM_attack = FastGradientMethod(model)
        FGM_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
        adv_x = FGM_attack.generate(x_image, **FGM_params)
    elif attack_method == 'PGD':
        pgd = ProjectedGradientDescent(model)
        pgd_params = {
            'eps': 0.09,
            'clip_min': 0.,
            'clip_max': 1.,
            'nb_iter': 40,
            'eps_iter': .01
        }
        adv_x = pgd.generate(x_image, **pgd_params)
    else:
        raise ValueError(attack_method)
    preds_adv = model.get_probs(adv_x)

    with tf.Session() as sess:
        # Restore the checkpoint
        saver = tf.train.Saver(var_list=model.all_variables)
        saver.restore(sess, filename)

        eval_par = {'batch_size': batch_size}
        t1 = time.time()
        acc = model_eval(sess,
                         x_image,
                         y,
                         preds_adv,
                         X_test,
                         Y_test,
                         args=eval_par)
        t2 = time.time()
        print("Took", t2 - t1, "seconds")
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
Example #18
    def adversarial_training(epsilon=0.3,
                             eps_iter=0.05,
                             nb_iter=10,
                             order=np.inf):
        bim2 = BasicIterativeMethod(wrap_2, sess=sess)
        preds_2_adv = model_2(bim2.generate(x, **fgsm_params))

        def evaluate_2():
            # Accuracy of adversarially trained model on legitimate test inputs
            eval_params = {'batch_size': batch_size}
            accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds_2,
                                  X_test,
                                  Y_test,
                                  args=eval_params)
            print('Test accuracy on legitimate examples: %0.4f' % accuracy)
            report.adv_train_clean_eval = accuracy

            # Accuracy of the adversarially trained model on adversarial examples
            accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds_2_adv,
                                  X_test,
                                  Y_test,
                                  args=eval_params)
            print('Test accuracy on adversarial examples: %0.4f' % accuracy)
            report.adv_train_adv_eval = accuracy

        # Perform and evaluate adversarial training
        model_train(sess,
                    x,
                    y,
                    preds_2,
                    X_train,
                    Y_train,
                    predictions_adv=preds_2_adv,
                    evaluate=evaluate_2,
                    args=train_params,
                    save=False,
                    rng=rng)
Example #19
    def _BIM(self):
        bim_attack = BasicIterativeMethod(self.wrapped_model, sess=self.sess)
        eps = 0

        if self.dataset == 'MNIST':
            for _ in range(5):
                eps = eps + 0.1
                params = {
                    'eps': eps,
                    'eps_iter': eps / 10,
                    'nb_iter': 10,
                    'y': self.y,
                    'clip_min': 0.,
                    'clip_max': 1.
                }
                adv_x = bim_attack.generate(self.x, **params)
                adv_x = tf.stop_gradient(adv_x)

                print(f'Epsilon: {eps}')
                self.out_file.write(f'Epsilon: {eps}\n')
                self.save_images(adv_x, self.save_loc + f'_e{eps}')

        if self.dataset == 'CIFAR10':
            for _ in range(10):
                eps = eps + 1
                params = {
                    'eps': eps / 255,
                    'eps_iter': eps / 255 / 10,
                    'nb_iter': 10,
                    'y': self.y,
                    'clip_min': 0.,
                    'clip_max': 1.
                }
                adv_x = bim_attack.generate(self.x, **params)
                adv_x = tf.stop_gradient(adv_x)

                print(f'Epsilon: {eps}')
                self.out_file.write(f'Epsilon: {eps}\n')
                self.save_images(adv_x, self.save_loc + f'_e{eps}')
Example #20
def get_adversarial_attack_and_params(attack_name, wrap, sess):
    params = None
    stop_gradient = False

    if attack_name == "fgsm":
        attack = FastGradientMethod(wrap, sess=sess)
        params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}
        stop_gradient = True
    if attack_name == "deepfool":
        attack = DeepFool(wrap, sess=sess)
    if attack_name == "lbfgs":
        attack = LBFGS(wrap, sess=sess)
    if attack_name == "saliency":
        attack = SaliencyMapMethod(wrap, sess=sess)
    if attack_name == "bim":
        attack = BasicIterativeMethod(wrap, sess=sess)

    return attack, params, stop_gradient
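A hedged sketch of how the returned triple might be consumed (the placeholder x, the wrapped model wrap, and the Session sess are assumed to already exist, as in the surrounding examples):

# Hypothetical caller for get_adversarial_attack_and_params.
attack, params, stop_gradient = get_adversarial_attack_and_params("bim", wrap, sess)
params = params or {}  # "bim" returns params=None, so fall back to the attack defaults
adv_x = attack.generate(x, **params)
if stop_gradient:
    # Only the "fgsm" branch requests this: treat adv_x as a constant.
    adv_x = tf.stop_gradient(adv_x)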
Example #21
    def next_images():
        tf.logging.set_verbosity(tf.logging.INFO)
        graph_fgsm = tf.Graph()
        print("{} generator graph is ready!".format(mode))
        with graph_fgsm.as_default():
            x_input = tf.placeholder(tf.float32, shape=batch_shape)

            model = InceptionModelProb(importer.num_classes, x_input)
            params = {'eps': eps}
            if mode == 'fgsm':
                graph = FastGradientMethod(model)
            elif mode == 'ifgsm':
                params['nb_iter'] = 10
                graph = BasicIterativeMethod(model)
            else:
                raise ValueError("Unsupported mode: {}".format(mode))

            print('graph params: {}'.format(params))
            x_adv = graph.generate(x_input, **params)

            saver = tf.train.Saver(slim.get_model_variables())
            session_creator = tf.train.ChiefSessionCreator(
                scaffold=tf.train.Scaffold(saver=saver),
                checkpoint_filename_with_path=importer.checkpoint_path,
                master=importer.tensorflow_master)

            image_iterator = importer.load_images_generator(batch_shape)
            with tf.train.MonitoredSession(
                    session_creator=session_creator) as sess:
                while True:
                    filenames, images = next(image_iterator, (None, None))
                    if filenames is None: break
                    adversarial_images = sess.run(x_adv,
                                                  feed_dict={x_input: images})
                    #print("Image:{}, diff:{}".format(filenames[0],np.sum(np.abs(images[0]-adversarial_images[0]))))
                    if is_return_orig_images:
                        yield filenames, adversarial_images, images
                    else:
                        yield filenames, adversarial_images
Example #22
def mnist_tutorial(train_start=0,
                   train_end=60000,
                   test_start=0,
                   test_end=10000,
                   nb_epochs=6,
                   batch_size=128,
                   epsilon=0.3,
                   learning_rate=0.001,
                   train_dir="/tmp",
                   filename="mnist.ckpt",
                   load_model=False,
                   testing=False):
    """
    MNIST CleverHans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param train_dir: Directory storing the saved model
    :param filename: Filename to save model under
    :param load_model: True for load, False for not load
    :param testing: if true, test error is calculated
    :return: an AccuracyReport object
    """
    keras.layers.core.K.set_learning_phase(0)

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Use label smoothing
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = cnn_model_BIM()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Evaluate the accuracy of the MNIST model on legitimate test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
        report.clean_train_clean_eval = acc
        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': train_dir,
        'filename': filename
    }
    ckpt = tf.train.get_checkpoint_state(train_dir)
    ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

    rng = np.random.RandomState([2017, 8, 30])
    if load_model and ckpt_path:
        saver = tf.train.Saver()
        saver.restore(sess, ckpt_path)
        print("Model loaded from: {}".format(ckpt_path))
        evaluate()
    else:
        print("Model was not loaded, training from scratch.")
        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    save=False,
                    rng=rng)

    # Calculate training error
    if testing:
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds, X_train, Y_train, args=eval_params)
        report.train_clean_train_clean_eval = acc

    # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
    wrap = KerasModelWrapper(model)

    print("FastGradientMethod")
    fgsm1 = FastGradientMethod(wrap, sess=sess)
    for epsilon in [0.005, 0.01, 0.05, 0.1, 0.5, 1.0]:
        print("Epsilon =", epsilon),
        fgsm_params = {'eps': epsilon, 'clip_min': None, 'clip_max': None}
        adv_x = fgsm1.generate(x, **fgsm_params)
        # Consider the attack to be constant
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = model(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

    print("BasicIterativeMethod")
    bim = BasicIterativeMethod(wrap, sess=sess)
    for epsilon, order in zip(
        [0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 0.5, 1.0],
        [np.inf, np.inf, np.inf, np.inf, np.inf, np.inf, 2, 2]):
        print("Epsilon =", epsilon),
        fgsm_params = {
            'eps': epsilon,
            'clip_min': 0.,
            'clip_max': 1.,
            'ord': order
        }
        adv_x = bim.generate(x, **fgsm_params)
        # Consider the attack to be constant
        adv_x = tf.stop_gradient(adv_x)
        preds_adv = model(adv_x)

        # Evaluate the accuracy of the MNIST model on adversarial examples
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Test accuracy on adversarial examples: %0.4f\n' % acc)
        report.clean_train_adv_eval = acc

    # Calculating train error
    if testing:
        eval_par = {'batch_size': batch_size}
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv,
                         X_train,
                         Y_train,
                         args=eval_par)
        report.train_clean_train_adv_eval = acc
    # Early return: the adversarial-training section below is not executed.
    return report

    print("Repeating the process, using adversarial training")
    # Redefine TF model graph
    model_2 = cnn_model()
    preds_2 = model_2(x)
    wrap_2 = KerasModelWrapper(model_2)
    #fgsm2 = FastGradientMethod(wrap_2, sess=sess)
    bim2 = BasicIterativeMethod(wrap_2, sess=sess)
    preds_2_adv = model_2(bim2.generate(x, **fgsm_params))

    def evaluate_2():
        # Accuracy of adversarially trained model on legitimate test inputs
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % accuracy)
        report.adv_train_clean_eval = accuracy

        # Accuracy of the adversarially trained model on adversarial examples
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              X_test,
                              Y_test,
                              args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
        report.adv_train_adv_eval = accuracy

    # Perform and evaluate adversarial training
    model_train(sess,
                x,
                y,
                preds_2,
                X_train,
                Y_train,
                predictions_adv=preds_2_adv,
                evaluate=evaluate_2,
                args=train_params,
                save=False,
                rng=rng)

    # Calculate training errors
    if testing:
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2,
                              X_train,
                              Y_train,
                              args=eval_params)
        report.train_adv_train_clean_eval = accuracy
        accuracy = model_eval(sess,
                              x,
                              y,
                              preds_2_adv,
                              X_train,
                              Y_train,
                              args=eval_params)
        report.train_adv_train_adv_eval = accuracy

    return report
Example #23
def JSMA_FGSM_BIM(train_start=0,
                  train_end=60000,
                  test_start=0,
                  test_end=10000,
                  nb_epochs=6,
                  batch_size=128,
                  learning_rate=0.001,
                  clean_train=True,
                  testing=False,
                  backprop_through_attack=False,
                  nb_filters=64):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param nb_filters: number of convolutional filters to use in the model
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    sess = tf.Session()

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)
    source_samples = batch_size
    # Use label smoothing
    # Hopefully this doesn't screw up JSMA...
    assert Y_train.shape[1] == 10
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    model_path = "models/mnist"
    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_par = {'batch_size': batch_size}
    rng = np.random.RandomState([2017, 8, 30])

    if clean_train:
        model = make_basic_cnn(nb_filters=nb_filters)
        preds = model.get_probs(x)

        def evaluate():
            # Evaluate the accuracy of the MNIST model on legitimate test
            # examples
            eval_params = {'batch_size': batch_size}
            acc = model_eval(sess,
                             x,
                             y,
                             preds,
                             X_test,
                             Y_test,
                             args=eval_params)
            report.clean_train_clean_eval = acc
            assert X_test.shape[0] == test_end - test_start, X_test.shape
            print('Test accuracy on legitimate examples: %0.4f' % acc)

        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    evaluate=evaluate,
                    args=train_params,
                    rng=rng)
        print("#####Starting attacks on clean model#####")
        #################################################################
        #Clean test against JSMA
        jsma_params = {
            'theta': 1.,
            'gamma': 0.1,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': None
        }

        jsma = SaliencyMapMethod(model, back='tf', sess=sess)
        adv_x = jsma.generate(x, **jsma_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against FGSM
        fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

        fgsm = FastGradientMethod(model, sess=sess)
        adv_x = fgsm.generate(x, **fgsm_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on FGSM adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against BIM
        bim_params = {
            'eps': 0.3,
            'eps_iter': 0.01,
            'nb_iter': 100,
            'clip_min': 0.,
            'clip_max': 1.
        }
        bim = BasicIterativeMethod(model, sess=sess)
        adv_x = bim.generate(x, **bim_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on BIM adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against EN
        en_params = {
            'binary_search_steps': 1,
            #'y': None,
            'max_iterations': 100,
            'learning_rate': 0.1,
            'batch_size': source_samples,
            'initial_const': 10
        }
        en = ElasticNetMethod(model, back='tf', sess=sess)
        adv_x = en.generate(x, **en_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on EN adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on EN adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against DF
        deepfool_params = {
            'nb_candidate': 10,
            'overshoot': 0.02,
            'max_iter': 50,
            'clip_min': 0.,
            'clip_max': 1.
        }
        deepfool = DeepFool(model, sess=sess)
        adv_x = deepfool.generate(x, **deepfool_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on DeepFool adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on DF adversarial examples: %0.4f' % acc)
        ################################################################
        #Clean test against VAT
        vat_params = {
            'eps': 2.0,
            'num_iterations': 1,
            'xi': 1e-6,
            'clip_min': 0.,
            'clip_max': 1.
        }
        vat = VirtualAdversarialMethod(model, sess=sess)
        adv_x = vat.generate(x, **vat_params)
        preds_adv = model.get_probs(adv_x)

        # Evaluate the accuracy of the MNIST model on VAT adversarial examples
        acc = model_eval(sess, x, y, preds_adv, X_test, Y_test, args=eval_par)
        print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc)
        ################################################################
        print("Repeating the process, using adversarial training\n")
    # Redefine TF model graph
    model_2 = make_basic_cnn(nb_filters=nb_filters)
    preds_2 = model_2(x)
    #################################################################
    #Adversarial test against JSMA
    jsma_params = {
        'theta': 1.,
        'gamma': 0.1,
        'clip_min': 0.,
        'clip_max': 1.,
        'y_target': None
    }

    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    adv_x = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against FGSM
    fgsm_params = {'eps': 0.3, 'clip_min': 0., 'clip_max': 1.}

    fgsm = FastGradientMethod(model, sess=sess)
    adv_x = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against BIM
    bim_params = {
        'eps': 0.3,
        'eps_iter': 0.01,
        'nb_iter': 100,
        'clip_min': 0.,
        'clip_max': 1.
    }
    bim = BasicIterativeMethod(model, sess=sess)
    adv_x = bim.generate(x, **bim_params)
    preds_adv_bim = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against EN
    en_params = {
        'binary_search_steps': 5,
        #'y': None,
        'max_iterations': 100,
        'learning_rate': 0.1,
        'batch_size': source_samples,
        'initial_const': 10
    }
    en = ElasticNetMethod(model, back='tf', sess=sess)
    adv_x = en.generate(x, **en_params)
    preds_adv_en = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against DF
    deepfool_params = {
        'nb_candidate': 10,
        'overshoot': 0.02,
        'max_iter': 200,
        'clip_min': 0.,
        'clip_max': 1.
    }
    deepfool = DeepFool(model, sess=sess)
    adv_x = deepfool.generate(x, **deepfool_params)
    preds_adv_df = model.get_probs(adv_x)
    ################################################################
    #Adversarial test against VAT
    vat_params = {
        'eps': 2.0,
        'num_iterations': 1,
        'xi': 1e-6,
        'clip_min': 0.,
        'clip_max': 1.
    }
    vat = VirtualAdversarialMethod(model, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model.get_probs(adv_x)
    ################################################################
    print("#####Evaluate trained model#####")

    def evaluate_2():
        # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_jsma,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on JSMA adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_fgsm,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on FGSM adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on BIM adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_bim,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on BIM adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on EN adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_en,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on EN adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on DF adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_df,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on DF adversarial examples: %0.4f' % acc)

        # Evaluate the accuracy of the MNIST model on VAT adversarial examples
        acc = model_eval(sess,
                         x,
                         y,
                         preds_adv_vat,
                         X_test,
                         Y_test,
                         args=eval_par)
        print('Test accuracy on VAT adversarial examples: %0.4f\n' % acc)

    preds_2_adv = [
        preds_adv_jsma, preds_adv_fgsm, preds_adv_bim
        # ,preds_adv_en
        # ,preds_adv_df
    ]

    model_train(sess,
                x,
                y,
                preds_2,
                X_train,
                Y_train,
                predictions_adv=preds_2_adv,
                evaluate=evaluate_2,
                args=train_params,
                rng=rng)
Example #24
def attack_classifier(sess, x, model, x_test, attack_method="fgsm", target=None, batch_size=128):

    if attack_method == "fgsm":
        from cleverhans.attacks import FastGradientMethod
        params = {'eps': 8/255,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = FastGradientMethod(model, sess=sess)

    elif attack_method == "basic_iterative":
        from cleverhans.attacks import BasicIterativeMethod
        params = {'eps': 8./255,
                  'eps_iter': 1./255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.,
                  'ord': np.inf
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = BasicIterativeMethod(model,sess = sess)

    elif attack_method == "momentum_iterative":
        from cleverhans.attacks import MomentumIterativeMethod
        params = {'eps':8/255,
                  'eps_iter':1/255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = MomentumIterativeMethod(model,sess = sess)

    elif attack_method == "saliency":
        from cleverhans.attacks import SaliencyMapMethod
        params = {'theta':8/255,
                  'gamma':0.1,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        assert target is None
        method = SaliencyMapMethod(model,sess = sess)

    elif attack_method == "virtual":
        from cleverhans.attacks import VirtualAdversarialMethod
        params = {'eps':8/255,
                  'num_iterations':10,
                  'xi' :1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        assert target is None
        method = VirtualAdversarialMethod(model,sess = sess)

    elif attack_method == "cw":
        from cleverhans.attacks import CarliniWagnerL2
        params = {
            "confidence":0,
            "batch_size":128,
            "learning_rate":1e-4,
            "binary_search_steps":10,
            "max_iterations":1000,
            "abort_early": True,
            "initial_const":1e-2,
            "clip_min":0,
            "clip_max":1
        }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = CarliniWagnerL2(model,sess = sess)

    elif attack_method == "elastic_net":
        from cleverhans.attacks import ElasticNetMethod
        params = {
            "fista": "FISTA",
            "beta": 0.1,
            "decision_rule":"EN",
            "confidence":0,
            "batch_size":128,
            "learning_rate":1e-4,
            "binary_search_steps":10,
            "max_iterations":1000,
            "abort_early": True,
            "initial_const":1e-2,
            "clip_min":0,
            "clip_max":1
        }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = ElasticNetMethod(model,sess = sess)

    elif attack_method == "deepfool":
        from cleverhans.attacks import DeepFool
        params = {
            "nb_candidate":10,
            "overshoot":1e-3,
            "max_iter":100,
            "nb_classes":10,
            "clip_min":0,
            "clip_max":1
        }
        assert target is None
        method = DeepFool(model,sess = sess)

    elif attack_method == "lbfgs":
        from cleverhans.attacks import LBFGS
        params = {
            'batch_size':128,
            "binary_search_steps":10,
            "max_iterations":1000,
            "initial_const":1e-2,
            'clip_min': 0.,
            'clip_max': 1.
        }
        assert target is not None
        params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = LBFGS(model,sess = sess)

    elif attack_method == "madry":
        from cleverhans.attacks import MadryEtAl
        params = {'eps':8/255,
                  'eps_iter':1/255,
                  'nb_iter':10,
                  'ord':np.inf,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = MadryEtAl(model, sess = sess)

    elif attack_method == "SPSA":
        from cleverhans.attacks import SPSA
        params = {
            'epsilon':1/255,
            'num_steps':10,
            'is_targeted':False,
            'early_stop_loss_threshold':None,
            'learning_rate':0.01,
            'delta':0.01,
            'batch_size':128,
            'spsa_iters':1,
            'is_debug':False
        }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
            params["is_targeted"] = True
        method = SPSA(model, sess = sess)

    else:
        raise ValueError("Can not recognize this attack method: %s" % attack_method)

    adv_x = method.generate(x, **params)
    num_batch = x_test.shape[0] // batch_size
    adv_imgs = []
    for i in range(num_batch):
        x_feed = x_test[i*batch_size:(i+1)*batch_size]
        #y_feed = y_test[i*batch_size:(i+1)*batch_size]

        adv_img = sess.run(adv_x, feed_dict={x: x_feed})
        adv_imgs.append(adv_img)

    adv_imgs = np.concatenate(adv_imgs, axis=0)
    return adv_imgs
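A hedged usage sketch for the dispatcher above (the placeholder x, Session sess, wrapped model, and array x_test are assumed to be constructed as in the earlier examples; the target class 3 is purely illustrative):

# Untargeted BIM over the whole test set.
adv_bim = attack_classifier(sess, x, model, x_test,
                            attack_method="basic_iterative")

# Targeted variant: push every image toward class 3.
adv_bim_targeted = attack_classifier(sess, x, model, x_test,
                                     attack_method="basic_iterative", target=3)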
Example #25
    def setUp(self):
        super(TestBasicIterativeMethod, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = BasicIterativeMethod(self.model, sess=self.sess)
def train(cifar10_data, epochs, L, learning_rate, scale3, Delta2, epsilon2,
          eps2_ratio, alpha, perturbFM, fgsm_eps, total_eps, logfile):
    """Train CIFAR-10 for a number of steps."""
    logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , epsilon \t %d \n" %
                  (fgsm_eps, learning_rate, alpha, total_eps))
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        eps_benign = 1 / (1 + eps2_ratio) * (epsilon2)
        eps_adv = eps2_ratio / (1 + eps2_ratio) * (epsilon2)

        # Parameters Declarification
        #with tf.variable_scope('conv1') as scope:
        kernel1 = _variable_with_weight_decay(
            'kernel1',
            shape=[4, 4, 3, 128],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0,
            collect=[AECODER_VARIABLES])
        biases1 = _bias_on_cpu('biases1', [128],
                               tf.constant_initializer(0.0),
                               collect=[AECODER_VARIABLES])

        shape = kernel1.get_shape().as_list()
        w_t = tf.reshape(kernel1, [-1, shape[-1]])
        w = tf.transpose(w_t)
        sing_vals = tf.svd(w, compute_uv=False)
        sensitivity = tf.reduce_max(sing_vals)
        gamma = 2 * Delta2 / (L * sensitivity)  # 2*3*(14*14 + 2)*16/(L*sensitivity)

        #with tf.variable_scope('conv2') as scope:
        kernel2 = _variable_with_weight_decay(
            'kernel2',
            shape=[5, 5, 128, 128],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0,
            collect=[CONV_VARIABLES])
        biases2 = _bias_on_cpu('biases2', [128],
                               tf.constant_initializer(0.1),
                               collect=[CONV_VARIABLES])
        #with tf.variable_scope('conv3') as scope:
        kernel3 = _variable_with_weight_decay(
            'kernel3',
            shape=[5, 5, 256, 256],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0,
            collect=[CONV_VARIABLES])
        biases3 = _bias_on_cpu('biases3', [256],
                               tf.constant_initializer(0.1),
                               collect=[CONV_VARIABLES])
        #with tf.variable_scope('local4') as scope:
        kernel4 = _variable_with_weight_decay(
            'kernel4',
            shape=[int(image_size / 4)**2 * 256, hk],
            stddev=0.04,
            wd=0.004,
            collect=[CONV_VARIABLES])
        biases4 = _bias_on_cpu('biases4', [hk],
                               tf.constant_initializer(0.1),
                               collect=[CONV_VARIABLES])
        #with tf.variable_scope('local5') as scope:
        kernel5 = _variable_with_weight_decay(
            'kernel5', [hk, 10],
            stddev=np.sqrt(2.0 /
                           (int(image_size / 4)**2 * 256)) / math.ceil(5 / 2),
            wd=0.0,
            collect=[CONV_VARIABLES])
        biases5 = _bias_on_cpu('biases5', [10],
                               tf.constant_initializer(0.1),
                               collect=[CONV_VARIABLES])

        #scale2 = tf.Variable(tf.ones([hk]))
        #beta2 = tf.Variable(tf.zeros([hk]))

        params = [
            kernel1, biases1, kernel2, biases2, kernel3, biases3, kernel4,
            biases4, kernel5, biases5
        ]
        ########

        # Build a Graph that computes the logits predictions from the
        # inference model.
        FM_h = tf.placeholder(tf.float32, [None, 14, 14, 128])
        noise = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
        adv_noise = tf.placeholder(tf.float32,
                                   [None, image_size, image_size, 3])

        x = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
        adv_x = tf.placeholder(tf.float32, [None, image_size, image_size, 3])

        # Auto-Encoder #
        Enc_Layer2 = EncLayer(inpt=adv_x,
                              n_filter_in=3,
                              n_filter_out=128,
                              filter_size=3,
                              W=kernel1,
                              b=biases1,
                              activation=tf.nn.relu)
        pretrain_adv = Enc_Layer2.get_train_ops2(xShape=tf.shape(adv_x)[0],
                                                 Delta=Delta2,
                                                 epsilon=epsilon2,
                                                 batch_size=L,
                                                 learning_rate=learning_rate,
                                                 W=kernel1,
                                                 b=biases1,
                                                 perturbFMx=adv_noise,
                                                 perturbFM_h=FM_h)
        Enc_Layer3 = EncLayer(inpt=x,
                              n_filter_in=3,
                              n_filter_out=128,
                              filter_size=3,
                              W=kernel1,
                              b=biases1,
                              activation=tf.nn.relu)
        pretrain_benign = Enc_Layer3.get_train_ops2(
            xShape=tf.shape(x)[0],
            Delta=Delta2,
            epsilon=epsilon2,
            batch_size=L,
            learning_rate=learning_rate,
            W=kernel1,
            b=biases1,
            perturbFMx=noise,
            perturbFM_h=FM_h)
        cost = tf.reduce_sum((Enc_Layer2.cost + Enc_Layer3.cost) / 2.0)
        ###

        x_image = x + noise
        y_conv = inference(x_image, FM_h, params)
        softmax_y_conv = tf.nn.softmax(y_conv)
        y_ = tf.placeholder(tf.float32, [None, 10])

        adv_x += adv_noise
        y_adv_conv = inference(adv_x, FM_h, params)
        adv_y_ = tf.placeholder(tf.float32, [None, 10])

        # Calculate loss. Apply Taylor Expansion for the output layer
        perturbW = perturbFM * params[8]
        loss = cifar10.TaylorExp(y_conv, y_, y_adv_conv, adv_y_, L, alpha,
                                 perturbW)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        #pretrain_step = tf.train.AdamOptimizer(1e-4).minimize(pretrain_adv, global_step=global_step, var_list=[kernel1, biases1]);
        pretrain_var_list = tf.get_collection(AECODER_VARIABLES)
        train_var_list = tf.get_collection(CONV_VARIABLES)
        #print(pretrain_var_list)
        #print(train_var_list)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            pretrain_step = tf.train.AdamOptimizer(learning_rate).minimize(
                pretrain_adv + pretrain_benign,
                global_step=global_step,
                var_list=pretrain_var_list)
            train_op = cifar10.train(loss,
                                     global_step,
                                     learning_rate,
                                     _var_list=train_var_list)
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))

        sess.run(kernel1.initializer)
        dp_epsilon = 1.0
        _gamma = sess.run(gamma)
        _gamma_x = Delta2 / L
        epsilon2_update = epsilon2 / (1.0 + 1.0 / _gamma + 1 / _gamma_x)
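        # epsilon2_update splits the epsilon2 budget: epsilon2_update plus the two terms
        # whose sum is printed below (epsilon2_update/_gamma and epsilon2_update/_gamma_x)
        # add back up to epsilon2.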
        print(epsilon2_update / _gamma + epsilon2_update / _gamma_x)
        print(epsilon2_update)
        delta_r = fgsm_eps * (image_size**2)
        _sensitivityW = sess.run(sensitivity)
        delta_h = _sensitivityW * (14**2)
        #delta_h = 1.0 * delta_r; #sensitivity*(14**2) = sensitivity*(\beta**2) can also be used
        #dp_mult = (Delta2/(L*epsilon2))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2))/(delta_h / dp_epsilon)
        dp_mult = (Delta2 / (L * epsilon2_update)) / (delta_r / dp_epsilon) + (
            2 * Delta2 / (L * epsilon2_update)) / (delta_h / dp_epsilon)
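        # dp_mult sums the ratios of the Laplace noise scale to the pixel-space attack
        # budget (delta_r) and to the feature-map budget (delta_h), both scaled by
        # dp_epsilon; it is used later to normalize the certified robustness size.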

        dynamic_eps = tf.placeholder(tf.float32)
        """y_test = inference(x, FM_h, params)
    softmax_y = tf.nn.softmax(y_test);
    c_x_adv = fgsm(x, softmax_y, eps=dynamic_eps/3, clip_min=-1.0, clip_max=1.0)
    x_adv = tf.reshape(c_x_adv, [L, image_size, image_size, 3])"""

        attack_switch = {
            'fgsm': True,
            'ifgsm': True,
            'deepfool': False,
            'mim': True,
            'spsa': False,
            'cwl2': False,
            'madry': True,
            'stm': False
        }
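        # Only attacks flagged True get a generate() tensor built below; the same keys
        # are reused when sampling adversarial training batches and at evaluation time.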

        ch_model_probs = CustomCallableModelWrapper(
            callable_fn=inference_test_input_probs,
            output_layer='probs',
            params=params,
            image_size=image_size,
            adv_noise=adv_noise)

        # define each attack method's tensor
        mu_alpha = tf.placeholder(tf.float32, [1])
        attack_tensor_dict = {}
        # FastGradientMethod
        if attack_switch['fgsm']:
            print('creating attack tensor of FastGradientMethod')
            fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2) # testing now
            x_adv_test_fgsm = fgsm_obj.generate(x=x,
                                                eps=mu_alpha,
                                                clip_min=-1.0,
                                                clip_max=1.0)  # testing now
            attack_tensor_dict['fgsm'] = x_adv_test_fgsm

        # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
        # default: eps_iter=0.05, nb_iter=10
        if attack_switch['ifgsm']:
            print('creating attack tensor of BasicIterativeMethod')
            ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_ifgsm = ifgsm_obj.generate(x=x,
                                                  eps=mu_alpha,
                                                  eps_iter=fgsm_eps / 3,
                                                  nb_iter=3,
                                                  clip_min=-1.0,
                                                  clip_max=1.0)
            attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm

        # MomentumIterativeMethod
        # default: eps_iter=0.06, nb_iter=10
        if attack_switch['mim']:
            print('creating attack tensor of MomentumIterativeMethod')
            mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_mim = mim_obj.generate(x=x,
                                              eps=mu_alpha,
                                              eps_iter=fgsm_eps / 3,
                                              nb_iter=3,
                                              decay_factor=1.0,
                                              clip_min=-1.0,
                                              clip_max=1.0)
            attack_tensor_dict['mim'] = x_adv_test_mim

        # MadryEtAl (projected gradient with random init, same as rand+fgsm)
        # default: eps_iter=0.01, nb_iter=40
        if attack_switch['madry']:
            print('creating attack tensor of MadryEtAl')
            madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
            #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_madry = madry_obj.generate(x=x,
                                                  eps=mu_alpha,
                                                  eps_iter=fgsm_eps / 3,
                                                  nb_iter=3,
                                                  clip_min=-1.0,
                                                  clip_max=1.0)
            attack_tensor_dict['madry'] = x_adv_test_madry

        #====================== attack =========================

        #adv_logits, _ = inference(c_x_adv + W_conv1Noise, perturbFM, params)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()
        sess.run(init)

        # Start the queue runners.
        #tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(os.getcwd() + dirCheckpoint,
                                               sess.graph)

        # load the most recent models
        _global_step = 0
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
            _global_step = int(
                ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        else:
            print('No checkpoint file found')

        T = int(int(math.ceil(D / L)) * epochs + 1)  # total number of steps
        step_for_epoch = int(math.ceil(D / L))  # number of steps for one epoch

        perturbH_test = np.random.laplace(0.0, 0, 14 * 14 * 128)
        perturbH_test = np.reshape(perturbH_test, [-1, 14, 14, 128])

        #W_conv1Noise = np.random.laplace(0.0, Delta2/(L*epsilon2), 32 * 32 * 3).astype(np.float32)
        #W_conv1Noise = np.reshape(_W_conv1Noise, [32, 32, 3])

        perturbFM_h = np.random.laplace(0.0,
                                        2 * Delta2 / (epsilon2_update * L),
                                        14 * 14 * 128)
        perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 128])
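        # perturbFM_h: Laplace noise with scale 2*Delta2/(epsilon2_update*L), injected
        # into the 14x14x128 feature map during training.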

        #_W_adv = np.random.laplace(0.0, 0, 32 * 32 * 3).astype(np.float32)
        #_W_adv = np.reshape(_W_adv, [32, 32, 3])
        #_perturbFM_h_adv = np.random.laplace(0.0, 0, 10*10*128)
        #_perturbFM_h_adv = np.reshape(_perturbFM_h_adv, [10, 10, 128]);

        test_size = len(cifar10_data.test.images)
        #beta = redistributeNoise(os.getcwd() + '/LRP_0_25_v12.txt')
        #BenignLNoise = generateIdLMNoise(image_size, Delta2, eps_benign, L) #generateNoise(image_size, Delta2, eps_benign, L, beta);
        #AdvLnoise = generateIdLMNoise(image_size, Delta2, eps_adv, L)
        Noise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L)
        #generateNoise(image_size, Delta2, eps_adv, L, beta);
        Noise_test = generateIdLMNoise(
            image_size, 0, epsilon2_update,
            L)  #generateNoise(image_size, 0, 2*epsilon2, test_size, beta);

        emsemble_L = int(L / 3)
        preT_epochs = 100
        pre_T = int(int(math.ceil(D / L)) * preT_epochs + 1)
        """logfile.write("pretrain: \n")
    for step in range(_global_step, _global_step + pre_T):
        d_eps = random.random()*0.5;
        batch = cifar10_data.train.next_batch(L); #Get a random batch.
        adv_images = sess.run(x_adv, feed_dict = {x: batch[0], dynamic_eps: d_eps, FM_h: perturbH_test})
        for iter in range(0, 2):
            adv_images = sess.run(x_adv, feed_dict = {x: adv_images, dynamic_eps: d_eps, FM_h: perturbH_test})
        #sess.run(pretrain_step, feed_dict = {x: batch[0], noise: AdvLnoise, FM_h: perturbFM_h});
        batch = cifar10_data.train.next_batch(L);
        sess.run(pretrain_step, feed_dict = {x: np.append(batch[0], adv_images, axis = 0), noise: Noise, FM_h: perturbFM_h});
        if step % int(25*step_for_epoch) == 0:
            cost_value = sess.run(cost, feed_dict={x: cifar10_data.test.images, noise: Noise_test, FM_h: perturbH_test})/(test_size*128)
            logfile.write("step \t %d \t %g \n"%(step, cost_value))
            print(cost_value)
    print('pre_train finished')"""

        _global_step = 0
        for step in range(_global_step, _global_step + T):
            start_time = time.time()
            d_eps = random.random() * 0.5
            batch = cifar10_data.train.next_batch(emsemble_L)
            #Get a random batch.
            y_adv_batch = batch[1]
            """adv_images = sess.run(x_adv, feed_dict = {x: batch[0], dynamic_eps: d_eps, FM_h: perturbH_test})
      for iter in range(0, 2):
          adv_images = sess.run(x_adv, feed_dict = {x: adv_images, dynamic_eps: d_eps, FM_h: perturbH_test})"""
            adv_images_ifgsm = sess.run(attack_tensor_dict['ifgsm'],
                                        feed_dict={
                                            x: batch[0],
                                            adv_noise: Noise,
                                            mu_alpha: [d_eps]
                                        })
            batch = cifar10_data.train.next_batch(emsemble_L)
            y_adv_batch = np.append(y_adv_batch, batch[1], axis=0)
            adv_images_mim = sess.run(attack_tensor_dict['mim'],
                                      feed_dict={
                                          x: batch[0],
                                          adv_noise: Noise,
                                          mu_alpha: [d_eps]
                                      })
            batch = cifar10_data.train.next_batch(emsemble_L)
            y_adv_batch = np.append(y_adv_batch, batch[1], axis=0)
            adv_images_madry = sess.run(attack_tensor_dict['madry'],
                                        feed_dict={
                                            x: batch[0],
                                            adv_noise: Noise,
                                            mu_alpha: [d_eps]
                                        })
            adv_images = np.append(np.append(adv_images_ifgsm,
                                             adv_images_mim,
                                             axis=0),
                                   adv_images_madry,
                                   axis=0)
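            # The three attacks each contribute emsemble_L = int(L/3) examples, so the
            # combined adversarial batch roughly matches the clean batch size L.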

            batch = cifar10_data.train.next_batch(L)
            #Get a random batch.

            sess.run(pretrain_step,
                     feed_dict={
                         x: batch[0],
                         adv_x: adv_images,
                         adv_noise: Noise_test,
                         noise: Noise,
                         FM_h: perturbFM_h
                     })
            _, loss_value = sess.run(
                [train_op, loss],
                feed_dict={
                    x: batch[0],
                    y_: batch[1],
                    adv_x: adv_images,
                    adv_y_: y_adv_batch,
                    noise: Noise,
                    adv_noise: Noise_test,
                    FM_h: perturbFM_h
                })
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            # report the result periodically
            if step % (50 * step_for_epoch) == 0 and step >= (300 *
                                                              step_for_epoch):
                '''predictions_form_argmax = np.zeros([test_size, 10])
          softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: cifar10_data.test.images, noise: Noise_test, FM_h: perturbH_test})
          argmax_predictions = np.argmax(softmax_predictions, axis=1)
          """for n_draws in range(0, 2000):
            _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2, L)
            _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2*L), 14*14*128)
            _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 128]);"""
          for j in range(test_size):
            pred = argmax_predictions[j]
            predictions_form_argmax[j, pred] += 2000;
          """softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: cifar10_data.test.images, noise: _BenignLNoise, FM_h: _perturbFM_h})
            argmax_predictions = np.argmax(softmax_predictions, axis=1)"""
          final_predictions = predictions_form_argmax;
          is_correct = []
          is_robust = []
          for j in range(test_size):
              is_correct.append(np.argmax(cifar10_data.test.labels[j]) == np.argmax(final_predictions[j]))
              robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / dp_mult
              is_robust.append(robustness_from_argmax >= fgsm_eps)
          acc = np.sum(is_correct)*1.0/test_size
          robust_acc = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust)
          robust_utility = np.sum(is_robust)*1.0/test_size
          log_str = "step: {:.1f}\t epsilon: {:.1f}\t benign: {:.4f} \t {:.4f} \t {:.4f} \t {:.4f} \t".format(step, total_eps, acc, robust_acc, robust_utility, robust_acc*robust_utility)'''

                #===================adv samples=====================
                log_str = "step: {:.1f}\t epsilon: {:.1f}\t".format(
                    step, total_eps)
                """adv_images_dict = {}
          for atk in attack_switch.keys():
              if attack_switch[atk]:
                  adv_images_dict[atk] = sess.run(attack_tensor_dict[atk], feed_dict ={x:cifar10_data.test.images})
          print("Done with the generating of Adversarial samples")"""
                #===================adv samples=====================
                adv_acc_dict = {}
                robust_adv_acc_dict = {}
                robust_adv_utility_dict = {}
                test_bach_size = 5000
                for atk in attack_switch.keys():
                    print(atk)
                    if atk not in adv_acc_dict:
                        adv_acc_dict[atk] = -1
                        robust_adv_acc_dict[atk] = -1
                        robust_adv_utility_dict[atk] = -1
                    if attack_switch[atk]:
                        test_bach = cifar10_data.test.next_batch(
                            test_bach_size)
                        adv_images_dict = sess.run(attack_tensor_dict[atk],
                                                   feed_dict={
                                                       x: test_bach[0],
                                                       adv_noise: Noise_test,
                                                       mu_alpha: [fgsm_eps]
                                                   })
                        print("Done adversarial examples")
                        ### PixelDP Robustness ###
                        predictions_form_argmax = np.zeros(
                            [test_bach_size, 10])
                        softmax_predictions = sess.run(softmax_y_conv,
                                                       feed_dict={
                                                           x: adv_images_dict,
                                                           noise: Noise,
                                                           FM_h: perturbFM_h
                                                       })
                        argmax_predictions = np.argmax(softmax_predictions,
                                                       axis=1)
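                        # Monte Carlo voting: 1000 noisy forward passes accumulate votes
                        # in predictions_form_argmax, which feeds the PixelDP robustness
                        # bound computed below.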
                        for n_draws in range(0, 1000):
                            _BenignLNoise = generateIdLMNoise(
                                image_size, Delta2, epsilon2_update, L)
                            _perturbFM_h = np.random.laplace(
                                0.0, 2 * Delta2 / (epsilon2_update * L),
                                14 * 14 * 128)
                            _perturbFM_h = np.reshape(_perturbFM_h,
                                                      [-1, 14, 14, 128])
                            if n_draws == 500:
                                print("n_draws = 500")
                            for j in range(test_bach_size):
                                pred = argmax_predictions[j]
                                predictions_form_argmax[j, pred] += 1
                            softmax_predictions = sess.run(
                                softmax_y_conv,
                                feed_dict={
                                    x: adv_images_dict,
                                    noise: (_BenignLNoise / 10 + Noise),
                                    FM_h: perturbFM_h
                                }) * sess.run(
                                    softmax_y_conv,
                                    feed_dict={
                                        x: adv_images_dict,
                                        noise: Noise,
                                        FM_h: (_perturbFM_h / 10 + perturbFM_h)
                                    })
                            #softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: adv_images_dict, noise: (_BenignLNoise), FM_h: perturbFM_h}) * sess.run(softmax_y_conv, feed_dict={x: adv_images_dict, noise: Noise, FM_h: (_perturbFM_h)})
                            argmax_predictions = np.argmax(softmax_predictions,
                                                           axis=1)
                        final_predictions = predictions_form_argmax
                        is_correct = []
                        is_robust = []
                        for j in range(test_bach_size):
                            is_correct.append(
                                np.argmax(test_bach[1][j]) == np.argmax(
                                    final_predictions[j]))
                            robustness_from_argmax = robustness.robustness_size_argmax(
                                counts=predictions_form_argmax[j],
                                eta=0.05,
                                dp_attack_size=fgsm_eps,
                                dp_epsilon=dp_epsilon,
                                dp_delta=0.05,
                                dp_mechanism='laplace') / dp_mult
                            is_robust.append(
                                robustness_from_argmax >= fgsm_eps)
                        adv_acc_dict[atk] = np.sum(
                            is_correct) * 1.0 / test_bach_size
                        robust_adv_acc_dict[atk] = np.sum([
                            a and b for a, b in zip(is_robust, is_correct)
                        ]) * 1.0 / np.sum(is_robust)
                        robust_adv_utility_dict[atk] = np.sum(
                            is_robust) * 1.0 / test_bach_size
                        ##############################
                for atk in attack_switch.keys():
                    if attack_switch[atk]:
                        # added robust prediction
                        log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(
                            atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
                            robust_adv_utility_dict[atk],
                            robust_adv_acc_dict[atk] *
                            robust_adv_utility_dict[atk])
                print(log_str)
                logfile.write(log_str + '\n')

            # Save the model checkpoint periodically.
            if step % (10 * step_for_epoch) == 0 and (step > _global_step):
                num_examples_per_step = L
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))
            """if step % (50*step_for_epoch) == 0 and (step >= 900*step_for_epoch):
Example #27
def mnist_tutorial(train_start=0, train_end=60000, test_start=0,
                   test_end=10000, nb_epochs=NB_EPOCHS, batch_size=BATCH_SIZE,
                   learning_rate=LEARNING_RATE, train_dir=TRAIN_DIR,
                   filename=FILENAME, load_model=LOAD_MODEL,
                   testing=False, label_smoothing=0.1,
                   adversarial_training=ADVERSARIAL_TRAINING,
                   attacking=ATTACKING, origin_method=ORIGIN_METHOD,
                   save_model=SAVE_MODEL,model_type=MODEL_TYPE):
  """
  MNIST CleverHans tutorial
  :param train_start: index of first training set example
  :param train_end: index of last training set example
  :param test_start: index of first test set example
  :param test_end: index of last test set example
  :param nb_epochs: number of epochs to train model
  :param batch_size: size of training batches
  :param learning_rate: learning rate for training
  :param train_dir: Directory storing the saved model
  :param filename: Filename to save model under
  :param load_model: True for load, False for not load
  :param testing: if true, test error is calculated
  :param label_smoothing: float, amount of label smoothing for cross entropy
  :param adversarial_training: if True, additionally evaluate the trained model
                               against fgsm, bim and mifgsm attacks
  :param attacking: attack name placeholder; overwritten inside the evaluation loop
  :param origin_method: attack used for adversarial training ('fgsm', 'bim' or 'mifgsm')
  :param save_model: if True, save the trained model
  :param model_type: which architecture to use ('a', 'b' or 'c')
  :return: an AccuracyReport object
  """
  keras.layers.core.K.set_learning_phase(0)

  # Object used to keep track of (and return) key accuracies
  report = AccuracyReport()

  # Set TF random seed to improve reproducibility
  tf.set_random_seed(1234)

  if not hasattr(backend, "tf"):
    raise RuntimeError("This tutorial requires keras to be configured"
                       " to use the TensorFlow backend.")

  if keras.backend.image_dim_ordering() != 'tf':
    keras.backend.set_image_dim_ordering('tf')
    print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
          "'th', temporarily setting to 'tf'")

  # Create TF session and set as Keras backend session
  os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # only use No.0 GPU
  config = tf.ConfigProto()
  config.allow_soft_placement=True
  config.gpu_options.allow_growth = True
  sess = tf.Session(config=config)
  keras.backend.set_session(sess)

  # Get MNIST test data
  mnist = MNIST(train_start=train_start, train_end=train_end,
                test_start=test_start, test_end=test_end)
  x_train, y_train = mnist.get_set('train')
  x_test, y_test = mnist.get_set('test')

  # Obtain Image Parameters
  img_rows, img_cols, nchannels = x_train.shape[1:4]
  nb_classes = y_train.shape[1]

  # Define input TF placeholder
  x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols,
                                        nchannels))
  y = tf.placeholder(tf.float32, shape=(None, nb_classes))


  # Define TF model graph
  the_model = modelA
  if model_type == 'a':
      the_model = modelA
  elif model_type == 'b':
      the_model = modelB
  elif model_type == 'c':
      the_model = modelC
  else:
      exit('the model type must be a or b or c.')
  model = the_model(img_rows=img_rows, img_cols=img_cols,
                 channels=nchannels, nb_filters=64,
                 nb_classes=nb_classes)
  wrap = KerasModelWrapper(model)
  preds = model(x)

  # Initialize the Fast Gradient Sign Method (FGSM) attack object and graph
  if origin_method == 'fgsm':
      att_method = FastGradientMethod(wrap, sess=sess)
      att_method_params = {'eps': 0.2,
                           'clip_min': 0.,
                           'clip_max': 1.}
  elif origin_method == 'bim':
      att_method = BasicIterativeMethod(wrap, sess=sess)
      att_method_params = {'eps': 0.2,
                           'eps_iter': 0.06,
                           'nb_iter': 10,
                           'clip_min': 0.,
                           'clip_max': 1.}
  elif origin_method == 'mifgsm':
      att_method = MomentumIterativeMethod(wrap, sess=sess)
      att_method_params = {'eps': 0.2,
                           'eps_iter': 0.08,
                           'nb_iter': 10,
                           'decay_factor': 0.4,
                           'clip_min': 0.,
                           'clip_max': 1.}
  else:
      exit("the attack method must be fgsm,bim,mifgsm")
  # Evaluate the accuracy of the MNIST model on adversarial examples
  print(att_method_params)
  adv_x = att_method.generate(x, **att_method_params)
  # Consider the attack to be constant
  adv_x = tf.stop_gradient(adv_x)
  preds_adv = model(adv_x)
  def attack(x):
      return att_method.generate(x, **att_method_params)
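  # attack() is passed to CrossEntropy below so that adversarial examples are
  # regenerated on the fly during adversarial training.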

  def evaluate2():
      # Evaluate the accuracy of the MNIST model on legitimate test examples
      eval_params = {'batch_size': batch_size}
      acc = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
      report.clean_train_clean_eval = acc
      print('AT Test accuracy on legitimate examples: %0.4f' % acc)

      # Accuracy of the adversarially trained model on adversarial examples
      accuracy = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_params)
      print('AT Test accuracy on adversarial examples: %0.4f' % accuracy)
      report.adv_train_adv_eval = accuracy

  # Train an MNIST model
  train_params = {
      'nb_epochs': nb_epochs,
      'batch_size': batch_size,
      'learning_rate': learning_rate,
      'train_dir': train_dir,
      'filename': filename
  }

  rng = np.random.RandomState([2017, 8, 30])
  train_dir = train_dir + '/' + model_type + '/' + origin_method
  if not os.path.exists(train_dir):
    os.makedirs(train_dir)

  ckpt = tf.train.get_checkpoint_state(train_dir)
  print(train_dir, ckpt)
  ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path


  if load_model and ckpt_path:
    saver = tf.train.Saver()
    print(ckpt_path)
    saver.restore(sess, ckpt_path)
    print("Model loaded from: {}".format(ckpt_path))
    evaluate2()
  else:
    print("Model was not loaded, training from scratch.")
    loss2 = CrossEntropy(wrap, smoothing=label_smoothing,attack=attack)
    train(sess, loss2, x_train, y_train, evaluate=evaluate2,
          args=train_params, rng=rng)
    if save_model:
        saver = tf.train.Saver(max_to_keep=1)
        saver.save(sess, '{}/{}.ckpt'.format(train_dir,origin_method), global_step=NB_EPOCHS)
        keras.models.save_model(model, '{}/{}_mnist.h5'.format(train_dir,origin_method))
        print("model has been saved")


  # >>> other method >>>
  if adversarial_training:
      for attacking in ['fgsm', 'bim', 'mifgsm']:
          if attacking == 'fgsm':
            att_method = FastGradientMethod(wrap, sess=sess)
            att_method_params = {'eps': 0.2,
                         'clip_min': 0.,
                         'clip_max': 1.}
          elif attacking == 'bim':
            att_method = BasicIterativeMethod(wrap,sess=sess)
            att_method_params = {'eps': 0.2,
                        'eps_iter':0.06,
                        'nb_iter':10,
                         'clip_min': 0.,
                         'clip_max': 1.}
          elif attacking == 'mifgsm':
            att_method = MomentumIterativeMethod(wrap,sess=sess)
            att_method_params =  {'eps': 0.2,
                        'eps_iter':0.08,
                        'nb_iter':10,
                        'decay_factor':0.4,
                         'clip_min': 0.,
                         'clip_max': 1.}
          else:
              exit("the attack method must be fgsm,bim,mifgsm")
          # Evaluate the accuracy of the MNIST model on adversarial examples
          print(att_method_params)
          adv_x = att_method.generate(x, **att_method_params)
          # Consider the attack to be constant
          adv_x = tf.stop_gradient(adv_x)
          preds_adv = model(adv_x)

          eval_par = {'batch_size': batch_size}
          start_time = time.time()
          acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par)
          print('Test accuracy on adversarial examples: %0.4f' % acc)
          end_time = time.time()
          print("{} attack time is {}\n".format(attacking,end_time-start_time))
          report.clean_train_adv_eval = acc

  gc.collect()
Example #28
def test():
    """
    """
    tf.reset_default_graph()
    g = tf.get_default_graph()

    with g.as_default():
        # Placeholder nodes.
        images_holder = tf.placeholder(
            tf.float32,
            [None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS])
        label_holder = tf.placeholder(tf.float32, [None, FLAGS.NUM_CLASSES])
        is_training = tf.placeholder(tf.bool, ())

        # model
        model = model_cifar100.RDPCNN(images_holder, label_holder,
                                      FLAGS.INPUT_SIGMA,
                                      is_training)  # for adv examples

        model_loss = model.loss()
        model_acc = model.cnn_accuracy

        # robust
        def inference(x):
            logits, _ = model.cnn.prediction(x)
            return logits

        def inference_prob(x):
            _, probs = model.cnn.prediction(x)
            return probs

        graph_dict = {}
        graph_dict["images_holder"] = images_holder
        graph_dict["label_holder"] = label_holder
        graph_dict["is_training"] = is_training

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config, graph=g) as sess:
        sess.run(tf.global_variables_initializer())
        # load model
        model.tf_load(sess, name=FLAGS.CNN_CKPT_RESTORE_NAME)

        # adv test
        ####################################################################################################
        x_advs = {}
        ch_model_logits = CallableModelWrapper(callable_fn=inference,
                                               output_layer='logits')
        ch_model_probs = CallableModelWrapper(callable_fn=inference_prob,
                                              output_layer='probs')
        # FastGradientMethod
        fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
        x_advs["fgsm"] = fgsm_obj.generate(x=images_holder,
                                           eps=FLAGS.ATTACK_SIZE,
                                           clip_min=0.0,
                                           clip_max=1.0)  # testing now

        # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
        # default: eps_iter=0.05, nb_iter=10
        ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
        x_advs["ifgsm"] = ifgsm_obj.generate(x=images_holder,
                                             eps=FLAGS.ATTACK_SIZE,
                                             eps_iter=FLAGS.ATTACK_SIZE / 10,
                                             nb_iter=10,
                                             clip_min=0.0,
                                             clip_max=1.0)

        # MomentumIterativeMethod
        # default: eps_iter=0.06, nb_iter=10
        mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
        x_advs["mim"] = mim_obj.generate(x=images_holder,
                                         eps=FLAGS.ATTACK_SIZE,
                                         eps_iter=FLAGS.ATTACK_SIZE / 10,
                                         nb_iter=10,
                                         decay_factor=1.0,
                                         clip_min=0.0,
                                         clip_max=1.0)

        # MadryEtAl (projected gradient with random init, same as rand+fgsm)
        # default: eps_iter=0.01, nb_iter=40
        madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
        x_advs["madry"] = madry_obj.generate(x=images_holder,
                                             eps=FLAGS.ATTACK_SIZE,
                                             eps_iter=FLAGS.ATTACK_SIZE / 10,
                                             nb_iter=10,
                                             clip_min=0.0,
                                             clip_max=1.0)
        graph_dict["x_advs"] = x_advs
        ####################################################################################################

        # tensorboard writer
        #test_writer = model_utils.init_writer(FLAGS.TEST_LOG_PATH, g)
        print("\nTest")
        if FLAGS.local:
            total_test_batch = 2
        else:
            total_test_batch = None
        dp_info = np.load(FLAGS.DP_INFO_NPY, allow_pickle=True).item()
        test_info(sess,
                  model,
                  True,
                  graph_dict,
                  dp_info,
                  FLAGS.TEST_LOG_FILENAME,
                  total_batch=total_test_batch)
        robust_info(sess, model, graph_dict, FLAGS.ROBUST_LOG_FILENAME)
def b_dcgan(dataset, args):

    z_dim = args.z_dim
    x_dim = dataset.x_dim
    batch_size = args.batch_size
    dataset_size = dataset.dataset_size

    session = get_session()

    test_x = tf.placeholder(tf.float32, shape=(batch_size, 28, 28, 1))
    x = tf.placeholder(tf.float32, shape=(batch_size, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(batch_size, 10))

    unlabeled_batch_ph = tf.placeholder(tf.float32,
                                        shape=(batch_size, 28, 28, 1))
    labeled_image_ph = tf.placeholder(tf.float32,
                                      shape=(batch_size, 28, 28, 1))
    if args.random_seed is not None:
        tf.set_random_seed(args.random_seed)
    # note: the TF graph construction here requires a fixed batch_size for all functions
    dcgan = BDCGAN(
        x_dim,
        z_dim,
        dataset_size,
        batch_size=batch_size,
        J=args.J,
        M=args.M,
        lr=args.lr,
        optimizer=args.optimizer,
        gen_observed=args.gen_observed,
        adv_train=args.adv_train,
        num_classes=dataset.num_classes if args.semi_supervised else 1)
    if args.adv_test and args.semi_supervised:
        if args.basic_iterative:
            fgsm = BasicIterativeMethod(dcgan, sess=session)
            dcgan.adv_constructor = fgsm
            fgsm_params = {
                'eps': args.eps,
                'eps_iter': float(args.eps / 4),
                'nb_iter': 4,
                'ord': np.inf,
                'clip_min': 0.,
                'clip_max': 1.
            }
            #,'y_target': None}
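            # With nb_iter=4 and eps_iter=eps/4, the iterative step sizes sum to the
            # overall eps budget under the infinity norm.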
        else:
            fgsm = FastGradientMethod(dcgan, sess=session)
            dcgan.adv_constructor = fgsm
            eval_params = {'batch_size': batch_size}
            fgsm_params = {'eps': args.eps, 'clip_min': 0., 'clip_max': 1.}
        adv_x = fgsm.generate(x, **fgsm_params)
        adv_test_x = fgsm.generate(test_x, **fgsm_params)
        preds = dcgan.get_probs(adv_x)
    if args.adv_train:
        unlabeled_targets = np.zeros([batch_size, dcgan.K + 1])
        unlabeled_targets[:, 0] = 1
        fgsm_targeted_params = {
            'eps': args.eps,
            'clip_min': 0.,
            'clip_max': 1.,
            'y_target': unlabeled_targets
        }

    saver = tf.train.Saver()

    print("Starting session")
    session.run(tf.global_variables_initializer())

    prev_iters = 0
    if args.load_chkpt:
        saver.restore(session, args.chkpt)
        # Assume checkpoint is of the form "model_300"
        prev_iters = int(args.chkpt.split('/')[-1].split('_')[1])
        print("Model restored from iteration:", prev_iters)

    print("Starting training loop")
    num_train_iter = args.train_iter

    if hasattr(dataset, "supervised_batches"):
        # implement your own data feeder if the data doesn't fit in memory
        supervised_batches = dataset.supervised_batches(args.N, batch_size)
    else:
        supervised_batches = get_supervised_batches(
            dataset, args.N, batch_size, list(range(dataset.num_classes)))

    if args.semi_supervised:
        test_image_batches, test_label_batches = get_test_batches(
            dataset, batch_size)

        optimizer_dict = {
            "semi_d": dcgan.d_optim_semi_adam,
            "sup_d": dcgan.s_optim_adam,
            "adv_d": dcgan.d_optim_adam,
            "gen": dcgan.g_optims_adam
        }
    else:
        optimizer_dict = {
            "adv_d": dcgan.d_optim_adam,
            "gen": dcgan.g_optims_adam
        }

    base_learning_rate = args.lr  # for now we use same learning rate for Ds and Gs
    lr_decay_rate = args.lr_decay

    for train_iter in range(1 + prev_iters, 1 + num_train_iter):

        if train_iter == 5000:
            print("Switching to user-specified optimizer")
            if args.semi_supervised:
                optimizer_dict = {
                    "semi_d": dcgan.d_optim_semi,
                    "sup_d": dcgan.s_optim,
                    "adv_d": dcgan.d_optim,
                    "gen": dcgan.g_optims
                }
            else:
                optimizer_dict = {
                    "adv_d": dcgan.d_optim,
                    "gen": dcgan.g_optims
                }

        learning_rate = base_learning_rate * np.exp(-lr_decay_rate * min(
            1.0, (train_iter * batch_size) / float(dataset_size)))

        batch_z = np.random.uniform(-1, 1, [batch_size, z_dim])
        image_batch, batch_label = dataset.next_batch(batch_size,
                                                      class_id=None)
        batch_targets = np.zeros([batch_size, 11])
        batch_targets[:, 0] = 1

        if args.semi_supervised:
            labeled_image_batch, labels = next(supervised_batches)
            if args.adv_train:
                adv_labeled = session.run(
                    fgsm.generate(labeled_image_ph, **fgsm_targeted_params),
                    feed_dict={labeled_image_ph: labeled_image_batch})
                adv_unlabeled = session.run(
                    fgsm.generate(unlabeled_batch_ph, **fgsm_params),
                    feed_dict={unlabeled_batch_ph: image_batch})
                _, d_loss = session.run(
                    [optimizer_dict["semi_d"], dcgan.d_loss_semi],
                    feed_dict={
                        dcgan.labeled_inputs: labeled_image_batch,
                        dcgan.labels: get_gan_labels(labels),
                        dcgan.inputs: image_batch,
                        dcgan.z: batch_z,
                        dcgan.d_semi_learning_rate: learning_rate,
                        dcgan.adv_unlab: adv_unlabeled,
                        dcgan.adv_labeled: adv_labeled
                    })
            else:
                _, d_loss = session.run(
                    [optimizer_dict["semi_d"], dcgan.d_loss_semi],
                    feed_dict={
                        dcgan.labeled_inputs: labeled_image_batch,
                        dcgan.labels: get_gan_labels(labels),
                        dcgan.inputs: image_batch,
                        dcgan.z: batch_z,
                        dcgan.d_semi_learning_rate: learning_rate
                    })

            _, s_loss = session.run([optimizer_dict["sup_d"], dcgan.s_loss],
                                    feed_dict={
                                        dcgan.inputs: labeled_image_batch,
                                        dcgan.lbls: labels
                                    })

        else:
            # regular GAN
            _, d_loss = session.run(
                [optimizer_dict["adv_d"], dcgan.d_loss],
                feed_dict={
                    dcgan.inputs: image_batch,
                    dcgan.z: batch_z,
                    dcgan.d_learning_rate: learning_rate
                })

        if args.wasserstein:
            session.run(dcgan.clip_d, feed_dict={})

        g_losses = []
        for gi in range(dcgan.num_gen):

            # compute g_sample loss
            batch_z = np.random.uniform(-1, 1, [batch_size, z_dim])
            for m in range(dcgan.num_mcmc):
                _, g_loss = session.run([
                    optimizer_dict["gen"][gi * dcgan.num_mcmc + m],
                    dcgan.generation["g_losses"][gi * dcgan.num_mcmc + m]
                ],
                                        feed_dict={
                                            dcgan.z: batch_z,
                                            dcgan.g_learning_rate:
                                            learning_rate
                                        })
                g_losses.append(g_loss)

        # if args.adv_test:
        #     probs, logits = dcgan.discriminator(adv_x,dcgan.K+1,reuse = True)

        #     labels = tf.placeholder(tf.float32,
        #                              [args.batch_size, dcgan.K+1], name='real_targets')
        #     compare_labels = tf.convert_to_tensor(np.array([np.append(0,i) for i in batch_label]))

        #     print(session.run(model_loss(compare_labels,probs), feed_dict = {x:image_batch}))
        # if args.adv_test:
        #     #preds = dcgan.get_probs(adv_x)
        #     #eval_preds = session.run(preds, feed_dict = {x:image_batch})
        #     #print(eval_preds[0])
        #     #adv_exs = session.run(adv_test_x, feed_dict = {x:test_image_batches})
        #     # adv_acc = model_eval(
        #     #     session, x, y, preds, image_batch, batch_label, args=eval_params)
        #     # #print(session.run(model_loss(compare_labels,probs), feed_dict = {x:image_batch}))
        #     # print("Adversarial loss = %2.f" % (1-adv_acc))
        #     print(get_test_accuracy(session,dcgan,adv_set,test_label_batches))

        if train_iter > 0 and train_iter % args.n_save == 0:
            print("Iter %i" % train_iter)
            # collect samples
            if args.save_samples:  # saving samples
                all_sampled_imgs = []
                for gi in range(dcgan.num_gen):
                    _imgs, _ps = [], []
                    for _ in range(10):
                        sample_z = np.random.uniform(-1,
                                                     1,
                                                     size=(batch_size, z_dim))
                        sampled_imgs, sampled_probs = session.run([
                            dcgan.generation["gen_samplers"][gi *
                                                             dcgan.num_mcmc],
                            dcgan.generation["d_probs"][gi * dcgan.num_mcmc]
                        ],
                                                                  feed_dict={
                                                                      dcgan.z:
                                                                      sample_z
                                                                  })
                        _imgs.append(sampled_imgs)
                        _ps.append(sampled_probs)

                    sampled_imgs = np.concatenate(_imgs)
                    sampled_probs = np.concatenate(_ps)
                    all_sampled_imgs.append(
                        [sampled_imgs, sampled_probs[:, 1:].sum(1)])

            print("Disc loss = %.2f, Gen loss = %s" %
                  (d_loss, ", ".join(["%.2f" % gl for gl in g_losses])))

            #if args.adv_test:
            #preds = dcgan.get_probs(adv_x)
            #eval_preds = session.run(preds, feed_dict = {x:image_batch})
            #print(eval_preds[0])
            #adv_exs = session.run(adv_test_x, feed_dict = {x:test_image_batches})
            # adv_acc = model_eval(
            #     session, x, y, preds, image_batch, batch_label, args=eval_params)
            # #print(session.run(model_loss(compare_labels,probs), feed_dict = {x:image_batch}))
            # print("Adversarial loss = %2.f" % (1-adv_acc))
            #print(get_test_accuracy(session,dcgan,adv_set,test_label_batches))

            # adv_x = fgsm.generate(x,**fgsm_params)
            # preds = dcgan.get_probs(adv_x)
            # acc = model_eval(
            #     session, x, y, preds, image_batch, batch_label, args=eval_params)
            # print("Adversarial loss = %2.f" % (1-acc))

            if args.semi_supervised:
                # get test set performance on real labels only for both GAN-based classifier and standard one

                s_acc, ss_acc, non_adv_acc, ex_prob = get_test_accuracy(
                    session, dcgan, test_image_batches, test_label_batches)
                if args.adv_test:
                    adv_set = []
                    for test_images in test_image_batches:
                        adv_set.append(
                            session.run(adv_x, feed_dict={x: test_images}))
                    adv_sup_acc, adv_ss_acc, correct_uncertainty, incorrect_uncertainty, adv_acc, adv_ex_prob = get_adv_test_accuracy(
                        session, dcgan, adv_set, test_label_batches)
                    print("Adversarial semi-sup accuracy with filter: %.2f" %
                          adv_sup_acc)
                    print("Adverarial semi-sup accuracy: %.2f" % adv_ss_acc)
                    print("Uncertainty for correct predictions: %.2f" %
                          correct_uncertainty)
                    print("Uncertainty for incorrect predictions: %.2f" %
                          incorrect_uncertainty)
                    print("non_adversarial_classification_accuracy: %.2f" %
                          non_adv_acc)
                    print("adversarial_classification_accuracy: %.2f" %
                          adv_acc)

                    if args.save_samples:
                        print("saving adversarial test images and test images")
                        i = 0

                        # use distinct names so the x/y placeholders above are not shadowed
                        for adv_img, test_img in zip(adv_set[-1],
                                                     test_image_batches[-1]):
                            np.save(
                                args.out_dir + '/adv_test' + str(train_iter) +
                                '_' + str(i), adv_img)
                            np.save(
                                args.out_dir + '/test' + str(train_iter) +
                                '_' + str(i), test_img)
                            i = i + 1
                            if i == 5:  #save 5 adversarial images
                                break

                print("Supervised acc: %.2f" % (s_acc))
                print("Semi-sup acc: %.2f" % (ss_acc))

            print("saving results and samples")

            results = {
                "disc_loss": float(d_loss),
                "gen_losses": list(map(float, g_losses))
            }
            if args.semi_supervised:
                #results["example_non_adversarial_probs"] = list(ex_prob.flatten())
                #results["example_adversarial_probs"] = list(adv_ex_prob.flatten())
                results["non_adversarial_classification_accuracy"] = float(
                    non_adv_acc)
                results["adversarial_classification_accuracy"] = float(adv_acc)
                results["adversarial_uncertainty_correct"] = float(
                    correct_uncertainty)
                results["adversarial_uncertainty_incorrect"] = float(
                    incorrect_uncertainty)
                results["supervised_acc"] = float(s_acc)
                results['adversarial_filtered_semi_supervised_acc'] = float(
                    adv_sup_acc)
                results["adversarial_unfilted_semi_supervised_acc"] = float(
                    adv_ss_acc)
                results["semi_supervised_acc"] = float(ss_acc)
                results["timestamp"] = time.time()
                results["previous_chkpt"] = args.chkpt

            with open(
                    os.path.join(args.out_dir, 'results_%i.json' % train_iter),
                    'w') as fp:
                json.dump(results, fp)

            if args.save_samples:
                for gi in range(dcgan.num_gen):
                    print_images(all_sampled_imgs[gi],
                                 "B_DCGAN_%i_%.2f" %
                                 (gi, g_losses[gi * dcgan.num_mcmc]),
                                 train_iter,
                                 directory=args.out_dir)

                print_images(image_batch,
                             "RAW",
                             train_iter,
                             directory=args.out_dir)

            if args.save_weights:
                var_dict = {}
                for var in tf.trainable_variables():
                    var_dict[var.name] = session.run(var.name)

                np.savez_compressed(
                    os.path.join(args.out_dir, "weights_%i.npz" % train_iter),
                    **var_dict)

            print("Done saving weights")

        if train_iter > 0 and train_iter % args.save_chkpt == 0:
            save_path = saver.save(
                session, os.path.join(args.out_dir, "model_%i" % train_iter))
            print("Model checkpointed in file: %s" % save_path)

    session.close()
Example #30
    ])
    model.compile(optimizer=keras.optimizers.Adam(),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit(train_images, train_labels, epochs=5)
    model.save(filename)  # Save your model in case of retraining.

model.summary()

# evaluate accuracy
test_loss, test_acc = model.evaluate(test_images2, test_labels)
print('Test accuracy:', test_acc)

# - Step 4 - Implement untargeted attack using the Basic Iterative Method
wrap = KerasModelWrapper(model)
bim = BasicIterativeMethod(wrap, sess=sess)
bim_params = {'eps': 0.1, 'eps_iter': 0.01, 'clip_min': 0., 'clip_max': 1.}

# - Step 5 - From each category in the Fashion-MNIST test set,
# select 10 images to generate adversarial examples using 5 and 10 iterations, respectively.
orig_images = np.empty([0, 28, 28, 1])
orig_labels = np.empty([0])
for clz in range(nb_classes):
    idxs = np.where(test_labels == clz)[0][:10]
    orig_images = np.append(orig_images, test_images2[idxs], axis=0)
    orig_labels = np.append(orig_labels, test_labels[idxs], axis=0)

bim_params["nb_iter"] = 5
adv_images5 = bim.generate_np(orig_images, **bim_params)
bim_params["nb_iter"] = 10
adv_images10 = bim.generate_np(orig_images, **bim_params)
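# A hedged follow-up sketch (not part of the original snippet): with the compiled Keras
# model and the arrays above, the attack's effect can be read off by re-evaluating
# accuracy on the adversarial batches; the 10-iteration set should score no higher
# than the 5-iteration set.
_, acc_adv5 = model.evaluate(adv_images5, orig_labels, verbose=0)
_, acc_adv10 = model.evaluate(adv_images10, orig_labels, verbose=0)
print('Accuracy on 5-iteration BIM examples:', acc_adv5)
print('Accuracy on 10-iteration BIM examples:', acc_adv10)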
def mnist_tutorial_jsma(train_start=0, train_end=5500, test_start=0,
                        test_end=1000, nb_epochs=8,
                        batch_size=100, nb_classes=10,
                        nb_filters=64,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    # sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])

    print("x_train shape: ", X_train.shape)
    print("y_train shape: ", Y_train.shape)

    # Train the model (no verbose logging)
    model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                verbose=False, rng=rng)

    f_out_clean = open("Clean_jsma_elastic_against5.log", "w")

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    f_out_clean.write('Test accuracy on legitimate test examples: ' + str(accuracy) + '\n')


    # Clean test against JSMA
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}
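    # theta is the perturbation applied to each selected feature (1.0 drives it
    # to its maximum) and gamma caps the fraction of features that may be
    # modified (here at most 10% of the pixels).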

    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    adv_x_jsma = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model.get_probs(adv_x_jsma)

    # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
    acc = model_eval(sess, x, y, preds_adv_jsma, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on JSMA adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against FGSM
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}

    fgsm = FastGradientMethod(model, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model.get_probs(adv_x_fgsm)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_fgsm, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on FGSM adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on FGSM adversarial examples: ' + str(acc) + '\n')


    ################################################################
    # Clean test against BIM
    bim_params = {'eps': 0.3,
                  'eps_iter': 0.01,
                  'nb_iter': 100,
                  'clip_min': 0.,
                  'clip_max': 1.}
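    # With eps_iter * nb_iter = 1.0 >> eps = 0.3, the iterates can reach any
    # point of the 0.3 L-infinity ball around the input.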
    bim = BasicIterativeMethod(model, sess=sess)
    adv_x_bim = bim.generate(x, **bim_params)
    preds_adv_bim = model.get_probs(adv_x_bim)

    # Evaluate the accuracy of the MNIST model on BIM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_bim, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on BIM adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against EN
    en_params = {'binary_search_steps': 1,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    en = ElasticNetMethod(model, back='tf', sess=sess)
    adv_x_en = en.generate(x, **en_params)
    preds_adv_en = model.get_probs(adv_x_en)

    # Evaluate the accuracy of the MNIST model on Elastic Net adversarial examples
    acc = model_eval(sess, x, y, preds_adv_en, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on EN adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on EN adversarial examples: ' + str(acc) + '\n')
    ################################################################
    # Clean test against DF
    deepfool_params = {'nb_candidate': 10,
                       'overshoot': 0.02,
                       'max_iter': 50,
                       'clip_min': 0.,
                       'clip_max': 1.}
    deepfool = DeepFool(model, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_df = model.get_probs(adv_x_df)

    # Evaluate the accuracy of the MNIST model on DeepFool adversarial examples
    acc = model_eval(sess, x, y, preds_adv_df, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on DF adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on DF adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against VAT
    vat_params = {'eps': 2.0,
                  'num_iterations': 1,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
    vat = VirtualAdversarialMethod(model, sess=sess)
    adv_x_vat = vat.generate(x, **vat_params)
    preds_adv_vat = model.get_probs(adv_x_vat)

    # Evaluate the accuracy of the MNIST model on VAT adversarial examples
    acc = model_eval(sess, x, y, preds_adv_vat, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc)
    f_out_clean.write('Clean test accuracy on VAT adversarial examples: ' + str(acc) + '\n')

    f_out_clean.close()

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(X_train.shape[0]) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')


    model_2 = make_basic_cnn()
    preds_2 = model_2(x)

    # Initialize the variables of the newly created model
    sess.run(tf.global_variables_initializer())

    # 1. Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model_2, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}
    adv_random = jsma.generate(x, **jsma_params)
    preds_adv_random = model_2.get_probs(adv_random)

    # 2. Instantiate FGSM attack
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    fgsm = FastGradientMethod(model_2, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model_2.get_probs(adv_x_fgsm)


    # 3. Instantiate Elastic net attack
    en_params = {'binary_search_steps': 5,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    enet = ElasticNetMethod(model_2, sess=sess)
    adv_x_en = enet.generate(x, **en_params)
    preds_adv_elastic_net = model_2.get_probs(adv_x_en)

    # 4. Deepfool
    deepfool_params = {'nb_candidate': 10,
                       'overshoot': 0.02,
                       'max_iter': 50,
                       'clip_min': 0.,
                       'clip_max': 1.}
    deepfool = DeepFool(model_2, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_deepfool = model_2.get_probs(adv_x_df)

    # 5. Basic Iterative Method (BIM)
    bim_params = {'eps': 0.3,
                  'eps_iter': 0.01,
                  'nb_iter': 100,
                  'clip_min': 0.,
                  'clip_max': 1.}
    base_iter = BasicIterativeMethod(model_2, sess=sess)
    adv_x_bi = base_iter.generate(x, **bim_params)
    preds_adv_base_iter = model_2.get_probs(adv_x_bi)

    # 6. C & W Attack
    cw = CarliniWagnerL2(model_2, back='tf', sess=sess)
    cw_params = {'binary_search_steps': 1,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    adv_x_cw = cw.generate(x, **cw_params)
    preds_adv_cw = model_2.get_probs(adv_x_cw)

    # 7. Virtual Adversarial Method (VAT)
    vat_params = {'eps': 2.0,
                  'num_iterations': 1,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
    vat = VirtualAdversarialMethod(model_2, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model_2.get_probs(adv_x)


    # For every training sample, generate a targeted JSMA adversarial example
    # for each of the other classes, and keep the clean sample as well.
    # Loop over the samples we want to perturb into adversarial examples.

    X_train_adv_set = []
    Y_train_adv_set = []
    for index in range(X_train.shape[0]):
        print('--------------------------------------')
        x_val = X_train[index:(index+1)]
        y_val = Y_train[index]


        # Also keep the clean (unperturbed) sample
        X_train_adv_set.append(x_val)
        Y_train_adv_set.append(y_val)

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_val))
        target_classes = other_classes(nb_classes, current_class)
        # Loop over all target classes
        for target in target_classes:
            # print('Generating adv. example for target class %i' % target)
            # This call runs the Jacobian-based saliency map approach

            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
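            # With y_target set, JSMA runs as a targeted attack: it perturbs
            # x_val until the model predicts class `target`.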
            adv_x = jsma.generate_np(x_val, **jsma_params)

            # append to X_train_adv_set and Y_train_adv_set
            X_train_adv_set.append(adv_x)
            Y_train_adv_set.append(y_val)

            # shape is: (1, 28, 28, 1)
            # print("adv_x shape is: ", adv_x.shape)

            # check for success rate
            # res = int(model_argmax(sess, x, preds, adv_x) == target)

    print('-------------Finished Generating Np Adversarial Data-------------------------')

    X_train_data = np.concatenate(X_train_adv_set, axis=0)
    Y_train_data = np.stack(Y_train_adv_set, axis=0)
    print("X_train_data shape is: ", X_train_data.shape)
    print("Y_train_data shape is: ", Y_train_data.shape)

    # Save the generated data so the attack does not have to be re-run later.
    np.savez("jsma_training_data.npz", x_train=X_train_data,
             y_train=Y_train_data)

    # To reload later: data = np.load("jsma_training_data.npz"),
    # then use data["x_train"] and data["y_train"].

    f_out = open("Adversarial_jsma_elastic_against5.log", "w")

    # Evaluate the model on clean data and against seven attacks:
    # JSMA, FGSM, BIM, Elastic Net, DeepFool, C&W, and VAT.
    def evaluate_against_all():
            # 1 Clean Data
            eval_params = {'batch_size': batch_size}
            accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                                  args=eval_params)
            print('Legitimate accuracy: %0.4f' % accuracy)

            tmp = 'Legitimate accuracy: '+ str(accuracy) + "\n"
            f_out.write(tmp)


            # 2 JSMA
            accuracy = model_eval(sess, x, y, preds_adv_random, X_test,
                                  Y_test, args=eval_params)

            print('JSMA accuracy: %0.4f' % accuracy)
            tmp = 'JSMA accuracy:'+ str(accuracy) + "\n"
            f_out.write(tmp)


            # 3 FGSM
            accuracy = model_eval(sess, x, y, preds_adv_fgsm, X_test,
                                  Y_test, args=eval_params)

            print('FGSM accuracy: %0.4f' % accuracy)
            tmp = 'FGSM accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 4 Base Iterative
            accuracy = model_eval(sess, x, y, preds_adv_base_iter, X_test,
                                  Y_test, args=eval_params)

            print('Base Iterative accuracy: %0.4f' % accuracy)
            tmp = 'Base Iterative accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 5 Elastic Net
            accuracy = model_eval(sess, x, y, preds_adv_elastic_net, X_test,
                                  Y_test, args=eval_params)

            print('Elastic Net accuracy: %0.4f' % accuracy)
            tmp = 'Elastic Net accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 6 DeepFool
            accuracy = model_eval(sess, x, y, preds_adv_deepfool, X_test,
                                  Y_test, args=eval_params)
            print('DeepFool accuracy: %0.4f' % accuracy)
            tmp = 'DeepFool accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 7 C & W Attack
            accuracy = model_eval(sess, x, y, preds_adv_cw, X_test,
                                  Y_test, args=eval_params)
            print('C & W accuracy: %0.4f' % accuracy)
            tmp = 'C & W  accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)

            # 8 Virtual Adversarial
            accuracy = model_eval(sess, x, y, preds_adv_vat, X_test,
                                  Y_test, args=eval_params)
            print('VAT accuracy: %0.4f' % accuracy)
            tmp = 'VAT accuracy:' + str(accuracy) + "\n"
            f_out.write(tmp)
            f_out.write("*******End of Epoch***********\n\n")

            print("*******End of Epoch***********\n\n")

        # report.adv_train_adv_eval = accuracy

    print("Now Adversarial Training with Elastic Net + modified X_train and Y_train")
    # trained_model.out
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': '/home/stephen/PycharmProjects/jsma-runall-mac/',
        'filename': 'trained_model.out'
    }
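    # Note (not in the original script): with predictions_adv supplied, the
    # CleverHans model_train of the releases this code targets minimizes the
    # average of the clean and adversarial losses, i.e. it performs adversarial
    # training on the JSMA-augmented dataset.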
    model_train(sess, x, y, preds_2, X_train_data, Y_train_data,
                predictions_adv=preds_adv_elastic_net,
                evaluate=evaluate_against_all, verbose=False,
                args=train_params, rng=rng)


    # Close TF session
    sess.close()
    return report
Example #32
0
class TestBasicIterativeMethod(TestFastGradientMethod):
    def setUp(self):
        super(TestBasicIterativeMethod, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = BasicIterativeMethod(self.model, sess=self.sess)

    def test_generate_np_gives_adversarial_example_linfinity(self):
        self.help_generate_np_gives_adversarial_example(ord=np.infty, eps=.5,
                                                        nb_iter=20)

    def test_generate_np_gives_adversarial_example_l1(self):
        self.help_generate_np_gives_adversarial_example(ord=1, eps=.5,
                                                        nb_iter=20)

    def test_generate_np_gives_adversarial_example_l2(self):
        self.help_generate_np_gives_adversarial_example(ord=2, eps=.5,
                                                        nb_iter=20)

    def test_do_not_reach_lp_boundary(self):
        """
        Make sure that iterative attack don't reach boundary of Lp
        neighbourhood if nb_iter * eps_iter is relatively small compared to
        epsilon.
        """
        for ord in [1, 2, np.infty]:
            _, _, delta = self.generate_adversarial_examples_np(
                ord=ord, eps=.5, nb_iter=10, eps_iter=.01)
            self.assertTrue(np.max(0.5 - delta) > 0.25)

    def test_attack_strength(self):
        """
        If clipping is not done at each iteration (not passing clip_min and
        clip_max to fgm), this attack fails by
        np.mean(orig_labels == new_labels) == .39.
        """
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                        clip_min=0.5, clip_max=0.7,
                                        nb_iter=5)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_generate_np_does_not_cache_graph_computation_for_nb_iter(self):

        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                        clip_min=-5.0, clip_max=5.0,
                                        nb_iter=10)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

        ok = [False]
        old_grads = tf.gradients

        def fn(*x, **y):
            ok[0] = True
            return old_grads(*x, **y)
        tf.gradients = fn
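        # If generate_np reused the graph that was built for nb_iter=10 above,
        # tf.gradients would never be called again and ok[0] would stay False.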

        x_adv = self.attack.generate_np(x_val, eps=1.0, ord=np.inf,
                                        clip_min=-5.0, clip_max=5.0,
                                        nb_iter=11)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

        tf.gradients = old_grads

        self.assertTrue(ok[0])
Example #33
0
def train_zero_knowledge_gandef_model(train_start=0,
                                      train_end=60000,
                                      test_start=0,
                                      test_end=10000,
                                      smoke_test=True,
                                      save=False,
                                      testing=False,
                                      backprop_through_attack=False,
                                      num_threads=None):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param train_batch_size: size of training batches
    :param test_batch_size: size of testing batches
    :param learning_rate: learning rate for training
    :param save: if true, the final model will be saved
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=num_threads)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get Fashion-MNIST data
    X_train, Y_train, X_test, Y_test = data_fashion_mnist()
    if smoke_test:
        X_train = X_train[:256]
        Y_train = Y_train[:256]
        X_test = X_test[:256]
        Y_test = Y_test[:256]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y_soft = tf.placeholder(tf.float32, shape=(None, 10))

    # Prepare optimizer
    learning_rate = 1e-4
    clf_opt = tf.train.AdamOptimizer(learning_rate)
    dic_opt = tf.train.AdamOptimizer(learning_rate * 10)

    # Training parameters for the Fashion-MNIST model
    train_params = {
        'nb_epochs': 80,
        'batch_size': 128,
        'trade_off': 2,
        'inner_epochs': 1
    }
    rng = np.random.RandomState([2017, 8, 30])

    # Adversarial training
    print("Start adversarial training")
    zero_knowledge_gandef_model = make_zero_knowledge_gandef_model(
        name="model_zero_knowledge_gandef")
    aug_x = gaussian_augment(x, std=1)
    preds_clean = zero_knowledge_gandef_model(x)
    preds_aug = zero_knowledge_gandef_model(aug_x)

    def cross_entropy(truth, preds, mean=True):
        # Get the logits operator
        op = preds.op
        if op.type == "Softmax":
            logits, = op.inputs
        else:
            logits = preds

        # Calculate cross entropy loss
        out = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                      labels=truth)

        # Take average loss and return
        if mean:
            out = tf.reduce_mean(out)
        return out

    def sigmoid_entropy(truth, preds, mean=True):
        # Get the logits operator
        op = preds.op
        if op.type == "Softmax":
            logits, = op.inputs
        else:
            logits = preds

        # Calculate cross entropy loss
        out = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                      labels=truth)

        # Take average loss and return
        if mean:
            out = tf.reduce_mean(out)
        return out

    # Perform and evaluate adversarial training
    gan_train_v2(sess,
                 x,
                 y_soft,
                 preds_clean,
                 X_train,
                 Y_train,
                 loss_func=[cross_entropy, sigmoid_entropy],
                 optimizer=[clf_opt, dic_opt],
                 predictions_adv=preds_aug,
                 evaluate=None,
                 args=train_params,
                 rng=rng,
                 var_list=zero_knowledge_gandef_model.get_gan_params())

    # Evaluate the accuracy of the Fashion-MNIST model on clean examples
    preds_clean = zero_knowledge_gandef_model(x)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': True,
        'reject_threshold': 0.5
    }
    clean_acc = confident_model_eval(sess,
                                     x,
                                     y_soft,
                                     preds_clean,
                                     X_test,
                                     Y_test,
                                     args=eval_params)
    print('Test accuracy on Clean test examples: %0.4f\n' % clean_acc)
    report.adv_train_clean_eval = clean_acc

    # Evaluate the accuracy of the Fashion-MNIST model on FGSM examples
    fgsm_params = {'eps': 0.6, 'clip_min': -1., 'clip_max': 1.}
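    # On the [-1, 1] clipping range used here, eps = 0.6 corresponds to a 0.3
    # perturbation on a [0, 1] pixel scale.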
    fgsm_att = FastGradientMethod(zero_knowledge_gandef_model, sess=sess)
    fgsm_adv = fgsm_att.generate(x, **fgsm_params)
    preds_fgsm_adv = zero_knowledge_gandef_model(fgsm_adv)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': False,
        'reject_threshold': 0.5
    }
    fgsm_acc = confident_model_eval(sess,
                                    x,
                                    y_soft,
                                    preds_fgsm_adv,
                                    X_test,
                                    Y_test,
                                    args=eval_params)
    print('Test accuracy on FGSM test examples: %0.4f\n' % fgsm_acc)
    report.adv_train_adv_eval = fgsm_acc

    # Evaluate the accuracy of the Fashion-MNIST model on BIM examples
    bim_params = {'eps': 0.6, 'eps_iter': 0.1, 'clip_min': -1., 'clip_max': 1.}
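    # nb_iter is not set here, so the attack uses the CleverHans default
    # (10 iterations in the releases this script was written against).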
    bim_att = BasicIterativeMethod(zero_knowledge_gandef_model, sess=sess)
    bim_adv = bim_att.generate(x, **bim_params)
    preds_bim_adv = zero_knowledge_gandef_model(bim_adv)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': False,
        'reject_threshold': 0.5
    }
    bim_acc = confident_model_eval(sess,
                                   x,
                                   y_soft,
                                   preds_bim_adv,
                                   X_test,
                                   Y_test,
                                   args=eval_params)
    print('Test accuracy on BIM test examples: %0.4f\n' % bim_acc)
    report.adv_train_adv_eval = bim_acc

    # Evaluate the accuracy of the Fashion-MNIST model on PGD examples
    pgd_params = {
        'eps': 0.6,
        'eps_iter': 0.02,
        'nb_iter': 40,
        'clip_min': -1.,
        'clip_max': 1.,
        'rand_init': True
    }
    pgd_att = MadryEtAl(zero_knowledge_gandef_model, sess=sess)
    pgd_adv = pgd_att.generate(x, **pgd_params)
    preds_pgd_adv = zero_knowledge_gandef_model(pgd_adv)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': False,
        'reject_threshold': 0.5
    }
    pgd_acc = confident_model_eval(sess,
                                   x,
                                   y_soft,
                                   preds_pgd_adv,
                                   X_test,
                                   Y_test,
                                   args=eval_params)
    print('Test accuracy on PGD test examples: %0.4f\n' % pgd_acc)
    report.adv_train_adv_eval = pgd_acc

    # Save model
    if save:
        model_path = "models/zero_knowledge_gandef"
        vars_to_save = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                         scope='model_zero_knowledge_gandef*')
        assert len(vars_to_save) > 0
        saver = tf.train.Saver(var_list=vars_to_save)
        saver.save(sess, model_path)
        print('Model saved\n')
    else:
        print('Model not saved\n')