Example #1
def load_attack(sess, attack_method, model, targeted, adv_ys, eps, batch_size):

    # every attack below consumes softmax probabilities, except CW,
    # which works on the raw logits
    model_prob = lambda x: model.predict(x, softmax=True)

    if attack_method == 'fgsm':
        from cleverhans.attacks import FastGradientMethod
        attack = FastGradientMethod(model_prob, sess=sess)
        attack_params, yname = config_fgsm(targeted, adv_ys, eps, batch_size)

    elif attack_method == 'pgd':
        from cleverhans.attacks import MadryEtAl
        attack = MadryEtAl(model_prob, sess=sess)
        attack_params, yname = config_madry(targeted, adv_ys, eps, batch_size)

    elif attack_method == 'mim':
        from cleverhans.attacks import MomentumIterativeMethod
        attack = MomentumIterativeMethod(model_prob, sess=sess)
        attack_params, yname = config_mim(targeted, adv_ys, eps, batch_size)

    elif attack_method == 'cw':
        from cleverhans.attacks import CarliniWagnerL2
        model_logit = lambda x: model.predict(x, softmax=False)
        attack = CarliniWagnerL2(model_logit, sess=sess)
        attack_params, yname = config_cw(targeted, adv_ys, eps, batch_size)

    return attack, attack_params, yname
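The config_* helpers are not shown in this listing; a minimal sketch of what config_fgsm would have to return for load_attack to consume it (the parameter names and clip bounds below are assumptions for [0, 1]-scaled inputs):

def config_fgsm(targeted, adv_ys, eps, batch_size):
    # hypothetical helper: returns (attack_params, yname) as used above;
    # batch_size is accepted only for symmetry with the other config_* helpers
    yname = 'y_target' if targeted else 'y'
    attack_params = {'eps': eps, 'clip_min': 0., 'clip_max': 1., yname: adv_ys}
    return attack_params, yname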
Example #2
    def test_generate_np_high_confidence_untargeted_examples(self):
        import tensorflow as tf

        def trivial_model(x):
            W1 = tf.constant([[1, -1]], dtype=tf.float32)
            res = tf.matmul(x, W1)
            return res

        for CONFIDENCE in [0, 2.3]:
            x_val = np.random.rand(10, 1) - .5
            x_val = np.array(x_val, dtype=np.float32)

            orig_labs = np.argmax(self.sess.run(trivial_model(x_val)), axis=1)
            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10,
                                       clip_max=10,
                                       confidence=CONFIDENCE,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model(x_adv))

            good_labs = new_labs[np.arange(10), 1 - orig_labs]
            bad_labs = new_labs[np.arange(10), orig_labs]

            assert np.mean(np.argmax(new_labs, axis=1) == orig_labs) == 0
            assert np.isclose(0,
                              np.min(good_labs - (bad_labs + CONFIDENCE)),
                              atol=1e-1)
Example #3
    def test_generate_np_high_confidence_untargeted_examples(self):

        trivial_model = TrivialModel()

        for CONFIDENCE in [0, 2.3]:
            x_val = np.random.rand(10, 1) - .5
            x_val = np.array(x_val, dtype=np.float32)

            orig_labs = np.argmax(self.sess.run(
                trivial_model.get_logits(x_val)),
                                  axis=1)
            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10,
                                       clip_max=10,
                                       confidence=CONFIDENCE,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model.get_logits(x_adv))

            good_labs = new_labs[np.arange(10), 1 - orig_labs]
            bad_labs = new_labs[np.arange(10), orig_labs]

            self.assertTrue(
                np.mean(np.argmax(new_labs, axis=1) == orig_labs) == 0)
            self.assertTrue(
                np.isclose(0,
                           np.min(good_labs - (bad_labs + CONFIDENCE)),
                           atol=1e-1))
Example #4
def carlini_wagner_attack(image_index):
    import logging

    # enable debug logging from the cleverhans attack loop
    logger = logging.getLogger('cleverhans')
    logger.setLevel(logging.DEBUG)
    filename, orig_image = importer.load_images_at_index(image_index)
    tf.reset_default_graph()
    folder_path = os.path.join(config.ADVERSARIAL_FOLDER,
                               "carlini_wagner_base")
    os.makedirs(folder_path, exist_ok=True)
    with tf.Session() as sess:
        model = Inception_V3_Model(np.float32(orig_image))
        attack = CarliniWagnerL2(model, sess=sess)
        params = {
            'confidence': 0,
            'y': None,
            'max_iterations': 200,
            'learning_rate': 0.01,
            "num_labels": 1001,
            'initial_const': 10,
            'clip_min': -1,
            'clip_max': 1
        }

        variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        saver = tf.train.Saver(variables)
        saver.restore(sess, importer.checkpoint_path)
        x_adv = attack.generate_np(orig_image, **params)
        print("generated shape:{}".format(x_adv.shape))
        utils.image_saver(x_adv, [filename], folder_path)
Example #5
    def __init__(self,
                 model,
                 source_samples=2,
                 binary_search_steps=5,
                 cw_learning_rate=5e-3,
                 confidence=0,
                 attack_iterations=1000,
                 attack_initial_const=1e-2):
        super(Attack, self).__init__()

        model_wrap = KerasModelWrapper(model.model)
        self.model = model_wrap
        self.sess = model.sess

        self.x = model.input_ph
        self.y = Input(shape=(model.num_classes, ), dtype='float32')

        self.cw = CarliniWagnerL2(self.model, sess=self.sess)
        self.cw_params = {
            'binary_search_steps': binary_search_steps,
            'y': None,
            'abort_early': True,
            'max_iterations': attack_iterations,
            'learning_rate': cw_learning_rate,
            'batch_size': source_samples,
            'initial_const': attack_initial_const,
            'confidence': confidence,
            'clip_min': 0.0,
        }
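A sketch of how such a wrapper typically consumes the stored attack object and parameter dict (the method name and label handling are assumptions, not part of this snippet):

    def attack_batch(self, x, y=None):
        # hypothetical consumer of self.cw / self.cw_params built in __init__
        params = dict(self.cw_params)
        params['y'] = y  # one-hot labels, or None to attack the predictions
        return self.cw.generate_np(x, **params)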
Example #6
def generate_adv_samples(wrap, generator, sess, file_names=None):
    if ATTACK_NAME == 'fgsm':
        attack_instance_graph = FastGradientMethod(wrap, sess)
        attack_instance = fgsm
    else:
        attack_instance_graph = CarliniWagnerL2(wrap, sess)
        attack_instance = cw

    diff_L2 = []

    if file_names is None:
        file_names = generator.get_file_names()
    image_index = 0

    TEN_LABEL = convert_to_one_hot(10, NB_CLASSES)
    NINETY_LABEL = convert_to_one_hot(90, NB_CLASSES)
    for legit_sample, legit_label in generator:

        ground_truth = np.argmax(legit_label)

        # Assumed intent of the one-hot targets built above: steer samples
        # from the upper half of the label range toward class 10, the rest
        # toward class 90
        if ground_truth > 50:
            adv_x = attack_instance.attack(legit_sample, TEN_LABEL,
                                           attack_instance_graph)
        else:
            adv_x = attack_instance.attack(legit_sample, NINETY_LABEL,
                                           attack_instance_graph)

        diff_L2.append(L2_distance(legit_sample, adv_x))

        save_image(ADV_DATASET_PATH + 'test/' + file_names[image_index], adv_x[0, :, :, :])
        image_index += 1

    print("Obtaining statistical information for L2 perturbation summed by channels")
    print_statistical_information(diff_L2)
Example #7
def mnist_cw_attack(sample,
                    target,
                    model,
                    sess,
                    targeted=True,
                    attack_iterations=100):
    cw = CarliniWagnerL2(model, back='tf', sess=sess)

    if targeted:
        adv_input = sample
        adv_ys = target
        yname = "y_target"
    else:
        adv_input = sample
        adv_ys = None
        yname = "y"
    cw_params = {
        'binary_search_steps': 1,
        'abort_early': False,
        yname: adv_ys,
        'confidence': 1,
        'max_iterations': attack_iterations,
        'learning_rate': 0.1,
        #'batch_size': 1,
        'clip_min': 0.,
        'clip_max': 1.,
        'initial_const': 10
    }

    adv = cw.generate_np(adv_input, **cw_params)
    return adv
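A usage sketch for mnist_cw_attack; the MNIST model, session, and input sample are assumed to be set up as in the surrounding examples, and the target must be a one-hot batch:

import numpy as np

target = np.zeros((1, 10), dtype=np.float32)
target[0, 3] = 1.  # illustrative target class
adv_sample = mnist_cw_attack(sample, target, model, sess,
                             targeted=True, attack_iterations=100)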
Example #8
    def test_generate_np_high_confidence_targeted_examples(self):

        trivial_model = TrivialModel()

        for CONFIDENCE in [0, 2.3]:
            x_val = np.random.rand(10, 1) - .5
            x_val = np.array(x_val, dtype=np.float32)

            feed_labs = np.zeros((10, 2))
            feed_labs[np.arange(10), np.random.randint(0, 2, 10)] = 1
            attack = CarliniWagnerL2(trivial_model, sess=self.sess)
            x_adv = attack.generate_np(x_val,
                                       max_iterations=100,
                                       binary_search_steps=2,
                                       learning_rate=1e-2,
                                       initial_const=1,
                                       clip_min=-10,
                                       clip_max=10,
                                       confidence=CONFIDENCE,
                                       y_target=feed_labs,
                                       batch_size=10)

            new_labs = self.sess.run(trivial_model.get_logits(x_adv))

            good_labs = new_labs[np.arange(10), np.argmax(feed_labs, axis=1)]
            bad_labs = new_labs[np.arange(10),
                                1 - np.argmax(feed_labs, axis=1)]

            self.assertClose(CONFIDENCE,
                             np.min(good_labs - bad_labs),
                             atol=1e-1)
            self.assertTrue(
                np.mean(
                    np.argmax(new_labs, axis=1) == np.argmax(feed_labs,
                                                             axis=1)) > .9)
Example #9
def prepare_attack(sess, args, model, adv_input, target_embeddings):
    if args.attack_type == 'FGSM':
        # Define FGSM for the model
        steps = 1
        # eps = args.eps
        alpha = args.eps / steps
        fgsm = FastGradientMethod(model)
        fgsm_params = {'eps': alpha, 'clip_min': 0., 'clip_max': 1.}
        adv_x = fgsm.generate(model.face_input, **fgsm_params)
    elif args.attack_type == 'CW':
        model.face_input.set_shape(np.shape(adv_input))
        # Instantiate a CW attack object
        cw = CarliniWagnerL2(model, sess)
        cw_params = {
            'binary_search_steps': 1,
            'max_iterations': 100,
            'learning_rate': .1,  # .2
            'batch_size': args.lfw_batch_size,
            'initial_const': 10
        }  # initial_const: the initial trade-off constant that tunes the
        # relative importance of perturbation size vs. confidence of
        # classification
        # adv_x = cw.generate_np(adv_input, **cw_params)
        feed_dict = {
            model.face_input: adv_input,
            model.victim_embedding_input: target_embeddings,
            model.batch_size: 10,
            model.phase_train: False
        }
        # generate() only takes the input tensor plus keyword params; the
        # placeholders in feed_dict are supplied when adv_x is evaluated
        adv_x = cw.generate(model.face_input, **cw_params)

    return adv_x
Example #10
def build_adv(make_obs_tf, q_func, num_actions, epsilon, noisy, attack=None):
    with tf.variable_scope('deepq', reuse=tf.AUTO_REUSE):
        obs_tf_in = U.ensure_tf_input(make_obs_tf("observation"))
        stochastic_ph_adv = tf.placeholder(tf.bool, (), name="stochastic_adv")
        update_eps_ph_adv = tf.placeholder(tf.float32, (),
                                           name="update_eps_adv")
        eps = tf.get_variable("eps", (),
                              initializer=tf.constant_initializer(0))
        update_eps_expr_adv = eps.assign(
            tf.cond(update_eps_ph_adv >= 0, lambda: update_eps_ph_adv,
                    lambda: eps))
        print("==========================================")

        #def wrapper(x):
        #    return q_func(x, num_actions, scope="q_func", reuse=True, concat_softmax=True, noisy=noisy)
        if attack == 'fgsm':
            adversary = FastGradientMethod(q_func(obs_tf_in.get(),
                                                  num_actions,
                                                  scope="q_func",
                                                  reuse=True,
                                                  concat_softmax=True,
                                                  noisy=noisy),
                                           sess=U.get_session())
            adv_observations = adversary.generate(
                obs_tf_in.get(), eps=epsilon, clip_min=0, clip_max=1.0) * 255.0
            print("----")
            print(adv_observations.shape)

        else:
            adversary = CarliniWagnerL2(q_func(obs_tf_in.get(),
                                               num_actions,
                                               scope="q_func",
                                               reuse=True,
                                               concat_softmax=True,
                                               noisy=noisy),
                                        sess=U.get_session())
            cw_params = {
                'binary_search_steps': 1,
                'max_iterations': 100,
                'learning_rate': 0.1,
                'initial_const': 10,
                'clip_min': 0,
                'clip_max': 1.0
            }
            adv_observations = adversary.generate(obs_tf_in.get(), **
                                                  cw_params) * 255.0
        # saveScreenPNG(b'test_image.png')

        craft_adv_obs = U.function(
            inputs=[obs_tf_in, stochastic_ph_adv, update_eps_ph_adv],
            outputs=adv_observations,
            givens={
                update_eps_ph_adv: -1.0,
                stochastic_ph_adv: True
            },
            updates=[update_eps_expr_adv])
        return craft_adv_obs
Example #11
def init_attack(model, attack_params_dict, sess):
    """
    Initialize the adversarial attack using the cleverhans toolbox

    Parameters
    ----------
    model : Keras Model
        The model to attack

    attack_params_dict : dict
        Self-defined dictionary specifying the attack and its parameters

    sess : Session
        The current tf session

    Returns
    -------
    attack : cleverhans Attack
        The Attack object

    attack_params
        Dictionary with the value of the attack parameters, valid to generate
        adversarial examples with cleverhans.
    """

    # Wrapper for the Keras model
    model_wrap = KerasModelWrapper(model)

    # Initialize attack
    batch_size = None
    if attack_params_dict['attack'] == 'fgsm':
        attack = FastGradientMethod(model_wrap, sess=sess)
        attack_params = {'eps': attack_params_dict['eps'], 'clip_min': 0., 
                         'clip_max': 1.}
    elif attack_params_dict['attack'] == 'spsa':
        attack = SPSA(model_wrap, sess=sess)
        attack_params = {'epsilon': attack_params_dict['eps'], 
                         'num_steps': attack_params_dict['n_steps']}
        batch_size = 1
    elif attack_params_dict['attack'] == 'deepfool':
        attack = DeepFool(model_wrap, sess=sess)
        attack_params = {'clip_min': 0., 'clip_max': 1.}
    elif attack_params_dict['attack'] == 'pgd':
        attack = ProjectedGradientDescent(model_wrap, sess=sess)
        attack_params = {'eps': attack_params_dict['eps'], 
                         'eps_iter': attack_params_dict['eps_iter'],
                         'nb_iter': attack_params_dict['n_steps'],
                         'clip_min': 0., 'clip_max': 1.}
    elif attack_params_dict['attack'] == 'carlini':
        attack = CarliniWagnerL2(model_wrap, sess=sess)
        attack_params = {'clip_min': 0., 'clip_max': 1.}
    else:
        raise NotImplementedError()

    return attack, attack_params, batch_size
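The returned triple plugs directly into generate_np; a minimal sketch, assuming a compiled Keras model, a tf session sess, and a [0, 1]-scaled batch x_batch:

attack_params_dict = {'attack': 'fgsm', 'eps': 0.03}  # illustrative values
attack, attack_params, batch_size = init_attack(model, attack_params_dict, sess)
x_adv = attack.generate_np(x_batch, **attack_params)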
Example #12
def eval_cleverhans():

    # Set test phase
    learning_phase = K.learning_phase()
    K.set_learning_phase(0)

    # Pre-process images
    images_tf = images.astype(K.floatx())
    images_tf /= 255.

    # Wrapper for the Keras model
    model_wrap = KerasModelWrapper(model)

    # Initialize attack
    if attack_params_dict['attack'] == 'fgsm':
        attack = FastGradientMethod(model_wrap, sess=K.get_session())
        attack_params = {'eps': attack_params_dict['eps'], 'clip_min': 0., 
                         'clip_max': 1.}
    elif attack_params_dict['attack'] == 'deepfool':
        attack = DeepFool(model_wrap, sess=K.get_session())
        attack_params = {'clip_min': 0., 'clip_max': 1.}
    elif attack_params_dict['attack'] == 'madry':
        attack = ProjectedGradientDescent(model_wrap, sess=K.get_session())
        attack_params = {'clip_min': 0., 'clip_max': 1.}
    elif attack_params_dict['attack'] == 'carlini':
        attack = CarliniWagnerL2(model_wrap, sess=K.get_session())
        attack_params = {'clip_min': 0., 'clip_max': 1.}
    else:
        raise NotImplementedError()

    # Define input TF placeholder
    x = tf.placeholder(K.floatx(), shape=(None,) + images.shape[1:])
    y = tf.placeholder(K.floatx(), shape=(None,) + (labels.shape[-1],))

    # Define adversarial predictions symbolically
    x_adv = attack.generate(x, **attack_params)
    x_adv = tf.stop_gradient(x_adv)
    predictions_adv = model(x_adv)

    # Evaluate the accuracy of the model on adversarial examples
    eval_par = {'batch_size': batch_size}
    # feed_dict = {K.learning_phase(): attack_params_dict['learning_phase']}
    # acc_adv = model_eval(K.get_session(), x, y, predictions_adv, images, 
    #                      labels, feed=feed_dict, args=eval_par)
    acc_adv = model_eval(K.get_session(), x, y, predictions_adv, images_tf, 
                         labels, args=eval_par)

    print('Adversarial accuracy against %s: %.4f\n' %
          (attack_params_dict['attack'], acc_adv))

    # Set original phase
    K.set_learning_phase(learning_phase)

    return acc_adv
Example #13
    def _CW(self):
        cw_attack = CarliniWagnerL2(self.wrapped_model, sess=self.sess)
        params = {
            'batch_size': 1,
            'max_iterations': 1000,
            'binary_search_steps': 9,
            'initial_const': 1e-3,
            'learning_rate': 5e-3,
            'clip_min': 0.,
            'clip_max': 1.
        }
        adv_x = cw_attack.generate(self.x, **params)
        self.save_images(adv_x, self.save_loc)
Example #14
def CW_attack_l2():
    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True
    sess = tf.Session(config=tf_config)
    x_op = tf.placeholder(tf.float32, shape=(
        None,
        3,
        32,
        32,
    ))
    y_op = tf.placeholder(tf.float32, shape=(None, 10))

    # Convert pytorch model to a tf_model and wrap it in cleverhans
    tf_model_fn = convert_pytorch_model_to_tf(model)
    cleverhans_model = CallableModelWrapper(tf_model_fn, output_layer='logits')

    # Create a CW L2 attack
    cw = CarliniWagnerL2(cleverhans_model, sess=sess)
    cw_params = {
        'binary_search_steps': 1,
        'max_iterations': 100,
        'batch_size': args.b,
        'clip_min': 0.,
        'clip_max': 1.,
        'y': y_op
    }

    adv_x_op = cw.generate(x_op, **cw_params)
    adv_preds_op = tf_model_fn(adv_x_op)

    # Evaluate accuracy under the CW L2 attack
    correct = 0
    total = 0
    for batch_idx, (inputs, targets) in enumerate(test_loader):
        adv = sess.run(adv_x_op,
                       feed_dict={
                           x_op: inputs,
                           y_op: torch.nn.functional.one_hot(targets, 10)
                       })
        # project each sample's perturbation onto an L2 ball of radius 0.5
        diff = (torch.tensor(adv) - inputs).renorm(p=2, dim=0, maxnorm=0.5)
        adv = (inputs + diff).clamp(0., 1.)
        correct += model(adv.cuda()).topk(1)[1][:, 0].eq(
            targets.cuda()).cpu().sum().item()
        total += len(inputs)
        total += len(inputs)

        sys.stdout.write("\rWhite-box CW l2 attack... Acc: %.3f%% (%d/%d)" %
                         (100. * correct / total, correct, total))
        sys.stdout.flush()

    print('Accuracy under CW l2 attack: %.3f%%' % (100. * correct / total))
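The renorm call above projects each sample's perturbation onto an L2 ball of radius 0.5; a self-contained check of that semantics:

import torch

diff = torch.randn(8, 3, 32, 32)             # a batch of raw perturbations
proj = diff.renorm(p=2, dim=0, maxnorm=0.5)  # cap each sample's L2 norm at 0.5
print(proj.view(8, -1).norm(p=2, dim=1))     # every entry is <= 0.5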
Example #15
    def setUp(self):
        super(TestCarliniWagnerL2, self).setUp()
        import tensorflow as tf

        # The world's simplest neural network
        def my_model(x):
            W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
            h1 = tf.nn.sigmoid(tf.matmul(x, W1))
            W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
            res = tf.matmul(h1, W2)
            return res

        self.sess = tf.Session()
        self.model = my_model
        self.attack = CarliniWagnerL2(self.model, sess=self.sess)
Example #16
def generate(sess,
             model,
             data_feeder,
             source,
             target,
             adv_dump_dir,
             nb_samples,
             learning_rate=0.1,
             confidence=0):
    wrap = KerasModelWrapper(model.model)
    cwl2 = CarliniWagnerL2(wrap, sess=sess)

    batch_size = 32
    max_iterations = 450
    abort_early = True
    bin_search_steps = 1
    cwl2_params = {
        'confidence': confidence,
        'learning_rate': learning_rate,
        'binary_search_steps': bin_search_steps,
        'batch_size': batch_size,
        'max_iterations': max_iterations,
        'abort_early': abort_early,
        'initial_const': 10,
        'clip_min': 0.0,
        'clip_max': 1.0,
        'y_target': data_feeder.get_labels(target, nb_samples)
    }

    craft_data = data_feeder.get_evasion_craft_data(source_class=source,
                                                    total_count=nb_samples)
    adv_data = cwl2.generate_np(craft_data, **cwl2_params)

    # Commit data
    adv_writer = AdversarialWriterEvasion(source_class=source,
                                          target_class=target,
                                          attack_params={
                                              'confidence': confidence,
                                              'learning_rate': learning_rate,
                                              'binary_search_steps':
                                              bin_search_steps,
                                              'batch_size': batch_size,
                                              'max_iterations': max_iterations,
                                              'abort_early': abort_early
                                          },
                                          adversarial_data_path=adv_dump_dir)
    adv_writer.batch_put(craft_data, adv_data)
    adv_writer.commit()
Example #17
def main(_):
    # Images for inception classifier are normalized to be in [-1, 1] interval,
    # eps is a difference between pixels so it should be in [0, 2] interval.
    # Renormalizing epsilon from [0, 255] to [0, 2].
    batch_size = FLAGS.batch_size
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001
    targeted = False
    tf.logging.set_verbosity(tf.logging.DEBUG)

    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        model = InceptionModel(num_classes)
        with tf.Session() as sess:
            cw_params = {'binary_search_steps': 10,
                         'confidence': 20,
                         # "y": np.random.randint(1000, size=(8, 1001)),
                         "y": np.zeros((16, 1001)),
                         'learning_rate': 0.1,
                         'max_iterations': 20000,
                         'abort_early': True,
                         'batch_size': batch_size * num_classes if
                         targeted else batch_size,
                         'initial_const': 0.01}

            cw = CarliniWagnerL2(model, back='tf', sess=sess)
            x_adv = cw.generate(x_input, **cw_params)
            saver = tf.train.Saver(slim.get_model_variables())
            # initialize first, then restore, so the checkpoint weights are
            # not clobbered by the initializer
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, FLAGS.checkpoint_path)
            i = 0
            for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                adv_images = sess.run(x_adv, feed_dict={x_input: images})
                print("input images: ", images.shape)
                i += batch_size
                print(i)
                print(filenames)
                print(adv_images.shape)
                save_images(adv_images, filenames, FLAGS.output_dir)
Example #18
def attack(model, x_input, input_img):
    wrap = KerasModelWrapper(model)
    cw_params = {'binary_search_steps': 1,
                 'max_iterations': 5,
                 'learning_rate': 2e-3,
                 'batch_size': 1,
                 'initial_const': 0.1,
                 'confidence': 0,
                 'clip_min': -1.,
                 'clip_max': 1.}
    cw = CarliniWagnerL2(wrap, sess=sess)
    adv = cw.generate(x=x_input, **cw_params)
    adv_img = sess.run(adv, feed_dict={x_input: input_img})
    return adv_img
Example #19
def vgg16_evaluate():

    sess = tf.Session()
    keras.backend.set_session(sess)

    # Load images for evaluation. Uses the Stanford CS231n tiny set (goldfish)
    images = []
    target = np.ones(100)
    for index, myfile in enumerate(os.listdir(path)):
        if index == 100:
            break
        if myfile.endswith('JPEG'):
            image = load_img(path + '/' + myfile, target_size=(224, 224))
            inputs = img_to_array(image)
            inputs = inputs.reshape(1, inputs.shape[0], inputs.shape[1],
                                    inputs.shape[2])
            images.append(inputs)

    target = utils.to_categorical(target, 1000)
    x_input = np.vstack(images)
    x_input = preprocess_input(x_input)
    model = VGG16(include_top=True, weights='imagenet', input_tensor=None,
                  input_shape=None, pooling=None, classes=1000)
    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])

    # y = model.evaluate(x_input, target, verbose=1)
    # print(y)

    cw_attack = CarliniWagnerL2(model=model, back='tf', sess=sess)

    # Untargeted cw_attack parameters
    cw_params = {'binary_search_steps': 1,
                 'y_target': None,
                 'max_iterations': 10,
                 'learning_rate': 0.1,
                 'batch_size': 100,
                 'initial_const': 10}
    adv_inputs = x_input[:]
    adv = cw_attack.generate_np(adv_inputs, **cw_params)

    adv_y = model.evaluate(adv, target, verbose=1)
    print(adv_y)
Example #20
def get_CWL2_adversarial(targeted, xs, y_target, classifier, batch_size,
                         cwl2_confidence):

    ATTACK_BATCH = batch_size
    # NOTE: floor division drops any samples in an incomplete final batch
    samples_range = int(xs.shape[0] / ATTACK_BATCH)

    wrap = KerasModelWrapper(classifier)
    attack = CarliniWagnerL2(wrap, sess=K.get_session())
    cw_params = {
        'confidence': cwl2_confidence,
        'max_iterations': 1000,
        'binary_search_steps': 9,
        'initial_const': 1,
        'clip_min': -5,
        'clip_max': 5,
        'batch_size': ATTACK_BATCH
    }

    if targeted:
        y_target = np.expand_dims(y_target, axis=1)

        attack_xs = attack.generate_np(xs[:ATTACK_BATCH, :, :, :],
                                       y_target=y_target[:ATTACK_BATCH],
                                       **cw_params)
        for ii in range(1, samples_range):
            print('iter', ii)
            new_attack_batch = attack.generate_np(
                xs[ii * ATTACK_BATCH:(ii + 1) * ATTACK_BATCH, :, :, :],
                y_target=y_target[ii * ATTACK_BATCH:(ii + 1) * ATTACK_BATCH],
                **cw_params)
            attack_xs = np.concatenate((attack_xs, new_attack_batch), axis=0)
    else:
        attack_xs = attack.generate_np(xs[:ATTACK_BATCH, :, :, :],
                                       **cw_params)
        for ii in range(1, samples_range):
            print('iter', ii)
            new_attack_batch = attack.generate_np(
                xs[ii * ATTACK_BATCH:(ii + 1) * ATTACK_BATCH, :, :, :],
                **cw_params)
            attack_xs = np.concatenate((attack_xs, new_attack_batch), axis=0)
    return attack_xs
Example #21
    def __init__(self, model, targeted=False, confidence=0, batch_size=1,
                 learning_rate=5e-3, binary_search_steps=5,
                 max_iterations=1000, abort_early=True, initial_const=1e-2,
                 clip_min=-1, clip_max=1):
        super().__init__(model=model, clip_min=clip_min, clip_max=clip_max)
        self._targeted = targeted
        self._confidence = confidence
        self._batch_size = batch_size
        self._learning_rate = learning_rate
        self._binary_search_steps = binary_search_steps
        self._max_iterations = max_iterations
        self._abort_early = abort_early
        self._initial_const = initial_const

        with self.graph.as_default():
            self._method = CarliniWagnerL2(self._model, sess=self.session, confidence=self._confidence,
                                           batch_size=self._batch_size, learning_rate=self._learning_rate,
                                           binary_search_steps=self._binary_search_steps,
                                           max_iterations=self._max_iterations, abort_early=self._abort_early,
                                           initial_const=self._initial_const, clip_min=self._clip_min,
                                           clip_max=self._clip_max, targeted=self._targeted)
Example #22
def cw_attack(sess, x, logits, n_ae, final=False):
    cw_attack_obj = CarliniWagnerL2(logits, sess=sess, back='tf')
    if final:
        cw_params = {'binary_search_steps': 9,
                     'max_iterations': 2000,
                     'learning_rate': 0.01,
                     'initial_const': 1.0,
                     'abort_early': True,
                     'batch_size': n_ae
                     }
    else:
        cw_params = {'binary_search_steps': 5,
                     'max_iterations': 500,
                     'learning_rate': 0.01,
                     'initial_const': 1.0,
                     'batch_size': n_ae  # need to specify, since CarliniWagnerL2 is not completely symbolic
                     }
    adv_ex_tensor = cw_attack_obj.generate(x, **cw_params)
    adv_ex_tensor = tf.stop_gradient(adv_ex_tensor)
    return adv_ex_tensor
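Since generate is symbolic, the returned tensor is evaluated later under a feed; a minimal usage sketch, assuming x is the placeholder passed in and x_batch holds n_ae samples:

adv_ex_tensor = cw_attack(sess, x, logits, n_ae=32)
adv_batch = sess.run(adv_ex_tensor, feed_dict={x: x_batch})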
Example #23
def prepare_attack(sess, args, model, adv_input, target_embeddings):
    if args.attack_type == 'FGSM':
        # Define FGSM for the model
        steps = 1
        alpha = args.eps / steps
        fgsm = FastGradientMethod(model)
        fgsm_params = {'eps': alpha, 'clip_min': 0., 'clip_max': 1.}
        adv_x = fgsm.generate(model.face_input, **fgsm_params)
    elif args.attack_type == 'CW':
        model.face_input.set_shape(np.shape(adv_input))
        # Instantiate a CW attack object
        cw = CarliniWagnerL2(model, sess)
        cw_params = {
            'binary_search_steps': 1,
            'max_iterations': 100,
            'learning_rate': .2,
            'batch_size': args.lfw_batch_size,
            'initial_const': args.init_c,  # 10
            'confidence': 10
        }
        feed_dict = {
            model.face_input: adv_input,
            model.victim_embedding_input: target_embeddings
        }
        # the placeholders in feed_dict are supplied by the caller when
        # adv_x is evaluated
        # adv_x = cw.generate_np(adv_input, **cw_params)
        adv_x = cw.generate(model.face_input, **cw_params)
    elif args.attack_type == 'random':
        random_attack = Noise(model, sess)
        noise_params = {
            'eps': args.eps,
            'ord': np.inf,
            'clip_min': 0,
            'clip_max': 1
        }
        adv_x = random_attack.generate(model.face_input, **noise_params)

    return adv_x
Example #24
def main(_):
    # Images for inception classifier are normalized to be in [-1, 1] interval,
    # eps is a difference between pixels so it should be in [0, 2] interval.
    # Renormalizing epsilon from [0, 255] to [0, 2].
    batch_shape = [1, FLAGS.image_height, FLAGS.image_width, 3]
    num_classes = 1001

    tf.logging.set_verbosity(tf.logging.INFO)

    with tf.Graph().as_default():
        # Prepare graph
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        model = InceptionModel(num_classes)
        probs = model(x_input)

        cw = CarliniWagnerL2(model)
        cw_params = {
            'binary_search_steps': 1,
            'max_iterations': 5,
            'learning_rate': 0.1,
            'batch_size': 1,
            'initial_const': 10,
            'clip_min': -1.,
            'clip_max': 1.
        }

        x_adv = cw.generate(x=x_input, **cw_params)

        # Run computation
        saver = tf.train.Saver(slim.get_model_variables())
        session_creator = tf.train.ChiefSessionCreator(
            scaffold=tf.train.Scaffold(saver=saver),
            checkpoint_filename_with_path=FLAGS.checkpoint_path,
            master=FLAGS.master)

        with tf.train.MonitoredSession(
                session_creator=session_creator) as sess:
            for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                adv_images = sess.run(x_adv, feed_dict={x_input: images})
Example #25
    def calc_radius(self, input, label, norm_type, upper=0.5, eps=1e-2):

        # only support L2 norm
        assert norm_type == '2'

        xs = input.unsqueeze(0)
        clean_preds = self.model(xs.cuda()).detach().cpu().numpy()
        clean_pred = np.argmax(clean_preds[0])
        if clean_pred != label:
            return 0.

        x_op = tf.placeholder(tf.float32,
                              shape=(
                                  None,
                                  input.shape[0],
                                  input.shape[1],
                                  input.shape[2],
                              ))
        attk = CarliniWagnerL2(self.ch_model, sess=self.sess)
        params = {
            'y': tf.one_hot([label], get_num_classes(self.dataset)),
            'clip_min': 0.0,
            'clip_max': 1.0,
            'max_iterations': 1000
        }
        adv_x = attk.generate(x_op, **params)
        adv_preds_op = self.tf_model(adv_x)

        (adv_preds, adv_xsamp) = self.sess.run((adv_preds_op, adv_x),
                                               feed_dict={x_op: xs})

        adv_pred = np.argmax(adv_preds[0])
        if adv_pred == label:
            # failed to find an adversarial example; return the maximum
            # possible radius
            return la.norm(np.ones_like(adv_xsamp.reshape(-1)) * 0.5, 2)
        else:
            dist = la.norm(adv_xsamp.reshape(-1) - xs.numpy().reshape(-1), 2)
            return dist
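The fallback radius above is the norm of a constant 0.5 vector, i.e. 0.5 * sqrt(d) for a d-dimensional input; for a 3x32x32 image that is about 27.71:

import numpy as np
import numpy.linalg as la

d = 3 * 32 * 32
assert np.isclose(la.norm(np.ones(d) * 0.5, 2), 0.5 * np.sqrt(d))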
Example #26
def mnist_tutorial_jsma(train_start=0, train_end=5500, test_start=0,
                        test_end=1000, nb_epochs=8,
                        batch_size=100, nb_classes=10,
                        nb_filters=64,
                        learning_rate=0.001):
    """
    MNIST tutorial for the Jacobian-based saliency map approach (JSMA)
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_classes: number of output classes
    :param nb_filters: number of convolutional filters per layer
    :param learning_rate: learning rate for training
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y = tf.placeholder(tf.float32, shape=(None, 10))

    # Define TF model graph
    model = make_basic_cnn()
    preds = model(x)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    # sess.run(tf.global_variables_initializer())
    rng = np.random.RandomState([2017, 8, 30])

    print("x_train shape: ", X_train.shape)
    print("y_train shape: ", Y_train.shape)

    # train without per-epoch logging
    model_train(sess, x, y, preds, X_train, Y_train, args=train_params,
                verbose=False, rng=rng)

    f_out_clean = open("Clean_jsma_elastic_against5.log", "w")

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    f_out_clean.write('Test accuracy on legitimate test examples: ' + str(accuracy) + '\n')


    # Clean test against JSMA
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}

    jsma = SaliencyMapMethod(model, back='tf', sess=sess)
    adv_x_jsma = jsma.generate(x, **jsma_params)
    preds_adv_jsma = model.get_probs(adv_x_jsma)

    # Evaluate the accuracy of the MNIST model on JSMA adversarial examples
    acc = model_eval(sess, x, y, preds_adv_jsma, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on JSMA adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on JSMA adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against FGSM
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}

    fgsm = FastGradientMethod(model, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model.get_probs(adv_x_fgsm)

    # Evaluate the accuracy of the MNIST model on FGSM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_fgsm, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on FGSM adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on FGSM adversarial examples: ' + str(acc) + '\n')


    ################################################################
    # Clean test against BIM
    bim_params = {'eps': 0.3,
                  'eps_iter': 0.01,
                  'nb_iter': 100,
                  'clip_min': 0.,
                  'clip_max': 1.}
    bim = BasicIterativeMethod(model, sess=sess)
    adv_x_bim = bim.generate(x, **bim_params)
    preds_adv_bim = model.get_probs(adv_x_bim)

    # Evaluate the accuracy of the MNIST model on BIM adversarial examples
    acc = model_eval(sess, x, y, preds_adv_bim, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on BIM adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on BIM adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against EN
    en_params = {'binary_search_steps': 1,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    en = ElasticNetMethod(model, back='tf', sess=sess)
    adv_x_en = en.generate(x, **en_params)
    preds_adv_en = model.get_probs(adv_x_en)

    # Evaluate the accuracy of the MNIST model on Elastic Net adversarial examples
    acc = model_eval(sess, x, y, preds_adv_en, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on EN adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on EN adversarial examples: ' + str(acc) + '\n')
    ################################################################
    # Clean test against DF
    deepfool_params = {'nb_candidate': 10,
                       'overshoot': 0.02,
                       'max_iter': 50,
                       'clip_min': 0.,
                       'clip_max': 1.}
    deepfool = DeepFool(model, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_df = model.get_probs(adv_x_df)

    # Evaluate the accuracy of the MNIST model on DeepFool adversarial examples
    acc = model_eval(sess, x, y, preds_adv_df, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on DF adversarial examples: %0.4f' % acc)
    f_out_clean.write('Clean test accuracy on DF adversarial examples: ' + str(acc) + '\n')

    ################################################################
    # Clean test against VAT
    vat_params = {'eps': 2.0,
                  'num_iterations': 1,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
    vat = VirtualAdversarialMethod(model, sess=sess)
    adv_x_vat = vat.generate(x, **vat_params)
    preds_adv_vat = model.get_probs(adv_x_vat)

    # Evaluate the accuracy of the MNIST model on VAT adversarial examples
    acc = model_eval(sess, x, y, preds_adv_vat, X_test, Y_test, args=eval_params)
    print('Clean test accuracy on VAT adversarial examples: %0.4f\n' % acc)
    f_out_clean.write('Clean test accuracy on VAT adversarial examples: ' + str(acc) + '\n')

    f_out_clean.close()

    ###########################################################################
    # Craft adversarial examples using the Jacobian-based saliency map approach
    ###########################################################################
    print('Crafting ' + str(X_train.shape[0]) + ' * ' + str(nb_classes-1) +
          ' adversarial examples')


    model_2 = make_basic_cnn()
    preds_2 = model_2(x)

    # initialize the variables of the second model
    sess.run(tf.global_variables_initializer())

    # 1. Instantiate a SaliencyMapMethod attack object
    jsma = SaliencyMapMethod(model_2, back='tf', sess=sess)
    jsma_params = {'theta': 1., 'gamma': 0.1,
                   'clip_min': 0., 'clip_max': 1.,
                   'y_target': None}
    adv_random = jsma.generate(x, **jsma_params)
    preds_adv_random = model_2.get_probs(adv_random)

    # 2. Instantiate FGSM attack
    fgsm_params = {'eps': 0.3,
                   'clip_min': 0.,
                   'clip_max': 1.}
    fgsm = FastGradientMethod(model_2, sess=sess)
    adv_x_fgsm = fgsm.generate(x, **fgsm_params)
    preds_adv_fgsm = model_2.get_probs(adv_x_fgsm)


    # 3. Instantiate Elastic net attack
    en_params = {'binary_search_steps': 5,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    enet = ElasticNetMethod(model_2, sess=sess)
    adv_x_en = enet.generate(x, **en_params)
    preds_adv_elastic_net = model_2.get_probs(adv_x_en)

    # 4. Deepfool
    deepfool_params = {'nb_candidate': 10,
                       'overshoot': 0.02,
                       'max_iter': 50,
                       'clip_min': 0.,
                       'clip_max': 1.}
    deepfool = DeepFool(model_2, sess=sess)
    adv_x_df = deepfool.generate(x, **deepfool_params)
    preds_adv_deepfool = model_2.get_probs(adv_x_df)

    # 5. Base Iterative
    bim_params = {'eps': 0.3,
                  'eps_iter': 0.01,
                  'nb_iter': 100,
                  'clip_min': 0.,
                  'clip_max': 1.}
    base_iter = BasicIterativeMethod(model_2, sess=sess)
    adv_x_bi = base_iter.generate(x, **bim_params)
    preds_adv_base_iter = model_2.get_probs(adv_x_bi)

    # 6. C & W Attack
    cw = CarliniWagnerL2(model_2, back='tf', sess=sess)
    cw_params = {'binary_search_steps': 1,
                 # 'y': None,
                 'max_iterations': 100,
                 'learning_rate': 0.1,
                 'batch_size': batch_size,
                 'initial_const': 10}
    adv_x_cw = cw.generate(x, **cw_params)
    preds_adv_cw = model_2.get_probs(adv_x_cw)

    # 7. Virtual Adversarial Training (VAT)
    vat_params = {'eps': 2.0,
                  'num_iterations': 1,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.}
    vat = VirtualAdversarialMethod(model_2, sess=sess)
    adv_x = vat.generate(x, **vat_params)
    preds_adv_vat = model_2.get_probs(adv_x)


    # Generate targeted adversarial examples toward every other class for
    # each training sample, using the Jacobian-based saliency map approach.
    # Loop over the samples we want to perturb into adversarial examples.

    X_train_adv_set = []
    Y_train_adv_set = []
    for index in range(X_train.shape[0]):
        print('--------------------------------------')
        x_val = X_train[index:(index+1)]
        y_val = Y_train[index]


        # include the clean sample as well
        X_train_adv_set.append(x_val)
        Y_train_adv_set.append(y_val)

        # We want to find an adversarial example for each possible target class
        # (i.e. all classes that differ from the label given in the dataset)
        current_class = int(np.argmax(y_val))
        target_classes = other_classes(nb_classes, current_class)
        # Loop over all target classes
        for target in target_classes:
            # print('Generating adv. example for target class %i' % target)
            # This call runs the Jacobian-based saliency map approach

            one_hot_target = np.zeros((1, nb_classes), dtype=np.float32)
            one_hot_target[0, target] = 1
            jsma_params['y_target'] = one_hot_target
            adv_x = jsma.generate_np(x_val, **jsma_params)

            # append to X_train_adv_set and Y_train_adv_set
            X_train_adv_set.append(adv_x)
            Y_train_adv_set.append(y_val)

            # shape is: (1, 28, 28, 1)
            # print("adv_x shape is: ", adv_x.shape)

            # check for success rate
            # res = int(model_argmax(sess, x, preds, adv_x) == target)

    print('-------------Finished Generating Np Adversarial Data-------------------------')

    X_train_data = np.concatenate(X_train_adv_set, axis=0)
    Y_train_data = np.stack(Y_train_adv_set, axis=0)
    print("X_train_data shape is: ", X_train_data.shape)
    print("Y_train_data shape is: ", Y_train_data.shape)

    # save the output so the file does not need to be re-run later
    np.savez("jsma_training_data.npz", x_train=X_train_data,
             y_train=Y_train_data)

    # >>> data = np.load('jsma_training_data.npz')
    # >>> data['x_train']

    f_out = open("Adversarial_jsma_elastic_against5.log", "w")

    # evaluate the model against all crafted attacks:
    # JSMA, FGSM, BIM, Elastic Net, DeepFool, C&W, and VAT
    def evaluate_against_all():
        # 1 Clean data
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                              args=eval_params)
        print('Legitimate accuracy: %0.4f' % accuracy)
        f_out.write('Legitimate accuracy: ' + str(accuracy) + "\n")

        # 2 JSMA
        accuracy = model_eval(sess, x, y, preds_adv_random, X_test,
                              Y_test, args=eval_params)
        print('JSMA accuracy: %0.4f' % accuracy)
        f_out.write('JSMA accuracy: ' + str(accuracy) + "\n")

        # 3 FGSM
        accuracy = model_eval(sess, x, y, preds_adv_fgsm, X_test,
                              Y_test, args=eval_params)
        print('FGSM accuracy: %0.4f' % accuracy)
        f_out.write('FGSM accuracy: ' + str(accuracy) + "\n")

        # 4 Basic Iterative
        accuracy = model_eval(sess, x, y, preds_adv_base_iter, X_test,
                              Y_test, args=eval_params)
        print('Basic Iterative accuracy: %0.4f' % accuracy)
        f_out.write('Basic Iterative accuracy: ' + str(accuracy) + "\n")

        # 5 Elastic Net
        accuracy = model_eval(sess, x, y, preds_adv_elastic_net, X_test,
                              Y_test, args=eval_params)
        print('Elastic Net accuracy: %0.4f' % accuracy)
        f_out.write('Elastic Net accuracy: ' + str(accuracy) + "\n")

        # 6 DeepFool
        accuracy = model_eval(sess, x, y, preds_adv_deepfool, X_test,
                              Y_test, args=eval_params)
        print('DeepFool accuracy: %0.4f' % accuracy)
        f_out.write('DeepFool accuracy: ' + str(accuracy) + "\n")

        # 7 C&W
        accuracy = model_eval(sess, x, y, preds_adv_cw, X_test,
                              Y_test, args=eval_params)
        print('C&W accuracy: %0.4f' % accuracy)
        f_out.write('C&W accuracy: ' + str(accuracy) + "\n")

        # 8 Virtual Adversarial
        accuracy = model_eval(sess, x, y, preds_adv_vat, X_test,
                              Y_test, args=eval_params)
        print('VAT accuracy: %0.4f' % accuracy)
        f_out.write('VAT accuracy: ' + str(accuracy) + "\n")

        f_out.write("*******End of Epoch***********\n\n")
        print("*******End of Epoch***********\n\n")

    print("Now Adversarial Training with Elastic Net  + modified X_train and Y_train")
    # trained_model.out
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': '/home/stephen/PycharmProjects/jsma-runall-mac/',
        'filename': 'trained_model.out'
    }
    model_train(sess, x, y, preds_2, X_train_data, Y_train_data,
                predictions_adv=preds_adv_elastic_net,
                evaluate=evaluate_against_all, verbose=False,
                args=train_params, rng=rng)


    # Close TF session
    sess.close()
    return report
Example #27
def main(type="Resnet", dataset="CIFAR10", attack_type="FGM"):

    size = 256
    eval_params = {'batch_size': 128}

    ############################################# Prepare the Data #####################################################

    if dataset == 'CIFAR10':
        (_, _), (x_test, y_test) = prepare_CIFAR10()
        num_classes = 10
        input_dim = 32
    elif dataset == 'CIFAR100':
        (_, _), (x_test, y_test) = prepare_CIFAR100()
        num_classes = 100
        input_dim = 32
    else:
        (_, _), (x_test, y_test) = prepare_SVHN("./Data/")
        num_classes = 10
        input_dim = 32

    x_test = x_test / 255.
    y_test = keras.utils.to_categorical(y_test, num_classes)

    ############################################# Prepare the Data #####################################################


    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:

        # prepare the placeholders
        x = tf.placeholder(tf.float32, [None, input_dim, input_dim, 3])
        y = tf.placeholder(tf.float32, [None, num_classes])

        input_output = []
        def modelBuilder(x, num_classes, dataset, type, sess, input_output):

            if len(input_output) == 0:

                reuse = False

                # Model/Graph
                if type == 'End2End':
                    _, tf_model = \
                        prepare_GBP_End2End(num_classes,
                                            inputT=x, sess=sess,
                                            checkpoint_dir='./{}_{}/'.format(dataset, type), reuse=reuse)
                else:

                    _, tf_model = \
                        prepare_Resnet(num_classes,
                                       inputT=x, sess=sess,
                                       checkpoint_dir='./{}_{}/'.format(dataset, type), reuse=reuse)

                input_output.append(x)
                input_output.append(tf_model.logits)

            else:

                reuse = True

                # Model/Graph
                if type == 'End2End':
                    _, tf_model = \
                        prepare_GBP_End2End(num_classes, inputT=x, reuse=reuse)
                else:
                    _, tf_model = \
                        prepare_Resnet(num_classes, inputT=x, reuse=reuse)

                input_output.append(x)
                input_output.append(tf_model.logits)


            return tf_model.logits

        # create an attackable model for cleverhans
        model = CallableModelWrapper(
            lambda placeholder: modelBuilder(placeholder, num_classes,
                                             dataset, type, sess,
                                             input_output),
            'logits')

        # TODO: check the configurations
        if attack_type == "FGM": # pass
            attack = FastGradientMethod(model, back='tf', sess=sess)
            params = {
                'eps' : 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "CWL2": # pass
            attack = CarliniWagnerL2(model, back='tf', sess=sess)
            params = {
                'confidence': 0.9,
                'batch_size': 128,
                'learning_rate': 0.005,
            }
        elif attack_type == "DF": # pass
            attack = DeepFool(model, back='tf', sess=sess)
            params = {
            }
        elif attack_type == "ENM": # configurations checked, quickly tested
            attack = ElasticNetMethod(model, back='tf', sess=sess)
            params = {
                'confidence': 0.9,
                'batch_size': 128,
                'learning_rate': 0.005,
            }
        elif attack_type == "FFA": # configuration checked
            attack = FastFeatureAdversaries(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'eps_iter': 0.005,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "LBFGS":
            attack = LBFGS(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "MEA":
            attack = MadryEtAl(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "MIM":
            attack = MomentumIterativeMethod(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "SMM":
            attack = SaliencyMapMethod(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "SPSA":
            attack = SPSA(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "VATM":
            attack = vatm(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "VAM":
            attack = VirtualAdversarialMethod(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        else:
            raise ValueError("Unrecognized attack type: {}".format(attack_type))

        # tf operation
        adv_x = attack.generate(x, **params)

        # generate the adversarial examples
        adv_vals = sess.run(adv_x, feed_dict={x: x_test[:size]})

        # notice that "adv_vals" may contain NANs because of the failure of the attack
        # also the input may not be perturbed at all because of the failure of the attack
        to_delete = []
        for idx, adv in enumerate(adv_vals):
            # for nan
            if np.isnan(adv).any():
                to_delete.append(idx)
            # for no perturbation
            if np.array_equiv(adv, x_test[idx]):
                to_delete.append(idx)

        # drop the failed examples
        adv_vals_cleaned = np.delete(adv_vals, to_delete, axis=0)
        ori_cleaned = np.delete(x_test[:size], to_delete, axis=0)
        y_cleaned = np.delete(y_test[:size], to_delete, axis=0)

        if len(adv_vals_cleaned) == 0:
            print("No adversarial example is generated!")
            return

        print("{} out of {} adversarial examples are generated.".format(len(adv_vals_cleaned), size))

        print("The average L_inf distortion is {}".format(
            np.mean([np.max(np.abs(adv - ori_cleaned[idx])) for idx, adv in enumerate(adv_vals_cleaned)])))

        # TODO: visualize the adv_vals

        accuracy = model_eval(sess, input_output[0], y, tf.nn.softmax(input_output[1]), x_test[:size], y_test[:size],
                              args=eval_params)
        print('Test accuracy on normal examples: %0.4f' % accuracy)

        accuracy = model_eval(sess, input_output[0], y, tf.nn.softmax(input_output[1]), adv_vals_cleaned, y_cleaned,
                              args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
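The cleanup step above generalizes well; here is a minimal, self-contained sketch of the same NaN/no-perturbation filtering factored into a helper (the name filter_failed_adv and the toy data are ours, not part of the original code):

import numpy as np

def filter_failed_adv(adv_vals, x_orig, y_orig):
    """Drop adversarial examples that are NaN or identical to their input."""
    keep = []
    for idx, adv in enumerate(adv_vals):
        if np.isnan(adv).any():               # the attack diverged
            continue
        if np.array_equiv(adv, x_orig[idx]):  # the attack changed nothing
            continue
        keep.append(idx)
    return adv_vals[keep], x_orig[keep], y_orig[keep]

# toy usage with random data standing in for attack output
x = np.random.rand(4, 8, 8, 3).astype(np.float32)
adv = x.copy()
adv[0] += 0.05   # one successfully perturbed sample
adv[1] = np.nan  # one failed (NaN) sample
y = np.eye(10)[np.random.randint(0, 10, size=4)]
adv_ok, x_ok, y_ok = filter_failed_adv(adv, x, y)
print("{} out of {} adversarial examples kept".format(len(adv_ok), len(adv)))
print("average L_inf distortion:",
      np.mean(np.max(np.abs(adv_ok - x_ok), axis=(1, 2, 3))))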
Example #28
def iterate_through_cwl2_attacks():
    tf.logging.set_verbosity(tf.logging.INFO)
    input_dir = FLAGS.input_image_dir
    metadata_file_path = FLAGS.metadata_file_path
    num_images = len(os.listdir(input_dir))
    batch_shape = (num_images, 299, 299, 3)
    num_classes = 1001
    batch_size = attack_name_to_params[ATTACKS.CARLINI_WAGNER]['batch_size']
    images, labels, target_classes = load_images(input_dir, metadata_file_path, batch_shape,
                                                 num_classes)

    list_param_dict = expand_param_dict(
        attack_name_to_params[ATTACKS.CARLINI_WAGNER],
        attack_name_to_configurable_params[ATTACKS.CARLINI_WAGNER]
    )

    save_dir = 'saves'
    os.makedirs(save_dir, exist_ok=True)

    for idx, params in enumerate(list_param_dict):
        tf.reset_default_graph()

        logger.info('Running attack with parameters: {}'.format(params))
        logger.info('Current parameter set: {}/{}'.format(idx + 1, len(list_param_dict)))

        # Get save path
        adv_imgs_save_path = get_attack_images_filename_prefix(
            attack_name=ATTACKS.CARLINI_WAGNER,
            params=params,
            model='inception',
            targeted_prefix='targeted'
        )
        adv_imgs_save_path = os.path.join(save_dir, adv_imgs_save_path)

        # Run inference
        graph = tf.Graph()
        with graph.as_default():
            sess = tf.Session(graph=graph)
            # Prepare graph
            x_input = tf.placeholder(tf.float32, shape=(batch_size,) + batch_shape[1:])
            y_label = tf.placeholder(tf.int32, shape=(batch_size, num_classes))
            y_target = tf.placeholder(tf.int32, shape=(batch_size, num_classes))
            model = InceptionModel(num_classes)

            # debug switch: set to False to run the SPSA branch instead of CW-L2
            cwl2 = True
            if cwl2:
                attack = CarliniWagnerL2(model=model, sess=sess)
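                # supplying y_target on the next line makes the attack targeted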
                x_adv = attack.generate(x_input, y_target=y_target, **params)
            else:
                attack = SPSA(model=model)
                x_adv = attack.generate(x_input, y_target=y_label, epsilon=4. / 255, num_steps=30,
                                        early_stop_loss_threshold=-1., batch_size=32, spsa_iters=16,
                                        is_debug=True)

            logits = model.get_logits(x_input)
            acc = _top_k_accuracy(logits, tf.argmax(y_label, axis=1), k=1)
            success_rate = _top_k_accuracy(logits, tf.argmax(y_target, axis=1), k=1)

            # Run computation
            saver = tf.train.Saver(slim.get_model_variables())
            saver.restore(sess, save_path=FLAGS.checkpoint_path)

            list_adv_images = []

            # number of batches = ceil(num_images / batch_size)
            num_batches = (num_images + batch_size - 1) // batch_size

            for i in tqdm.tqdm(range(num_batches)):
                feed_dict_i = {x_input: images[i * batch_size:(i + 1) * batch_size],
                               y_target: target_classes[i * batch_size:(i + 1) * batch_size]}
                adv_img = sess.run(x_adv, feed_dict=feed_dict_i)
                list_adv_images.append(adv_img)

            adv_images = np.concatenate(list_adv_images)
            np.save(adv_imgs_save_path, adv_images)

            acc_store = []
            succ_store = []
            for i in tqdm.tqdm(range(num_batches)):
                feed_dict_i = {x_input: adv_images[i * batch_size:(i + 1) * batch_size],
                               y_target: target_classes[i * batch_size:(i + 1) * batch_size],
                               y_label: labels[i * batch_size:(i + 1) * batch_size]}
                succ_batch, acc_batch = sess.run([success_rate, acc],
                                                 feed_dict=feed_dict_i)
                acc_store.extend(acc_batch)
                succ_store.extend(succ_batch)

            logger.info('Accuracy is: {:.4f}'.format(np.mean(acc_store)))
            logger.info('Success Rate is: {:.4f}'.format(np.mean(succ_store)))
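Example #28 relies on a helper expand_param_dict that is not shown; a plausible sketch, assuming it takes a dict of fixed parameters plus a dict mapping parameter names to lists of candidate values and returns one merged dict per combination:

import itertools

def expand_param_dict(base_params, configurable_params):
    """Cross-product of candidate values, each merged over the base dict."""
    keys = sorted(configurable_params)
    expanded = []
    for values in itertools.product(*(configurable_params[k] for k in keys)):
        params = dict(base_params)
        params.update(zip(keys, values))
        expanded.append(params)
    return expanded

# toy usage: 2 x 2 = 4 parameter sets
base = {'batch_size': 10, 'max_iterations': 100}
grid = {'confidence': [0, 10], 'learning_rate': [1e-2, 1e-3]}
for p in expand_param_dict(base, grid):
    print(p)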
Example #29
def mnist_tutorial_cw(train_start=0,
                      train_end=60000,
                      test_start=0,
                      test_end=10000,
                      viz_enabled=True,
                      nb_epochs=6,
                      batch_size=128,
                      source_samples=10,
                      learning_rate=0.001,
                      attack_iterations=100,
                      model_path=os.path.join("models", "mnist"),
                      targeted=True):
    """
    MNIST tutorial for Carlini and Wagner's attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param source_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param attack_iterations: number of iterations the CW attack runs
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    set_log_level(logging.DEBUG)

    # Get MNIST test data
    x_train, y_train, x_test, y_test = data_mnist(train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Obtain Image Parameters
    img_rows, img_cols, nchannels = x_train.shape[1:4]
    nb_classes = y_train.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    nb_filters = 64

    # Define TF model graph
    model = ModelBasicCNN('model1', nb_classes, nb_filters)
    preds = model.get_logits(x)
    loss = LossCrossEntropy(model, smoothing=0.1)
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################

    # Train an MNIST model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'train_dir': os.path.join(*os.path.split(model_path)[:-1]),
        'filename': os.path.split(model_path)[-1]
    }

    rng = np.random.RandomState([2017, 8, 30])
    # check if we've trained before, and if we have, use that pre-trained model
    if os.path.exists(model_path + ".meta"):
        tf_model_load(sess, model_path)
    else:
        train(sess,
              loss,
              x,
              y,
              x_train,
              y_train,
              args=train_params,
              save=os.path.exists("models"),
              rng=rng)

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, x_test, y_test, args=eval_params)
    assert x_test.shape[0] == test_end - test_start, x_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Craft adversarial examples using Carlini and Wagner's approach
    ###########################################################################
    nb_adv_per_sample = str(nb_classes - 1) if targeted else '1'
    print('Crafting ' + str(source_samples) + ' * ' + nb_adv_per_sample +
          ' adversarial examples')
    print("This could take some time ...")

    # Instantiate a CW attack object
    cw = CarliniWagnerL2(model, back='tf', sess=sess)

    if viz_enabled:
        assert source_samples == nb_classes
        idxs = [
            np.where(np.argmax(y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
    if targeted:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, nb_classes, img_rows, img_cols,
                          nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[idxs]],
                                  dtype=np.float32)
        else:
            adv_inputs = np.array([[instance] * nb_classes
                                   for instance in x_test[:source_samples]],
                                  dtype=np.float32)

        one_hot = np.zeros((nb_classes, nb_classes))
        one_hot[np.arange(nb_classes), np.arange(nb_classes)] = 1

        adv_inputs = adv_inputs.reshape(
            (source_samples * nb_classes, img_rows, img_cols, nchannels))
        adv_ys = np.array([one_hot] * source_samples,
                          dtype=np.float32).reshape(
                              (source_samples * nb_classes, nb_classes))
        yname = "y_target"
    else:
        if viz_enabled:
            # Initialize our array for grid visualization
            grid_shape = (nb_classes, 2, img_rows, img_cols, nchannels)
            grid_viz_data = np.zeros(grid_shape, dtype='f')

            adv_inputs = x_test[idxs]
        else:
            adv_inputs = x_test[:source_samples]

        adv_ys = None
        yname = "y"

    cw_params = {
        'binary_search_steps': 1,
        yname: adv_ys,
        'max_iterations': attack_iterations,
        'learning_rate': 0.1,
        'batch_size': source_samples * nb_classes if targeted else source_samples,
        'initial_const': 10
    }

    # generate_np builds the attack graph and runs it to completion,
    # returning the adversarial examples as a NumPy array
    adv = cw.generate_np(adv_inputs, **cw_params)

    eval_params = {'batch_size': np.minimum(nb_classes, source_samples)}
    if targeted:
        adv_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv,
                                  adv_ys,
                                  args=eval_params)
    else:
        if viz_enabled:
            adv_accuracy = 1 - model_eval(sess, x, y, preds, adv,
                                          y_test[idxs], args=eval_params)
        else:
            adv_accuracy = 1 - model_eval(sess, x, y, preds, adv,
                                          y_test[:source_samples],
                                          args=eval_params)

    if viz_enabled:
        for j in range(nb_classes):
            if targeted:
                for i in range(nb_classes):
                    grid_viz_data[i, j] = adv[i * nb_classes + j]
            else:
                grid_viz_data[j, 0] = adv_inputs[j]
                grid_viz_data[j, 1] = adv[j]

        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Avg. rate of successful adv. examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average L2 distortion introduced by the algorithm
    mean_l2_perturbation = np.mean(
        np.sum((adv - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(mean_l2_perturbation))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
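The trickiest part of Example #29 is the targeted setup, which repeats every source image once per class and pairs it with cycling one-hot targets; the same pairing in isolation, with toy shapes (equivalent to the list-comprehension-plus-reshape above):

import numpy as np

nb_classes, source_samples = 3, 2
x_src = np.random.rand(source_samples, 28, 28, 1).astype(np.float32)

# each image repeated once per target class -> (source_samples * nb_classes, ...)
adv_inputs = np.repeat(x_src, nb_classes, axis=0)

# the identity tiled per source image, so row i targets class i % nb_classes
adv_ys = np.tile(np.eye(nb_classes, dtype=np.float32), (source_samples, 1))

print(adv_inputs.shape)  # (6, 28, 28, 1)
print(adv_ys)            # one-hot targets cycling 0, 1, 2 per source image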
Example #30
def attack_classifier(sess, x, model, x_test, attack_method="fgsm", target=None, batch_size=128):

    if attack_method == "fgsm":
        from cleverhans.attacks import FastGradientMethod
        params = {'eps': 8/255,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis=0))
        method = FastGradientMethod(model, sess=sess)

    elif attack_method == "basic_iterative":
        from cleverhans.attacks import BasicIterativeMethod
        params = {'eps': 8./255,
                  'eps_iter': 1./255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.,
                  'ord': np.inf
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis=0))
        method = BasicIterativeMethod(model, sess=sess)

    elif attack_method == "momentum_iterative":
        from cleverhans.attacks import MomentumIterativeMethod
        params = {'eps': 8/255,
                  'eps_iter': 1/255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis=0))
        method = MomentumIterativeMethod(model, sess=sess)

    elif attack_method == "saliency":
        from cleverhans.attacks import SaliencyMapMethod
        params = {'theta': 8/255,
                  'gamma': 0.1,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        assert target is None
        method = SaliencyMapMethod(model, sess=sess)

    elif attack_method == "virtual":
        from cleverhans.attacks import VirtualAdversarialMethod
        params = {'eps': 8/255,
                  'num_iterations': 10,
                  'xi': 1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        assert target is None
        method = VirtualAdversarialMethod(model, sess=sess)

    elif attack_method == "cw":
        from cleverhans.attacks import CarliniWagnerL2
        params = {
            "confidence": 0,
            "batch_size": batch_size,  # must match the batch size fed below
            "learning_rate": 1e-4,
            "binary_search_steps": 10,
            "max_iterations": 1000,
            "abort_early": True,
            "initial_const": 1e-2,
            "clip_min": 0,
            "clip_max": 1
        }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis=0))
        method = CarliniWagnerL2(model, sess=sess)

    elif attack_method == "elastic_net":
        from cleverhans.attacks import ElasticNetMethod
        params = {
            "fista": "FISTA",
            "beta": 0.1,
            "decision_rule": "EN",
            "confidence": 0,
            "batch_size": batch_size,  # must match the batch size fed below
            "learning_rate": 1e-4,
            "binary_search_steps": 10,
            "max_iterations": 1000,
            "abort_early": True,
            "initial_const": 1e-2,
            "clip_min": 0,
            "clip_max": 1
        }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis=0))
        method = ElasticNetMethod(model, sess=sess)

    elif attack_method == "deepfool":
        from cleverhans.attacks import DeepFool
        params = {
            "nb_candidate":10,
            "overshoot":1e-3,
            "max_iter":100,
            "nb_classes":10,
            "clip_min":0,
            "clip_max":1
        }
        assert target is None
        method = DeepFool(model, sess=sess)

    elif attack_method == "lbfgs":
        from cleverhans.attacks import LBFGS
        params = {
            'batch_size': batch_size,  # must match the batch size fed below
            'binary_search_steps': 10,
            'max_iterations': 1000,
            'initial_const': 1e-2,
            'clip_min': 0.,
            'clip_max': 1.
        }
        assert target is not None
        params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis=0))
        method = LBFGS(model, sess=sess)

    elif attack_method == "madry":
        from cleverhans.attacks import MadryEtAl
        params = {'eps': 8/255,
                  'eps_iter': 1/255,
                  'nb_iter': 10,
                  'ord': np.inf,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis=0))
        method = MadryEtAl(model, sess=sess)

    elif attack_method == "SPSA":
        from cleverhans.attacks import SPSA
        params = {
            'epsilon': 1/255,
            'num_steps': 10,
            'is_targeted': False,
            'early_stop_loss_threshold': None,
            'learning_rate': 0.01,
            'delta': 0.01,
            'batch_size': 128,  # SPSA's gradient-estimation sample count, not the data batch
            'spsa_iters': 1,
            'is_debug': False
        }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis=0))
            params["is_targeted"] = True
        method = SPSA(model, sess=sess)

    else:
        raise ValueError("Can not recognize this attack method: %s" % attack_method)

    adv_x = method.generate(x, **params)
    # only full batches are attacked: up to batch_size - 1 trailing test
    # samples are silently dropped here
    num_batch = x_test.shape[0] // batch_size
    adv_imgs = []
    for i in range(num_batch):
        x_feed = x_test[i*batch_size:(i+1)*batch_size]
        #y_feed = y_test[i*batch_size:(i+1)*batch_size]

        adv_img = sess.run(adv_x, feed_dict={x: x_feed})
        adv_imgs.append(adv_img)

    adv_imgs = np.concatenate(adv_imgs, axis=0)
    return adv_imgs
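Every targeted branch in Example #30 repeats the same y_target construction; here is that pattern in isolation (the helper name one_hot_target_batch is ours):

import numpy as np

def one_hot_target_batch(target, batch_size, nb_classes=10):
    """Repeat a single one-hot target label across an entire batch."""
    return np.repeat(np.eye(nb_classes)[target:target + 1], batch_size, axis=0)

targets = one_hot_target_batch(target=3, batch_size=4)
print(targets.shape)  # (4, 10)
print(targets[0])     # [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]

In the function above, this array is additionally wrapped in tf.constant before being stored in params["y_target"], since the attack expects a tensor.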