def createAttack(model, sess, x, y, X_test, y_test, eps=0.02):
    import sys
    import time

    import numpy as np
    from cleverhans.attacks import MadryEtAl

    print("Beginning PGD attack")
    pgd = MadryEtAl(model, back='tf', sess=sess)  # the 'back' kwarg exists only in the legacy cleverhans API
    preds = model(x)  # symbolic predictions (not used below)

    t0 = time.time()
    batch_size = 64

    # Incredibly horrible and ugly way to iterate over X_test.  Sorry.
    X_test_adv_pgd = np.zeros(X_test.shape)
    num_batches = X_test.shape[0] // batch_size
    for i in range(num_batches):
        batch_start = batch_size*i 
        batch_end = batch_size*(i+1)
        batch = X_test[batch_start:batch_end]
        if not (i % 20):
            print("attacking batch", i, "from ", batch_start, " to ", batch_end, file=sys.stderr)
        attack_target = 1 - y_test[batch_start:batch_end]
        pgd_params = {'eps': eps,
                  'eps_iter': 0.01,
                  'clip_min': -1.,
                  'clip_max': 1.,
                  'nb_iter': 20,
                  'y_target': attack_target}
        X_test_adv_pgd[batch_start:batch_end] = pgd.generate_np(batch, **pgd_params)
    if X_test.shape[0] % batch_size:
        batch_start = (num_batches * batch_size )
        batch_end = X_test.shape[0]
        batch = X_test[batch_start:batch_end].reshape((-1,224,224,3))
        print("attacking residual batch from ", batch_start, " to ", batch_end, file=sys.stderr)
        attack_target = 1 - y_test[batch_start:batch_end].reshape((-1,2))
        pgd_params = {'eps': eps,
                  'eps_iter': 0.01,
                  'clip_min': -1.,
                  'clip_max': 1.,
                  'nb_iter': 20,
                  'y_target': attack_target}
        X_test_adv_pgd[batch_start:batch_end] = pgd.generate_np(batch, **pgd_params)

    # Report on timing
    t1 = time.time()
    total = t1 - t0
    m, s = divmod(total, 60)
    h, m = divmod(m, 60)
    print("Completed attack in %d:%02d:%02d" % (h, m, s))
    
    return X_test_adv_pgd
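
For context, a hedged sketch of how createAttack might be driven; the `model`, `X_test`, and `y_test` objects are assumptions, and the placeholder shapes follow the 224x224x3 reshape and binary one-hot labels used above.

import tensorflow as tf

sess = tf.Session()
x = tf.placeholder(tf.float32, [None, 224, 224, 3])  # matches the residual-batch reshape
y = tf.placeholder(tf.float32, [None, 2])            # binary one-hot, as implied by 1 - y_test
X_test_adv = createAttack(model, sess, x, y, X_test, y_test, eps=0.02)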
Example #2
    def setUp(self):
        super(TestMadryEtAl, self).setUp()
        import tensorflow as tf

        # The world's simplest neural network
        def my_model(x):
            W1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
            h1 = tf.nn.sigmoid(tf.matmul(x, W1))
            W2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
            res = tf.matmul(h1, W2)
            return res

        self.sess = tf.Session()
        self.model = my_model
        self.attack = MadryEtAl(self.model, sess=self.sess)
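
Newer cleverhans releases expect a cleverhans.model.Model rather than a bare callable (older versions auto-wrap it); a minimal sketch of the explicit wrapping, assuming my_model's raw-logit output:

from cleverhans.model import CallableModelWrapper

wrapped = CallableModelWrapper(my_model, output_layer='logits')  # my_model returns logits
attack = MadryEtAl(wrapped, sess=self.sess)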
Example #3
def load_attack(sess, attack_method, model, targeted, adv_ys, eps, batch_size):

    if attack_method == 'fgsm':
        from cleverhans.attacks import FastGradientMethod
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = FastGradientMethod(model_prob, sess=sess)
        attack_params, yname = config_fgsm(targeted, adv_ys, eps, batch_size)

    elif attack_method == 'pgd':
        from cleverhans.attacks import MadryEtAl
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = MadryEtAl(model_prob, sess=sess)
        attack_params, yname = config_madry(targeted, adv_ys, eps, batch_size)

    elif attack_method == 'mim':
        from cleverhans.attacks import MomentumIterativeMethod
        model_prob = lambda x: model.predict(x, softmax=True)
        attack = MomentumIterativeMethod(model_prob, sess=sess)
        attack_params, yname = config_mim(targeted, adv_ys, eps, batch_size)

    elif attack_method == 'cw':
        from cleverhans.attacks import CarliniWagnerL2
        model_logit = lambda x: model.predict(x, softmax=False)
        attack = CarliniWagnerL2(model_logit, sess=sess)
        attack_params, yname = config_cw(targeted, adv_ys, eps, batch_size)
    else:
        raise ValueError("Unknown attack method: %s" % attack_method)

    return attack, attack_params, yname
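A hedged usage sketch for load_attack; the config_* helpers and `model` come from the surrounding repo, `x_batch` is illustrative, and this assumes config_madry returns generate_np-ready kwargs:

attack, attack_params, yname = load_attack(sess, 'pgd', model,
                                           targeted=False, adv_ys=None,
                                           eps=8. / 255, batch_size=128)
x_adv = attack.generate_np(x_batch, **attack_params)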
Example #4
def initialize_attack(model, sess):
    from cleverhans.attacks import MadryEtAl
    from cleverhans.attacks import FastGradientMethod
    from cleverhans.utils_keras import KerasModelWrapper

    wrap = KerasModelWrapper(model)
    #fgsm = FastGradientMethod(wrap, sess=sess)
    fgsm = MadryEtAl(wrap, sess=sess)  # despite the name, this is the PGD (MadryEtAl) attack
    #fgsm = FastGradientMethod(model, sess=sess)

    del model

    return fgsm
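
A sketch of driving the returned attack with generate_np, assuming a Keras classifier whose session is shared with cleverhans; keras_model and x_batch are illustrative:

import keras.backend as K

attack = initialize_attack(keras_model, K.get_session())
x_adv = attack.generate_np(x_batch, eps=0.02, eps_iter=0.005, nb_iter=10,
                           clip_min=0., clip_max=1.)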
Example #5
def attack_classifier(sess, x, model, x_test, attack_method="fgsm", target=None, batch_size=128):

    if attack_method == "fgsm":
        from cleverhans.attacks import FastGradientMethod
        params = {'eps': 8. / 255,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = FastGradientMethod(model, sess=sess)

    elif attack_method == "basic_iterative":
        from cleverhans.attacks import BasicIterativeMethod
        params = {'eps': 8./255,
                  'eps_iter': 1./255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.,
                  'ord': np.inf
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = BasicIterativeMethod(model,sess = sess)

    elif attack_method == "momentum_iterative":
        from cleverhans.attacks import MomentumIterativeMethod
        params = {'eps': 8. / 255,
                  'eps_iter': 1. / 255,
                  'nb_iter': 10,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = MomentumIterativeMethod(model,sess = sess)

    elif attack_method == "saliency":
        from cleverhans.attacks import SaliencyMapMethod
        params = {'theta': 8. / 255,
                  'gamma':0.1,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        assert target is None
        method = SaliencyMapMethod(model,sess = sess)

    elif attack_method == "virtual":
        from cleverhans.attacks import VirtualAdversarialMethod
        params = {'eps': 8. / 255,
                  'num_iterations':10,
                  'xi' :1e-6,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        assert target is None
        method = VirtualAdversarialMethod(model,sess = sess)

    elif attack_method == "cw":
        from cleverhans.attacks import CarliniWagnerL2
        params = {
            "confidence":0,
            "batch_size":128,
            "learning_rate":1e-4,
            "binary_search_steps":10,
            "max_iterations":1000,
            "abort_early": True,
            "initial_const":1e-2,
            "clip_min":0,
            "clip_max":1
        }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = CarliniWagnerL2(model,sess = sess)

    elif attack_method == "elastic_net":
        from cleverhans.attacks import ElasticNetMethod
        params = {
            "fista": "FISTA",
            "beta": 0.1,
            "decision_rule":"EN",
            "confidence":0,
            "batch_size":128,
            "learning_rate":1e-4,
            "binary_search_steps":10,
            "max_iterations":1000,
            "abort_early": True,
            "initial_const":1e-2,
            "clip_min":0,
            "clip_max":1
        }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = ElasticNetMethod(model,sess = sess)

    elif attack_method == "deepfool":
        from cleverhans.attacks import DeepFool
        params = {
            "nb_candidate":10,
            "overshoot":1e-3,
            "max_iter":100,
            "nb_classes":10,
            "clip_min":0,
            "clip_max":1
        }
        assert target is None
        method = DeepFool(model,sess = sess)

    elif attack_method == "lbfgs":
        from cleverhans.attacks import LBFGS
        params = {
            'batch_size': batch_size,
            "binary_search_steps":10,
            "max_iterations":1000,
            "initial_const":1e-2,
            'clip_min': 0.,
            'clip_max': 1.
        }
        assert target is not None
        params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = LBFGS(model,sess = sess)

    elif attack_method == "madry":
        from cleverhans.attacks import MadryEtAl
        params = {'eps': 8. / 255,
                  'eps_iter': 1. / 255,
                  'nb_iter':10,
                  'ord':np.inf,
                  'clip_min': 0.,
                  'clip_max': 1.
                 }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
        method = MadryEtAl(model, sess = sess)

    elif attack_method == "SPSA":
        from cleverhans.attacks import SPSA
        params = {
            'epsilon': 1. / 255,
            'num_steps':10,
            'is_targeted':False,
            'early_stop_loss_threshold':None,
            'learning_rate':0.01,
            'delta':0.01,
            'batch_size': batch_size,
            'spsa_iters':1,
            'is_debug':False
        }
        if target is not None:
            params["y_target"] = tf.constant(np.repeat(np.eye(10)[target:target+1], batch_size, axis = 0))
            params["is_targeted"] = True
        method = SPSA(model, sess = sess)

    else:
        raise ValueError("Unrecognized attack method: %s" % attack_method)

    adv_x = method.generate(x, **params)
    num_batch = x_test.shape[0] // batch_size  # note: samples beyond the last full batch are dropped
    adv_imgs = []
    for i in range(num_batch):
        x_feed = x_test[i*batch_size:(i+1)*batch_size]
        #y_feed = y_test[i*batch_size:(i+1)*batch_size]

        adv_img = sess.run(adv_x, feed_dict={x: x_feed})
        adv_imgs.append(adv_img)

    adv_imgs = np.concatenate(adv_imgs, axis=0)
    return adv_imgs
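
A hedged driver sketch for attack_classifier on CIFAR-10-shaped inputs; wrapped_model and x_test are assumptions, and x_test is trimmed to whole batches as noted above:

import tensorflow as tf

sess = tf.Session()
x = tf.placeholder(tf.float32, [None, 32, 32, 3])
adv_imgs = attack_classifier(sess, x, wrapped_model, x_test,
                             attack_method="madry", target=None, batch_size=128)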
Example #6
def train(cifar10_data, epochs, L, learning_rate, scale3, Delta2, epsilon2,
          eps2_ratio, alpha, perturbFM, fgsm_eps, total_eps, logfile):
    logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , epsilon \t %d \n" %
                  (fgsm_eps, learning_rate, alpha, total_eps))
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        eps_benign = 1 / (1 + eps2_ratio) * (epsilon2)
        eps_adv = eps2_ratio / (1 + eps2_ratio) * (epsilon2)

        # Parameters Declarification
        #with tf.variable_scope('conv1') as scope:
        kernel1 = _variable_with_weight_decay(
            'kernel1',
            shape=[4, 4, 3, 128],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0,
            collect=[AECODER_VARIABLES])
        biases1 = _bias_on_cpu('biases1', [128],
                               tf.constant_initializer(0.0),
                               collect=[AECODER_VARIABLES])

        shape = kernel1.get_shape().as_list()
        w_t = tf.reshape(kernel1, [-1, shape[-1]])
        w = tf.transpose(w_t)
        sing_vals = tf.svd(w, compute_uv=False)
        sensitivity = tf.reduce_max(sing_vals)
        gamma = 2 * Delta2 / (L * sensitivity)  # 2*3*(14*14 + 2)*16/(L*sensitivity)

        #with tf.variable_scope('conv2') as scope:
        kernel2 = _variable_with_weight_decay(
            'kernel2',
            shape=[5, 5, 128, 128],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0,
            collect=[CONV_VARIABLES])
        biases2 = _bias_on_cpu('biases2', [128],
                               tf.constant_initializer(0.1),
                               collect=[CONV_VARIABLES])
        #with tf.variable_scope('conv3') as scope:
        kernel3 = _variable_with_weight_decay(
            'kernel3',
            shape=[5, 5, 256, 256],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0,
            collect=[CONV_VARIABLES])
        biases3 = _bias_on_cpu('biases3', [256],
                               tf.constant_initializer(0.1),
                               collect=[CONV_VARIABLES])
        #with tf.variable_scope('local4') as scope:
        kernel4 = _variable_with_weight_decay(
            'kernel4',
            shape=[int(image_size / 4)**2 * 256, hk],
            stddev=0.04,
            wd=0.004,
            collect=[CONV_VARIABLES])
        biases4 = _bias_on_cpu('biases4', [hk],
                               tf.constant_initializer(0.1),
                               collect=[CONV_VARIABLES])
        #with tf.variable_scope('local5') as scope:
        kernel5 = _variable_with_weight_decay(
            'kernel5', [hk, 10],
            stddev=np.sqrt(2.0 /
                           (int(image_size / 4)**2 * 256)) / math.ceil(5 / 2),
            wd=0.0,
            collect=[CONV_VARIABLES])
        biases5 = _bias_on_cpu('biases5', [10],
                               tf.constant_initializer(0.1),
                               collect=[CONV_VARIABLES])

        #scale2 = tf.Variable(tf.ones([hk]))
        #beta2 = tf.Variable(tf.zeros([hk]))

        params = [
            kernel1, biases1, kernel2, biases2, kernel3, biases3, kernel4,
            biases4, kernel5, biases5
        ]
        ########

        # Build a Graph that computes the logits predictions from the
        # inference model.
        FM_h = tf.placeholder(tf.float32, [None, 14, 14, 128])
        noise = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
        adv_noise = tf.placeholder(tf.float32,
                                   [None, image_size, image_size, 3])

        x = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
        adv_x = tf.placeholder(tf.float32, [None, image_size, image_size, 3])

        # Auto-Encoder #
        Enc_Layer2 = EncLayer(inpt=adv_x,
                              n_filter_in=3,
                              n_filter_out=128,
                              filter_size=3,
                              W=kernel1,
                              b=biases1,
                              activation=tf.nn.relu)
        pretrain_adv = Enc_Layer2.get_train_ops2(xShape=tf.shape(adv_x)[0],
                                                 Delta=Delta2,
                                                 epsilon=epsilon2,
                                                 batch_size=L,
                                                 learning_rate=learning_rate,
                                                 W=kernel1,
                                                 b=biases1,
                                                 perturbFMx=adv_noise,
                                                 perturbFM_h=FM_h)
        Enc_Layer3 = EncLayer(inpt=x,
                              n_filter_in=3,
                              n_filter_out=128,
                              filter_size=3,
                              W=kernel1,
                              b=biases1,
                              activation=tf.nn.relu)
        pretrain_benign = Enc_Layer3.get_train_ops2(
            xShape=tf.shape(x)[0],
            Delta=Delta2,
            epsilon=epsilon2,
            batch_size=L,
            learning_rate=learning_rate,
            W=kernel1,
            b=biases1,
            perturbFMx=noise,
            perturbFM_h=FM_h)
        cost = tf.reduce_sum((Enc_Layer2.cost + Enc_Layer3.cost) / 2.0)
        ###

        x_image = x + noise
        y_conv = inference(x_image, FM_h, params)
        softmax_y_conv = tf.nn.softmax(y_conv)
        y_ = tf.placeholder(tf.float32, [None, 10])

        adv_x += adv_noise  # note: rebinds adv_x to the sum tensor; the feed_dicts below feed this sum directly
        y_adv_conv = inference(adv_x, FM_h, params)
        adv_y_ = tf.placeholder(tf.float32, [None, 10])

        # Calculate loss. Apply Taylor Expansion for the output layer
        perturbW = perturbFM * params[8]
        loss = cifar10.TaylorExp(y_conv, y_, y_adv_conv, adv_y_, L, alpha,
                                 perturbW)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        #pretrain_step = tf.train.AdamOptimizer(1e-4).minimize(pretrain_adv, global_step=global_step, var_list=[kernel1, biases1]);
        pretrain_var_list = tf.get_collection(AECODER_VARIABLES)
        train_var_list = tf.get_collection(CONV_VARIABLES)
        #print(pretrain_var_list)
        #print(train_var_list)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            pretrain_step = tf.train.AdamOptimizer(learning_rate).minimize(
                pretrain_adv + pretrain_benign,
                global_step=global_step,
                var_list=pretrain_var_list)
            train_op = cifar10.train(loss,
                                     global_step,
                                     learning_rate,
                                     _var_list=train_var_list)
        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))

        sess.run(kernel1.initializer)
        dp_epsilon = 1.0
        _gamma = sess.run(gamma)
        _gamma_x = Delta2 / L
        epsilon2_update = epsilon2 / (1.0 + 1.0 / _gamma + 1 / _gamma_x)
        print(epsilon2_update / _gamma + epsilon2_update / _gamma_x)
        print(epsilon2_update)
        delta_r = fgsm_eps * (image_size**2)
        _sensitivityW = sess.run(sensitivity)
        delta_h = _sensitivityW * (14**2)
        #delta_h = 1.0 * delta_r; #sensitivity*(14**2) = sensitivity*(\beta**2) can also be used
        #dp_mult = (Delta2/(L*epsilon2))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2))/(delta_h / dp_epsilon)
        dp_mult = (Delta2 / (L * epsilon2_update)) / (delta_r / dp_epsilon) + (
            2 * Delta2 / (L * epsilon2_update)) / (delta_h / dp_epsilon)

        dynamic_eps = tf.placeholder(tf.float32)
        """y_test = inference(x, FM_h, params)
    softmax_y = tf.nn.softmax(y_test);
    c_x_adv = fgsm(x, softmax_y, eps=dynamic_eps/3, clip_min=-1.0, clip_max=1.0)
    x_adv = tf.reshape(c_x_adv, [L, image_size, image_size, 3])"""

        attack_switch = {
            'fgsm': True,
            'ifgsm': True,
            'deepfool': False,
            'mim': True,
            'spsa': False,
            'cwl2': False,
            'madry': True,
            'stm': False
        }
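        # The enabled attacks above (ifgsm, mim, madry) are each run on a
        # disjoint sub-batch of size L/3 in the training loop below, so the
        # adversarial ensemble fed to the model totals roughly L examples per step.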

        ch_model_probs = CustomCallableModelWrapper(
            callable_fn=inference_test_input_probs,
            output_layer='probs',
            params=params,
            image_size=image_size,
            adv_noise=adv_noise)

        # define each attack method's tensor
        mu_alpha = tf.placeholder(tf.float32, [1])
        attack_tensor_dict = {}
        # FastGradientMethod
        if attack_switch['fgsm']:
            print('creating attack tensor of FastGradientMethod')
            fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2) # testing now
            x_adv_test_fgsm = fgsm_obj.generate(x=x,
                                                eps=mu_alpha,
                                                clip_min=-1.0,
                                                clip_max=1.0)  # testing now
            attack_tensor_dict['fgsm'] = x_adv_test_fgsm

        # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
        # default: eps_iter=0.05, nb_iter=10
        if attack_switch['ifgsm']:
            print('creating attack tensor of BasicIterativeMethod')
            ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_ifgsm = ifgsm_obj.generate(x=x,
                                                  eps=mu_alpha,
                                                  eps_iter=fgsm_eps / 3,
                                                  nb_iter=3,
                                                  clip_min=-1.0,
                                                  clip_max=1.0)
            attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm

        # MomentumIterativeMethod
        # default: eps_iter=0.06, nb_iter=10
        if attack_switch['mim']:
            print('creating attack tensor of MomentumIterativeMethod')
            mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_mim = mim_obj.generate(x=x,
                                              eps=mu_alpha,
                                              eps_iter=fgsm_eps / 3,
                                              nb_iter=3,
                                              decay_factor=1.0,
                                              clip_min=-1.0,
                                              clip_max=1.0)
            attack_tensor_dict['mim'] = x_adv_test_mim

        # MadryEtAl (projected gradient with random init, same as rand+fgsm)
        # default: eps_iter=0.01, nb_iter=40
        if attack_switch['madry']:
            print('creating attack tensor of MadryEtAl')
            madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
            #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_madry = madry_obj.generate(x=x,
                                                  eps=mu_alpha,
                                                  eps_iter=fgsm_eps / 3,
                                                  nb_iter=3,
                                                  clip_min=-1.0,
                                                  clip_max=1.0)
            attack_tensor_dict['madry'] = x_adv_test_madry

        #====================== attack =========================

        #adv_logits, _ = inference(c_x_adv + W_conv1Noise, perturbFM, params)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()
        sess.run(init)

        # Start the queue runners.
        #tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(os.getcwd() + dirCheckpoint,
                                               sess.graph)

        # load the most recent models
        _global_step = 0
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
            _global_step = int(
                ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        else:
            print('No checkpoint file found')

        T = int(int(math.ceil(D / L)) * epochs + 1)  # number of steps
        step_for_epoch = int(math.ceil(D / L))
        #number of steps for one epoch

        # scale 0 makes perturbH_test all zeros (no feature-map noise at test time)
        perturbH_test = np.random.laplace(0.0, 0, 14 * 14 * 128)
        perturbH_test = np.reshape(perturbH_test, [-1, 14, 14, 128])

        #W_conv1Noise = np.random.laplace(0.0, Delta2/(L*epsilon2), 32 * 32 * 3).astype(np.float32)
        #W_conv1Noise = np.reshape(_W_conv1Noise, [32, 32, 3])

        perturbFM_h = np.random.laplace(0.0,
                                        2 * Delta2 / (epsilon2_update * L),
                                        14 * 14 * 128)
        perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 128])

        #_W_adv = np.random.laplace(0.0, 0, 32 * 32 * 3).astype(np.float32)
        #_W_adv = np.reshape(_W_adv, [32, 32, 3])
        #_perturbFM_h_adv = np.random.laplace(0.0, 0, 10*10*128)
        #_perturbFM_h_adv = np.reshape(_perturbFM_h_adv, [10, 10, 128]);

        test_size = len(cifar10_data.test.images)
        #beta = redistributeNoise(os.getcwd() + '/LRP_0_25_v12.txt')
        #BenignLNoise = generateIdLMNoise(image_size, Delta2, eps_benign, L) #generateNoise(image_size, Delta2, eps_benign, L, beta);
        #AdvLnoise = generateIdLMNoise(image_size, Delta2, eps_adv, L)
        Noise = generateIdLMNoise(image_size, Delta2, epsilon2_update, L)
        #generateNoise(image_size, Delta2, eps_adv, L, beta);
        Noise_test = generateIdLMNoise(
            image_size, 0, epsilon2_update,
            L)  #generateNoise(image_size, 0, 2*epsilon2, test_size, beta);

        emsemble_L = int(L / 3)
        preT_epochs = 100
        pre_T = int(int(math.ceil(D / L)) * preT_epochs + 1)
        """logfile.write("pretrain: \n")
    for step in range(_global_step, _global_step + pre_T):
        d_eps = random.random()*0.5;
        batch = cifar10_data.train.next_batch(L); #Get a random batch.
        adv_images = sess.run(x_adv, feed_dict = {x: batch[0], dynamic_eps: d_eps, FM_h: perturbH_test})
        for iter in range(0, 2):
            adv_images = sess.run(x_adv, feed_dict = {x: adv_images, dynamic_eps: d_eps, FM_h: perturbH_test})
        #sess.run(pretrain_step, feed_dict = {x: batch[0], noise: AdvLnoise, FM_h: perturbFM_h});
        batch = cifar10_data.train.next_batch(L);
        sess.run(pretrain_step, feed_dict = {x: np.append(batch[0], adv_images, axis = 0), noise: Noise, FM_h: perturbFM_h});
        if step % int(25*step_for_epoch) == 0:
            cost_value = sess.run(cost, feed_dict={x: cifar10_data.test.images, noise: Noise_test, FM_h: perturbH_test})/(test_size*128)
            logfile.write("step \t %d \t %g \n"%(step, cost_value))
            print(cost_value)
    print('pre_train finished')"""

        _global_step = 0
        for step in xrange(_global_step, _global_step + T):
            start_time = time.time()
            d_eps = random.random() * 0.5
            batch = cifar10_data.train.next_batch(emsemble_L)
            #Get a random batch.
            y_adv_batch = batch[1]
            """adv_images = sess.run(x_adv, feed_dict = {x: batch[0], dynamic_eps: d_eps, FM_h: perturbH_test})
      for iter in range(0, 2):
          adv_images = sess.run(x_adv, feed_dict = {x: adv_images, dynamic_eps: d_eps, FM_h: perturbH_test})"""
            adv_images_ifgsm = sess.run(attack_tensor_dict['ifgsm'],
                                        feed_dict={
                                            x: batch[0],
                                            adv_noise: Noise,
                                            mu_alpha: [d_eps]
                                        })
            batch = cifar10_data.train.next_batch(emsemble_L)
            y_adv_batch = np.append(y_adv_batch, batch[1], axis=0)
            adv_images_mim = sess.run(attack_tensor_dict['mim'],
                                      feed_dict={
                                          x: batch[0],
                                          adv_noise: Noise,
                                          mu_alpha: [d_eps]
                                      })
            batch = cifar10_data.train.next_batch(emsemble_L)
            y_adv_batch = np.append(y_adv_batch, batch[1], axis=0)
            adv_images_madry = sess.run(attack_tensor_dict['madry'],
                                        feed_dict={
                                            x: batch[0],
                                            adv_noise: Noise,
                                            mu_alpha: [d_eps]
                                        })
            adv_images = np.append(np.append(adv_images_ifgsm,
                                             adv_images_mim,
                                             axis=0),
                                   adv_images_madry,
                                   axis=0)

            batch = cifar10_data.train.next_batch(L)
            #Get a random batch.

            sess.run(pretrain_step,
                     feed_dict={
                         x: batch[0],
                         adv_x: adv_images,
                         adv_noise: Noise_test,
                         noise: Noise,
                         FM_h: perturbFM_h
                     })
            _, loss_value = sess.run(
                [train_op, loss],
                feed_dict={
                    x: batch[0],
                    y_: batch[1],
                    adv_x: adv_images,
                    adv_y_: y_adv_batch,
                    noise: Noise,
                    adv_noise: Noise_test,
                    FM_h: perturbFM_h
                })
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            # report the result periodically
            if step % (50 * step_for_epoch) == 0 and step >= (300 *
                                                              step_for_epoch):
                '''predictions_form_argmax = np.zeros([test_size, 10])
          softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: cifar10_data.test.images, noise: Noise_test, FM_h: perturbH_test})
          argmax_predictions = np.argmax(softmax_predictions, axis=1)
          """for n_draws in range(0, 2000):
            _BenignLNoise = generateIdLMNoise(image_size, Delta2, epsilon2, L)
            _perturbFM_h = np.random.laplace(0.0, 2*Delta2/(epsilon2*L), 14*14*128)
            _perturbFM_h = np.reshape(_perturbFM_h, [-1, 14, 14, 128]);"""
          for j in range(test_size):
            pred = argmax_predictions[j]
            predictions_form_argmax[j, pred] += 2000;
          """softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: cifar10_data.test.images, noise: _BenignLNoise, FM_h: _perturbFM_h})
            argmax_predictions = np.argmax(softmax_predictions, axis=1)"""
          final_predictions = predictions_form_argmax;
          is_correct = []
          is_robust = []
          for j in range(test_size):
              is_correct.append(np.argmax(cifar10_data.test.labels[j]) == np.argmax(final_predictions[j]))
              robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],eta=0.05,dp_attack_size=fgsm_eps, dp_epsilon=1.0, dp_delta=0.05, dp_mechanism='laplace') / dp_mult
              is_robust.append(robustness_from_argmax >= fgsm_eps)
          acc = np.sum(is_correct)*1.0/test_size
          robust_acc = np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust)
          robust_utility = np.sum(is_robust)*1.0/test_size
          log_str = "step: {:.1f}\t epsilon: {:.1f}\t benign: {:.4f} \t {:.4f} \t {:.4f} \t {:.4f} \t".format(step, total_eps, acc, robust_acc, robust_utility, robust_acc*robust_utility)'''

                #===================adv samples=====================
                log_str = "step: {:.1f}\t epsilon: {:.1f}\t".format(
                    step, total_eps)
                """adv_images_dict = {}
          for atk in attack_switch.keys():
              if attack_switch[atk]:
                  adv_images_dict[atk] = sess.run(attack_tensor_dict[atk], feed_dict ={x:cifar10_data.test.images})
          print("Done with the generating of Adversarial samples")"""
                #===================adv samples=====================
                adv_acc_dict = {}
                robust_adv_acc_dict = {}
                robust_adv_utility_dict = {}
                test_bach_size = 5000
                for atk in attack_switch.keys():
                    print(atk)
                    if atk not in adv_acc_dict:
                        adv_acc_dict[atk] = -1
                        robust_adv_acc_dict[atk] = -1
                        robust_adv_utility_dict[atk] = -1
                    if attack_switch[atk]:
                        test_bach = cifar10_data.test.next_batch(
                            test_bach_size)
                        adv_images_dict = sess.run(attack_tensor_dict[atk],
                                                   feed_dict={
                                                       x: test_bach[0],
                                                       adv_noise: Noise_test,
                                                       mu_alpha: [fgsm_eps]
                                                   })
                        print("Done adversarial examples")
                        ### PixelDP Robustness ###
                        predictions_form_argmax = np.zeros(
                            [test_bach_size, 10])
                        softmax_predictions = sess.run(softmax_y_conv,
                                                       feed_dict={
                                                           x: adv_images_dict,
                                                           noise: Noise,
                                                           FM_h: perturbFM_h
                                                       })
                        argmax_predictions = np.argmax(softmax_predictions,
                                                       axis=1)
                        for n_draws in range(0, 1000):
                            _BenignLNoise = generateIdLMNoise(
                                image_size, Delta2, epsilon2_update, L)
                            _perturbFM_h = np.random.laplace(
                                0.0, 2 * Delta2 / (epsilon2_update * L),
                                14 * 14 * 128)
                            _perturbFM_h = np.reshape(_perturbFM_h,
                                                      [-1, 14, 14, 128])
                            if n_draws == 500:
                                print("n_draws = 500")
                            for j in range(test_bach_size):
                                pred = argmax_predictions[j]
                                predictions_form_argmax[j, pred] += 1
                            softmax_predictions = sess.run(
                                softmax_y_conv,
                                feed_dict={
                                    x: adv_images_dict,
                                    noise: (_BenignLNoise / 10 + Noise),
                                    FM_h: perturbFM_h
                                }) * sess.run(
                                    softmax_y_conv,
                                    feed_dict={
                                        x: adv_images_dict,
                                        noise: Noise,
                                        FM_h: (_perturbFM_h / 10 + perturbFM_h)
                                    })
                            #softmax_predictions = sess.run(softmax_y_conv, feed_dict={x: adv_images_dict, noise: (_BenignLNoise), FM_h: perturbFM_h}) * sess.run(softmax_y_conv, feed_dict={x: adv_images_dict, noise: Noise, FM_h: (_perturbFM_h)})
                            argmax_predictions = np.argmax(softmax_predictions,
                                                           axis=1)
                        final_predictions = predictions_form_argmax
                        is_correct = []
                        is_robust = []
                        for j in range(test_bach_size):
                            is_correct.append(
                                np.argmax(test_bach[1][j]) == np.argmax(
                                    final_predictions[j]))
                            robustness_from_argmax = robustness.robustness_size_argmax(
                                counts=predictions_form_argmax[j],
                                eta=0.05,
                                dp_attack_size=fgsm_eps,
                                dp_epsilon=dp_epsilon,
                                dp_delta=0.05,
                                dp_mechanism='laplace') / dp_mult
                            is_robust.append(
                                robustness_from_argmax >= fgsm_eps)
                        adv_acc_dict[atk] = np.sum(
                            is_correct) * 1.0 / test_bach_size
                        robust_adv_acc_dict[atk] = np.sum([
                            a and b for a, b in zip(is_robust, is_correct)
                        ]) * 1.0 / np.sum(is_robust)
                        robust_adv_utility_dict[atk] = np.sum(
                            is_robust) * 1.0 / test_bach_size
                        ##############################
                for atk in attack_switch.keys():
                    if attack_switch[atk]:
                        # added robust prediction
                        log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(
                            atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
                            robust_adv_utility_dict[atk],
                            robust_adv_acc_dict[atk] *
                            robust_adv_utility_dict[atk])
                print(log_str)
                logfile.write(log_str + '\n')

            # Save the model checkpoint periodically.
            if step % (10 * step_for_epoch) == 0 and (step > _global_step):
                num_examples_per_step = L
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))
            """if step % (50*step_for_epoch) == 0 and (step >= 900*step_for_epoch):
Example #7
def test():
    """
    """
    tf.reset_default_graph()
    g = tf.get_default_graph()

    with g.as_default():
        # Placeholder nodes.
        images_holder = tf.placeholder(
            tf.float32,
            [None, FLAGS.IMAGE_ROWS, FLAGS.IMAGE_COLS, FLAGS.NUM_CHANNELS])
        label_holder = tf.placeholder(tf.float32, [None, FLAGS.NUM_CLASSES])
        is_training = tf.placeholder(tf.bool, ())

        # model
        model = model_cifar100.RDPCNN(images_holder, label_holder,
                                      FLAGS.INPUT_SIGMA,
                                      is_training)  # for adv examples

        model_loss = model.loss()
        model_acc = model.cnn_accuracy

        # robust
        def inference(x):
            logits, _ = model.cnn.prediction(x)
            return logits

        def inference_prob(x):
            _, probs = model.cnn.prediction(x)
            return probs

        graph_dict = {}
        graph_dict["images_holder"] = images_holder
        graph_dict["label_holder"] = label_holder
        graph_dict["is_training"] = is_training

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config, graph=g) as sess:
        sess.run(tf.global_variables_initializer())
        # load model
        model.tf_load(sess, name=FLAGS.CNN_CKPT_RESTORE_NAME)

        # adv test
        ####################################################################################################
        x_advs = {}
        ch_model_logits = CallableModelWrapper(callable_fn=inference,
                                               output_layer='logits')
        ch_model_probs = CallableModelWrapper(callable_fn=inference_prob,
                                              output_layer='probs')
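        # ch_model_logits is kept alongside ch_model_probs (logit output is what,
        # e.g., CW-style attacks expect); the attacks built below use ch_model_probs.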
        # FastGradientMethod
        fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
        x_advs["fgsm"] = fgsm_obj.generate(x=images_holder,
                                           eps=FLAGS.ATTACK_SIZE,
                                           clip_min=0.0,
                                           clip_max=1.0)  # testing now

        # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
        # default: eps_iter=0.05, nb_iter=10
        ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
        x_advs["ifgsm"] = ifgsm_obj.generate(x=images_holder,
                                             eps=FLAGS.ATTACK_SIZE,
                                             eps_iter=FLAGS.ATTACK_SIZE / 10,
                                             nb_iter=10,
                                             clip_min=0.0,
                                             clip_max=1.0)

        # MomentumIterativeMethod
        # default: eps_iter=0.06, nb_iter=10
        mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
        x_advs["mim"] = mim_obj.generate(x=images_holder,
                                         eps=FLAGS.ATTACK_SIZE,
                                         eps_iter=FLAGS.ATTACK_SIZE / 10,
                                         nb_iter=10,
                                         decay_factor=1.0,
                                         clip_min=0.0,
                                         clip_max=1.0)

        # MadryEtAl (projected gradient with random init, same as rand+fgsm)
        # default: eps_iter=0.01, nb_iter=40
        madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
        x_advs["madry"] = madry_obj.generate(x=images_holder,
                                             eps=FLAGS.ATTACK_SIZE,
                                             eps_iter=FLAGS.ATTACK_SIZE / 10,
                                             nb_iter=10,
                                             clip_min=0.0,
                                             clip_max=1.0)
        graph_dict["x_advs"] = x_advs
        ####################################################################################################

        # tensorboard writer
        #test_writer = model_utils.init_writer(FLAGS.TEST_LOG_PATH, g)
        print("\nTest")
        if FLAGS.local:
            total_test_batch = 2
        else:
            total_test_batch = None
        dp_info = np.load(FLAGS.DP_INFO_NPY, allow_pickle=True).item()
        test_info(sess,
                  model,
                  True,
                  graph_dict,
                  dp_info,
                  FLAGS.TEST_LOG_FILENAME,
                  total_batch=total_test_batch)
        robust_info(sess, model, graph_dict, FLAGS.ROBUST_LOG_FILENAME)
Example #8
    def setUp(self):
        super(TestMadryEtAl, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = MadryEtAl(self.model, sess=self.sess)
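
SimpleModel here is the fixture from the cleverhans test suite; a minimal stand-in for running this setUp locally might look like the sketch below (an assumption, mirroring the constants of my_model in the earlier test):

import tensorflow as tf
from cleverhans.model import Model

class SimpleModel(Model):
    """Two-layer toy network; a stand-in, not the cleverhans fixture itself."""

    def __init__(self, scope='simple', nb_classes=2, **kwargs):
        del kwargs
        Model.__init__(self, scope, nb_classes, locals())

    def fprop(self, x, **kwargs):
        del kwargs
        w1 = tf.constant([[1.5, .3], [-2, 0.3]], dtype=tf.float32)
        h1 = tf.nn.sigmoid(tf.matmul(x, w1))
        w2 = tf.constant([[-2.4, 1.2], [0.5, -2.3]], dtype=tf.float32)
        logits = tf.matmul(h1, w2)
        return {self.O_LOGITS: logits, self.O_PROBS: tf.nn.softmax(logits)}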
Example #9
def main(args):
    normalize = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    transform = Compose([Resize(256), CenterCrop(224), ToTensor(), normalize])

    dataset = ImageDataset(args.image_folder,
                           transform=transform,
                           return_paths=True)
    # n_images = len(dataset)
    dataloader = DataLoader(dataset,
                            shuffle=False,
                            batch_size=args.batch_size,
                            pin_memory=True,
                            num_workers=0)

    model = models.resnet50(pretrained=True).to(args.device)
    model.eval()

    config = tf.ConfigProto(intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1,
                            allow_soft_placement=True,
                            device_count={'CPU': 1})
    sess = tf.Session(config=config)
    x_op = tf.placeholder(tf.float32, shape=(None, 3, 224, 224))

    tf_model = convert_pytorch_model_to_tf(model, args.device)
    cleverhans_model = CallableModelWrapper(tf_model, output_layer='logits')

    # compute clip_min and clip_max using a full black and a full white image
    clip_min = normalize(torch.zeros(3, 1, 1)).min().item()
    clip_max = normalize(torch.ones(3, 1, 1)).max().item()

    eps = args.eps / 255.
    eps_iter = 20  # in 0-255 units; scaled by /255 below
    nb_iter = 10
    args.ord = np.inf if args.ord < 0 else args.ord
    grad_params = {'eps': eps, 'ord': args.ord}
    common_params = {'clip_min': clip_min, 'clip_max': clip_max}
    iter_params = {'eps_iter': eps_iter / 255., 'nb_iter': nb_iter}

    attack_name = ''
    if args.attack == 'fgsm':
        attack_name = '_L{}_eps{}'.format(args.ord, args.eps)
        attack_op = FastGradientMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params}
    elif args.attack == 'iter':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps,
                                                     eps_iter, nb_iter)
        attack_op = BasicIterativeMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'm-iter':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps,
                                                     eps_iter, nb_iter)
        attack_op = MomentumIterativeMethod(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'pgd':
        attack_name = '_L{}_eps{}_epsi{}_i{}'.format(args.ord, args.eps,
                                                     eps_iter, nb_iter)
        attack_op = MadryEtAl(cleverhans_model, sess=sess)
        attack_params = {**common_params, **grad_params, **iter_params}
    elif args.attack == 'jsma':
        attack_op = SaliencyMapMethod(cleverhans_model, sess=sess)
        attack_params = {'theta': eps, 'symbolic_impl': False, **common_params}
    elif args.attack == 'deepfool':
        attack_op = DeepFool(cleverhans_model, sess=sess)
        attack_params = common_params
    elif args.attack == 'cw':
        attack_op = CarliniWagnerL2(cleverhans_model, sess=sess)
        attack_params = common_params
    elif args.attack == 'lbfgs':
        attack_op = LBFGS(cleverhans_model, sess=sess)
        target = np.zeros((1, 1000))
        target[0, np.random.randint(1000)] = 1
        y = tf.placeholder(tf.float32, target.shape)
        attack_params = {'y_target': y, **common_params}
    else:
        raise ValueError('Unknown attack: {}'.format(args.attack))

    attack_name = args.attack + attack_name

    print('Running [{}]. Params: {}'.format(args.attack.upper(),
                                            attack_params))

    adv_x_op = attack_op.generate(x_op, **attack_params)
    adv_preds_op = tf_model(adv_x_op)
    preds_op = tf_model(x_op)

    n_success = 0
    n_processed = 0
    progress = tqdm(dataloader)
    for paths, x in progress:

        progress.set_description('ATTACK')

        feed = {x_op: x}
        if args.attack == 'lbfgs':
            feed[y] = target  # the LBFGS target placeholder exists only for that attack
        z, adv_x, adv_z = sess.run([preds_op, adv_x_op, adv_preds_op],
                                   feed_dict=feed)

        src, dst = np.argmax(z, axis=1), np.argmax(adv_z, axis=1)
        success = src != dst
        success_paths = np.array(paths)[success]
        success_adv_x = adv_x[success]
        success_src = src[success]
        success_dst = dst[success]

        n_success += success_adv_x.shape[0]
        n_processed += x.shape[0]

        progress.set_postfix(
            {'Success': '{:3.2%}'.format(n_success / n_processed)})
        progress.set_description('SAVING')

        for p, a, s, d in zip(success_paths, success_adv_x, success_src,
                              success_dst):
            path = '{}_{}_src{}_dst{}.npz'.format(p, attack_name, s, d)
            path = os.path.join(args.out_folder, path)
            np.savez_compressed(path, img=a)
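
Hypothetical CLI wiring for main(); the flag names are inferred from the args attributes used above, not taken from the source:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('image_folder')
    parser.add_argument('out_folder')
    parser.add_argument('--attack', default='fgsm',
                        choices=['fgsm', 'iter', 'm-iter', 'pgd', 'jsma',
                                 'deepfool', 'cw', 'lbfgs'])
    parser.add_argument('--eps', type=float, default=8)
    parser.add_argument('--ord', type=float, default=-1)  # < 0 selects L-inf
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--device', default='cuda')
    main(parser.parse_args())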
Example #10
def train(cifar10_data, epochs, L, learning_rate, scale3, Delta2, epsilon2,
          eps2_ratio, alpha, perturbFM, fgsm_eps, total_eps, logfile,
          parameter_dict):
    logfile.write("fgsm_eps \t %g, LR \t %g, alpha \t %d , epsilon \t %d \n" %
                  (fgsm_eps, learning_rate, alpha, total_eps))
    """Train CIFAR-10 for a number of steps."""
    # make sure variables are placed on cpu
    # TODO: for AWS version, check if put variables on GPU will be better
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.Variable(0, trainable=False)
        attacks = ['ifgsm', 'mim', 'madry']

        # manually create all scopes
        with tf.variable_scope('conv1', reuse=tf.AUTO_REUSE) as scope:
            scope_conv1 = scope
        with tf.variable_scope('conv2', reuse=tf.AUTO_REUSE) as scope:
            scope_conv2 = scope
        with tf.variable_scope('conv3', reuse=tf.AUTO_REUSE) as scope:
            scope_conv3 = scope
        with tf.variable_scope('local4', reuse=tf.AUTO_REUSE) as scope:
            scope_local4 = scope
        with tf.variable_scope('local5', reuse=tf.AUTO_REUSE) as scope:
            scope_local5 = scope

        # Parameters Declarification
        #with tf.variable_scope('conv1') as scope:
        # with tf.device('/gpu:{}'.format(AUX_GPU_IDX[0])):
        with tf.variable_scope(scope_conv1) as scope:
            kernel1 = _variable_with_weight_decay(
                'kernel1',
                shape=[4, 4, 3, 128],
                stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
                wd=0.0,
                collect=[AECODER_VARIABLES])
            biases1 = _bias_on_cpu('biases1', [128],
                                   tf.constant_initializer(0.0),
                                   collect=[AECODER_VARIABLES])

        #
        shape = kernel1.get_shape().as_list()
        w_t = tf.reshape(kernel1, [-1, shape[-1]])
        w = tf.transpose(w_t)
        sing_vals = tf.svd(w, compute_uv=False)
        sensitivity = tf.reduce_max(sing_vals)
        gamma = 2 * Delta2 / (L * sensitivity)

        with tf.variable_scope(scope_conv2) as scope:
            kernel2 = _variable_with_weight_decay(
                'kernel2',
                shape=[5, 5, 128, 128],
                stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
                wd=0.0,
                collect=[CONV_VARIABLES])
            biases2 = _bias_on_cpu('biases2', [128],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        with tf.variable_scope(scope_conv3) as scope:
            kernel3 = _variable_with_weight_decay(
                'kernel3',
                shape=[5, 5, 256, 256],
                stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
                wd=0.0,
                collect=[CONV_VARIABLES])
            biases3 = _bias_on_cpu('biases3', [256],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        with tf.variable_scope(scope_local4) as scope:
            kernel4 = _variable_with_weight_decay(
                'kernel4',
                shape=[int(image_size / 4)**2 * 256, hk],
                stddev=0.04,
                wd=0.004,
                collect=[CONV_VARIABLES])
            biases4 = _bias_on_cpu('biases4', [hk],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        with tf.variable_scope(scope_local5) as scope:
            kernel5 = _variable_with_weight_decay(
                'kernel5', [hk, 10],
                stddev=np.sqrt(2.0 / (int(image_size / 4)**2 * 256)) /
                math.ceil(5 / 2),
                wd=0.0,
                collect=[CONV_VARIABLES])
            biases5 = _bias_on_cpu('biases5', [10],
                                   tf.constant_initializer(0.1),
                                   collect=[CONV_VARIABLES])

        # group these for use as parameters
        params = [
            kernel1, biases1, kernel2, biases2, kernel3, biases3, kernel4,
            biases4, kernel5, biases5
        ]
        scopes = [
            scope_conv1, scope_conv2, scope_conv3, scope_local4, scope_local5
        ]

        # placeholders for input values
        FM_h = tf.placeholder(tf.float32, [None, 14, 14, 128])  # one time
        noise = tf.placeholder(tf.float32,
                               [None, image_size, image_size, 3])  # one time
        adv_noise = tf.placeholder(
            tf.float32, [None, image_size, image_size, 3])  # one time

        x_sb = tf.placeholder(
            tf.float32,
            [None, image_size, image_size, 3])  # concatenation of all per-GPU batches
        x_list = tf.split(x_sb, N_GPUS, axis=0)  # one batch per GPU
        adv_x_sb = tf.placeholder(tf.float32,
                                  [None, image_size, image_size, 3])
        adv_x_list = tf.split(adv_x_sb, N_GPUS, axis=0)

        x_test = tf.placeholder(tf.float32, [None, image_size, image_size, 3])

        y_sb = tf.placeholder(tf.float32, [None, 10])  # labels for the concatenated batches
        y_list = tf.split(y_sb, N_GPUS, axis=0)  # one label batch per GPU
        adv_y_sb = tf.placeholder(tf.float32, [None, 10])  # adversarial labels, attack-major order
        # adv_y_list = tf.split(adv_y_sb, N_GPUS, axis=0) # split it into each batch

        y_test = tf.placeholder(tf.float32, [None, 10])

        # re-arrange the input samples
        _split_adv_y_sb = tf.split(adv_y_sb, N_AUX_GPUS, axis=0)
        reorder_adv_y_sb = []
        for i in range(N_GPUS):
            reorder_adv_y_sb.append(
                tf.concat([
                    _split_adv_y_sb[i + N_GPUS * atk_index]
                    for atk_index in range(len(attacks))
                ],
                          axis=0))
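        # adv_y_sb arrives attack-major ([atk0 | atk1 | atk2], each block split
        # across N_GPUS), i.e. N_AUX_GPUS = N_GPUS * len(attacks); the loop above
        # regroups the label chunks GPU-major so tower bi gets its own slice of
        # every attack.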

        tower_pretrain_grads = []
        tower_train_grads = []
        all_train_loss = []

        pretrain_opt = tf.train.AdamOptimizer(learning_rate)
        train_opt = tf.train.GradientDescentOptimizer(learning_rate)

        # batch index
        bi = 0
        for gpu in GPU_IDX:
            # putting ops on each tower (GPU)
            with tf.device('/gpu:{}'.format(gpu)):
                print('Train inference GPU placement')
                print('/gpu:{}'.format(gpu))
                # Auto-Encoder #
                # pretrain_adv and pretrain_benign are cost tensor of the encoding layer
                with tf.variable_scope(scope_conv1) as scope:
                    Enc_Layer2 = EncLayer(inpt=adv_x_list[bi],
                                          n_filter_in=3,
                                          n_filter_out=128,
                                          filter_size=3,
                                          W=kernel1,
                                          b=biases1,
                                          activation=tf.nn.relu)
                    pretrain_adv = Enc_Layer2.get_train_ops2(
                        xShape=tf.shape(adv_x_list[bi])[0],
                        Delta=Delta2,
                        epsilon=epsilon2,
                        batch_size=L,
                        learning_rate=learning_rate,
                        W=kernel1,
                        b=biases1,
                        perturbFMx=adv_noise,
                        perturbFM_h=FM_h,
                        bn_index=bi)
                    Enc_Layer3 = EncLayer(inpt=x_list[bi],
                                          n_filter_in=3,
                                          n_filter_out=128,
                                          filter_size=3,
                                          W=kernel1,
                                          b=biases1,
                                          activation=tf.nn.relu)
                    pretrain_benign = Enc_Layer3.get_train_ops2(
                        xShape=tf.shape(x_list[bi])[0],
                        Delta=Delta2,
                        epsilon=epsilon2,
                        batch_size=L,
                        learning_rate=learning_rate,
                        W=kernel1,
                        b=biases1,
                        perturbFMx=noise,
                        perturbFM_h=FM_h,
                        bn_index=bi)
                    pretrain_cost = pretrain_adv + pretrain_benign
                # this cost is not used
                # cost = tf.reduce_sum((Enc_Layer2.cost + Enc_Layer3.cost)/2.0);

                # benign conv output
                x_image = x_list[bi] + noise
                y_conv = inference(x_image,
                                   FM_h,
                                   params,
                                   scopes,
                                   training=True,
                                   bn_index=bi)
                # softmax_y_conv = tf.nn.softmax(y_conv)

                # adv conv output
                adv_x_image = adv_x_list[bi] + adv_noise
                y_adv_conv = inference(adv_x_image,
                                       FM_h,
                                       params,
                                       scopes,
                                       training=True,
                                       bn_index=bi)

                # Calculate loss. Apply Taylor Expansion for the output layer
                perturbW = perturbFM * params[8]
                train_loss = cifar10.TaylorExp(y_conv, y_list[bi], y_adv_conv,
                                               reorder_adv_y_sb[bi], L, alpha,
                                               perturbW)
                all_train_loss.append(train_loss)

                # list of variables to train
                pretrain_var_list = tf.get_collection(AECODER_VARIABLES)
                train_var_list = tf.get_collection(CONV_VARIABLES)

                # compute tower gradients
                pretrain_grads = pretrain_opt.compute_gradients(
                    pretrain_cost, var_list=pretrain_var_list)
                train_grads = train_opt.compute_gradients(
                    train_loss, var_list=train_var_list)
                # get_pretrain_grads(pretrain_cost, global_step, learning_rate, pretrain_var_list)
                # train_grads = get_train_grads(train_loss, global_step, learning_rate, train_var_list)

                # note this list contains grads and variables
                tower_pretrain_grads.append(pretrain_grads)
                tower_train_grads.append(train_grads)

                # batch index
                bi += 1

        # average the gradient from each tower
        pretrain_var_dict = {}
        all_pretrain_grads = {}
        avg_pretrain_grads = []
        for var in tf.get_collection(AECODER_VARIABLES):
            if var.name not in all_pretrain_grads:
                all_pretrain_grads[var.name] = []
                pretrain_var_dict[var.name] = var
        for tower in tower_pretrain_grads:
            for var_grad in tower:
                all_pretrain_grads[var_grad[1].name].append(var_grad[0])
        for var_name in all_pretrain_grads:
            # expand dim 0, then concat on dim 0, then reduce mean on dim 0
            expand_pretrain_grads = [
                tf.expand_dims(g, 0) for g in all_pretrain_grads[var_name]
            ]
            concat_pretrain_grads = tf.concat(expand_pretrain_grads, axis=0)
            reduce_pretrain_grads = tf.reduce_mean(concat_pretrain_grads, 0)
            # rebuild (grad, var) list
            avg_pretrain_grads.append(
                (reduce_pretrain_grads, pretrain_var_dict[var_name]))
        print('*****************************')
        print("avg_pretrain_grads:")
        for avg_pretrain_grad in avg_pretrain_grads:
            print('grads')
            print((avg_pretrain_grad[0].name, avg_pretrain_grad[0].shape))
            print('var')
            print((avg_pretrain_grad[1].name, avg_pretrain_grad[1].shape))
            print('------')

        train_var_dict = {}
        all_train_grads = {}
        avg_train_grads = []
        for var in tf.get_collection(CONV_VARIABLES):
            if var.name not in all_train_grads:
                all_train_grads[var.name] = []
                train_var_dict[var.name] = var
        for tower in tower_train_grads:
            for var_grad in tower:
                all_train_grads[var_grad[1].name].append(var_grad[0])
        for var_name in all_train_grads:
            # expand dim 0, then concat on dim 0, then reduce mean on dim 0
            expand_train_grads = [
                tf.expand_dims(g, 0) for g in all_train_grads[var_name]
            ]
            concat_train_grads = tf.concat(expand_train_grads, axis=0)
            reduce_train_grads = tf.reduce_mean(concat_train_grads, 0)
            # rebuild (grad, var) list
            avg_train_grads.append(
                (reduce_train_grads, train_var_dict[var_name]))
        print('*****************************')
        print("avg_train_grads:")
        for avg_train_grad in avg_train_grads:
            print('grads')
            print((avg_train_grad[0].name, avg_train_grad[0].shape))
            print('var')
            print((avg_train_grad[1].name, avg_train_grad[1].shape))
            print('------')
        print('*****************************')

        # get averaged loss tensor
        avg_loss = tf.reduce_mean(tf.stack(all_train_loss), axis=0)

        # TODO: take the average of the bn variables from each tower/training GPU
        # currently, testing is using the bn variables on bn_index 0 (tower/training GPU 0)

        # build train op (apply average gradient to variables)
        # according to 1.13 doc, updates need to be manually applied
        _update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        print('update ops:')
        print(_update_ops)

        with tf.control_dependencies(_update_ops):
            pretrain_op = pretrain_opt.apply_gradients(avg_pretrain_grads,
                                                       global_step=global_step)
            train_op = train_opt.apply_gradients(avg_train_grads,
                                                 global_step=global_step)

        # start a session with memory growth
        config = tf.ConfigProto(log_device_placement=False)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        print("session created")

        # init kernel 1 and get some values from it
        sess.run(kernel1.initializer)
        dp_epsilon = 0.005
        parameter_dict['dp_epsilon'] = dp_epsilon
        _gamma = sess.run(gamma)
        _gamma_x = Delta2 / L
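        # re-scale the budget so that
        #   epsilon2_update * (1 + 1/_gamma + 1/_gamma_x) == epsilon2,
        # i.e. epsilon2 is split across the reconstruction noise terms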
        epsilon2_update = epsilon2 / (1.0 + 1.0 / _gamma + 1 / _gamma_x)
        parameter_dict['epsilon2_update'] = epsilon2_update
        print(epsilon2_update / _gamma + epsilon2_update / _gamma_x)
        print(epsilon2_update)
        # NOTE: these values need to be recalculated at test time
        delta_r = fgsm_eps * (image_size**2)
        parameter_dict['delta_r'] = delta_r
        _sensitivityW = sess.run(sensitivity)
        parameter_dict['_sensitivityW'] = _sensitivityW
        delta_h = _sensitivityW * (14**2)
        parameter_dict['delta_h'] = delta_h
        #dp_mult = (Delta2/(L*epsilon2_update))/(delta_r / dp_epsilon) + (2*Delta2/(L*epsilon2_update))/(delta_h / dp_epsilon)
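        # dp_mult: Laplace noise multiplier; reading the formula, the scale
        # Delta2 / (L * epsilon2_update) is spread over the combined
        # sensitivity delta_r + delta_h / 2 of the input and feature maps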
        dp_mult = (Delta2) / (L * epsilon2_update * (delta_h / 2 + delta_r))
        parameter_dict['dp_mult'] = dp_mult

        # place test-time inference on the CPU
        with tf.device('/cpu:0'):
            # testing pipeline
            test_x_image = x_test + noise
            test_y_conv = inference(test_x_image,
                                    FM_h,
                                    params,
                                    scopes,
                                    training=True,
                                    bn_index=0)
            test_softmax_y_conv = tf.nn.softmax(test_y_conv)

        # ============== attacks ================
        iter_step_training = 3
        parameter_dict['iter_step_training'] = iter_step_training
        # iter_step_testing = 1000
        aux_dup_count = N_GPUS
        # split input x_super_batch into N_AUX_GPUS parts
        x_attacks = tf.split(x_sb, N_AUX_GPUS, axis=0)
        # split input x_test into aux_dup_count parts
        x_test_split = tf.split(x_test, aux_dup_count, axis=0)
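        # x_attacks uses the same attack-major layout as the labels above:
        # slice (i + atk_index * aux_dup_count) is aux GPU i's batch for
        # attack number atk_index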

        # setup all attacks
        # attack_switch = {'fgsm':False, 'ifgsm':True, 'deepfool':False, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':False}

        ch_model_probs = CustomCallableModelWrapper(
            callable_fn=inference_test_input_probs,
            output_layer='probs',
            params=params,
            scopes=scopes,
            image_size=image_size,
            adv_noise=adv_noise)
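        # wrap the inference function so the CleverHans attack objects below
        # can query it like a model exposing a 'probs' output layer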
        attack_tensor_training_dict = {}
        attack_tensor_testing_dict = {}

        # define each attack method's tensor
        mu_alpha = tf.placeholder(tf.float32, [1])
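        # mu_alpha feeds the attack strength (d_eps, randomized per step in
        # the training loop below) into every attack's eps / eps_iter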

        # build each attack
        for atk_idx in range(len(attacks)):
            atk = attacks[atk_idx]
            print('building attack {} tensors'.format(atk))
            # for each gpu assign to each attack
            attack_tensor_training_dict[atk] = []
            attack_tensor_testing_dict[atk] = []
            for i in range(aux_dup_count):
                if atk == 'ifgsm':
                    with tf.device('/gpu:{}'.format(AUX_GPU_IDX[i])):
                        print('ifgsm GPU placement: /gpu:{}'.format(
                            AUX_GPU_IDX[i]))
                        # ifgsm tensors for training
                        ifgsm_obj = BasicIterativeMethod(model=ch_model_probs,
                                                         sess=sess)
                        attack_tensor_training_dict[atk].append(
                            ifgsm_obj.generate(x=x_attacks[i],
                                               eps=mu_alpha,
                                               eps_iter=mu_alpha /
                                               iter_step_training,
                                               nb_iter=iter_step_training,
                                               clip_min=-1.0,
                                               clip_max=1.0))

                elif atk == 'mim':
                    with tf.device('/gpu:{}'.format(
                            AUX_GPU_IDX[i + 1 * aux_dup_count])):
                        print('mim GPU placement: /gpu:{}'.format(
                            AUX_GPU_IDX[i + 1 * aux_dup_count]))
                        # mim tensors for training
                        mim_obj = MomentumIterativeMethod(model=ch_model_probs,
                                                          sess=sess)
                        attack_tensor_training_dict[atk].append(
                            mim_obj.generate(
                                x=x_attacks[i + 1 * aux_dup_count],
                                eps=mu_alpha,
                                eps_iter=mu_alpha / iter_step_training,
                                nb_iter=iter_step_training,
                                decay_factor=1.0,
                                clip_min=-1.0,
                                clip_max=1.0))

                elif atk == 'madry':
                    with tf.device('/gpu:{}'.format(
                            AUX_GPU_IDX[i + 2 * aux_dup_count])):
                        print('madry GPU placement: /gpu:{}'.format(
                            AUX_GPU_IDX[i + 2 * aux_dup_count]))
                        # madry tensors for training
                        madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
                        attack_tensor_training_dict[atk].append(
                            madry_obj.generate(
                                x=x_attacks[i + 2 * aux_dup_count],
                                eps=mu_alpha,
                                eps_iter=mu_alpha / iter_step_training,
                                nb_iter=iter_step_training,
                                clip_min=-1.0,
                                clip_max=1.0))

        # combine all attack tensors
        adv_concat_list = []
        for i in range(aux_dup_count):
            adv_concat_list.append(
                tf.concat(
                    [attack_tensor_training_dict[atk][i] for atk in attacks],
                    axis=0))
        # the tensor that contains each batch of adv samples for training
        # has same sample order as the labels
        adv_super_batch_tensor = tf.concat(adv_concat_list, axis=0)

        #====================== attack =========================

        #adv_logits, _ = inference(c_x_adv + W_conv1Noise, perturbFM, params)

        print('******************** debug info **********************')
        # list of variables to train
        pretrain_var_list = tf.get_collection(AECODER_VARIABLES)
        print('pretrain var list')
        for v in pretrain_var_list:
            print((v.name, v.shape))
        print('**********************************')
        train_var_list = tf.get_collection(CONV_VARIABLES)
        print('train var list')
        for v in train_var_list:
            print((v.name, v.shape))
        print('**********************************')

        # all variables
        print('all variables')
        vl = tf.global_variables()
        for v in vl:
            print((v.name, v.shape))
        print('**********************************')

        # all ops
        ops = [n.name for n in tf.get_default_graph().as_graph_def().node]
        print('total number of ops')
        print(len(ops))
        # for op in ops:
        #   print(op)
        print('******************** debug info **********************')
        # exit()

        # Create a saver.
        saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=1000)

        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()
        sess.run(init)

        # load the most recent models
        _global_step = 0
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
            _global_step = int(
                ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        else:
            print('No checkpoint file found')

        T = int(int(math.ceil(D / L)) * epochs + 1)  # total number of steps
        print('total number of steps: {}'.format(T))
        step_for_epoch = int(math.ceil(D / L))  # number of steps in one epoch
        parameter_dict['step_for_epoch'] = step_for_epoch
        print('step_for_epoch: {}'.format(step_for_epoch))

        # generate some fixed noise
        perturbH_test = np.random.laplace(0.0, 0, 14 * 14 * 128)  # one time
        perturbH_test = np.reshape(perturbH_test,
                                   [-1, 14, 14, 128])  # one time
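        # a Laplace draw with scale 0 is degenerate: perturbH_test is all
        # zeros, i.e. no noise is injected through this path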
        parameter_dict['perturbH_test'] = perturbH_test
        print('perturbH_test')
        print(perturbH_test.shape)

        perturbFM_h = np.random.laplace(0.0,
                                        2 * Delta2 / (epsilon2_update * L),
                                        14 * 14 * 128)  # one time
        perturbFM_h = np.reshape(perturbFM_h, [-1, 14, 14, 128])  # one time
        parameter_dict['perturbFM_h'] = perturbFM_h
        print('perturbFM_h')
        print(perturbFM_h.shape)

        Noise = generateIdLMNoise(image_size, Delta2, epsilon2_update,
                                  L)  # one time
        parameter_dict['Noise'] = Noise
        Noise_test = generateIdLMNoise(image_size, 0, epsilon2_update,
                                       L)  # one time
        parameter_dict['Noise_test'] = Noise_test
        print('Noise and Noise_test')
        print(Noise.shape)
        print(Noise_test.shape)
        # exit()

        # some timing variables
        adv_duration_total = 0.0
        adv_duration_count = 0
        train_duration_total = 0.0
        train_duration_count = 0

        # some debug flags
        adv_batch_flag = True
        batch_flag = True
        L_flag = True
        parameter_flag = True

        _global_step = 0
        for step in xrange(_global_step, _global_step + T):
            start_time = time.time()
            # TODO: fix this
            d_eps = random.random() * 0.5
            # d_eps = 0.25
            print('d_eps: {}'.format(d_eps))

            # version with 3 AUX GPUs
            # get two super batches: one for benign training, one for adv training
            super_batch_images, super_batch_labels = cifar10_data.train.next_super_batch(
                N_GPUS, random=True)
            super_batch_images_for_adv, super_batch_adv_labels = cifar10_data.train.next_super_batch_premix_ensemble(
                N_GPUS, random=True)

            # TODO: re-arrange the adv labels to match the adv samples

            # run adv_tensors_batch_concat to generate adv samples
            super_batch_adv_images = sess.run(adv_super_batch_tensor,
                                              feed_dict={
                                                  x_sb:
                                                  super_batch_images_for_adv,
                                                  adv_noise: Noise,
                                                  mu_alpha: [d_eps]
                                              })

            adv_finish_time = time.time()
            adv_duration = adv_finish_time - start_time
            adv_duration_total += adv_duration
            adv_duration_count += 1

            if adv_batch_flag:
                print(super_batch_images.shape)
                print(super_batch_labels.shape)
                print(super_batch_adv_images.shape)
                print(super_batch_adv_labels.shape)
                adv_batch_flag = False

            if batch_flag:
                print(super_batch_images.shape)
                print(super_batch_labels.shape)
                batch_flag = False

            if L_flag:
                print("L: {}".format(L))
                L_flag = False

            if parameter_flag:
                print('*=*=*=*=*')
                print(parameter_dict)
                print('*=*=*=*=*', flush=True)
                logfile.write('*=*=*=*=*\n')
                logfile.write(str(parameter_dict))
                logfile.write('*=*=*=*=*\n')
                parameter_flag = False

            _, _, avg_loss_value = sess.run(
                [pretrain_op, train_op, avg_loss],
                feed_dict={
                    x_sb: super_batch_images,
                    y_sb: super_batch_labels,
                    adv_x_sb: super_batch_adv_images,
                    adv_y_sb: super_batch_adv_labels,
                    noise: Noise,
                    adv_noise: Noise_test,
                    FM_h: perturbFM_h
                })

            assert not np.isnan(
                avg_loss_value), 'Model diverged with loss = NaN'

            train_finish_time = time.time()
            train_duration = train_finish_time - adv_finish_time
            train_duration_total += train_duration
            train_duration_count += 1

            # save model every 50 epochs
            if step % (50 * step_for_epoch) == 0 and (step >=
                                                      50 * step_for_epoch):
                print('saving model')
                checkpoint_path = os.path.join(os.getcwd() + dirCheckpoint,
                                               'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

            # Save the model checkpoint periodically.
            # if step % (10*step_for_epoch) == 0 and (step > _global_step):
            if step % 10 == 0 and (step > _global_step):
                # print n steps and time
                print("current epoch: {:.2f}".format(step / step_for_epoch))
                num_examples_per_step = L * N_GPUS * 2
                avg_adv_duration = adv_duration_total / adv_duration_count
                avg_train_duration = train_duration_total / train_duration_count
                avg_total_duration = avg_adv_duration + avg_train_duration
                examples_per_sec = num_examples_per_step / avg_total_duration
                sec_per_step = avg_total_duration
                # sec_per_batch = sec_per_step / (N_GPUS * 2)
                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.2f '
                    'sec/step; %.2f sec/adv_gen_op; %.2f sec/train_op)')
                actual_str = format_str % (
                    datetime.now(), step, avg_loss_value, examples_per_sec,
                    sec_per_step, avg_adv_duration, avg_train_duration)
                print(actual_str, flush=True)
                logfile.write(actual_str + '\n')
        attack_params = {
            'batch_size': eval_batch_size,
            'eps': config['epsilon'],
            'eps_iter': config['step_size'],
            'nb_iter': config['num_steps'],
            'clip_min': 0.,
            'clip_max': 255.,
            'ord': np.inf
        }

        #from cleverhans.attacks import FastGradientMethod
        #attacker = FastGradientMethod(model, back='tf', sess=sess)

        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model, back='tf', sess=sess)

        max_eps = 16
        epsilons = np.linspace(1, max_eps, max_eps)
        #epsilons = np.linspace(0, max_eps, max_eps // 4, endpoint=False)
        eval_par = {'batch_size': eval_batch_size}
        for e in epsilons:
            start_time = time.time()
            attack_params.update({'eps': e})
            x_adv = attacker.generate(x, **attack_params)
            preds_adv = model.get_probs(x_adv)
            # the listing was cut off here; the call is completed following
            # the model_eval pattern used elsewhere in this file
            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv,
                             X_test[:nb_samples],
                             Y_test[:nb_samples],
                             args=eval_par)
            print('eps = %.2f: adversarial accuracy = %.4f (%.1f s)' %
                  (e, acc, time.time() - start_time))

def main(argv=None):
    """
    CIFAR10 CleverHans tutorial
    :return:
    """

    # CIFAR10-specific dimensions
    img_rows = 32
    img_cols = 32
    channels = 3
    nb_classes = 10

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    set_log_level(logging.WARNING)

    # Get CIFAR10 test data
    X_train, Y_train, X_test, Y_test = data_cifar10()

    assert Y_train.shape[1] == 10.
    label_smooth = .1
    Y_train = Y_train.clip(label_smooth / 9., 1. - label_smooth)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, 10))
    phase = tf.placeholder(tf.bool, name="phase")

    model_path = FLAGS.model_path
    targeted = True if FLAGS.targeted else False
    binary = True if FLAGS.binary else False
    scale = True if FLAGS.scale else False
    learning_rate = FLAGS.learning_rate
    nb_filters = FLAGS.nb_filters
    batch_size = FLAGS.batch_size
    nb_samples = FLAGS.nb_samples
    nb_epochs = FLAGS.nb_epochs
    delay = FLAGS.delay
    eps = FLAGS.eps
    adv = FLAGS.adv

    attack = FLAGS.attack
    attack_iterations = FLAGS.attack_iterations

    save = False
    train_from_scratch = False

    if model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                binary, scale, nb_filters, batch_size, learning_rate, nb_epochs, adv = parse_model_settings(
                    model_path)
                train_from_scratch = False
            else:
                model_path = build_model_save_path(
                    model_path, binary, batch_size, nb_filters, learning_rate, nb_epochs, adv, delay, scale)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given

    if binary:
        if scale:
            from cleverhans_tutorials.tutorial_models import make_scaled_binary_cnn
            model = make_scaled_binary_cnn(phase, 'bin_', input_shape=(
                None, img_rows, img_cols, channels), nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_binary_cnn
            model = make_basic_binary_cnn(phase, 'bin_', input_shape=(
                None, img_rows, img_cols, channels), nb_filters=nb_filters)
    else:
        from cleverhans_tutorials.tutorial_models import make_basic_cnn
        model = make_basic_cnn(phase, 'fp_', input_shape=(
            None, img_rows, img_cols, channels), nb_filters=nb_filters)

    preds = model(x, reuse=False)
    print("Defined TensorFlow model graph.")

    rng = np.random.RandomState([2017, 8, 30])

    def evaluate():
        # Evaluate the accuracy of the CIFAR10 model on legitimate test
        # examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(
            sess, x, y, preds, X_test, Y_test, phase=phase, args=eval_params)
        assert X_test.shape[0] == 10000, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an CIFAR10 model
    train_params = {
        'binary': binary,
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'train loss',
        'filename': 'model',
        'reuse_global_step': False,
        'train_scope': 'train',
        'is_training': True
    }

    if adv:
        from cleverhans.attacks import FastGradientMethod
        fgsm = FastGradientMethod(model, back='tf', sess=sess)
        fgsm_params = {'eps': eps, 'clip_min': 0., 'clip_max': 1.}
        adv_x_train = fgsm.generate(x, phase, **fgsm_params)
        preds_adv = model.get_probs(adv_x_train)

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
            if adv and delay > 0:
                train_params.update({'nb_epochs': delay})

        # do clean training for 'nb_epochs' or 'delay' epochs
        model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                    evaluate=evaluate, args=train_params, save=save, rng=rng)

        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                        predictions_adv=preds_adv, evaluate=evaluate, args=train_params,
                        save=save, rng=rng)
    else:
        tf_model_load(sess, model_path)
        print('Restored model from %s' % model_path)
        evaluate()

    # Evaluate the accuracy of the CIFAR10 model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess, x, y, preds, X_test, Y_test, phase=phase,
                          feed={phase: False}, args=eval_params)

    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    ###########################################################################
    # Build dataset
    ###########################################################################

    if targeted:
        from cleverhans.utils import build_targeted_dataset
        adv_inputs, true_labels, adv_ys = build_targeted_dataset(
            X_test, Y_test, np.arange(nb_samples), nb_classes, img_rows, img_cols, channels)
    else:
        adv_inputs = X_test[:nb_samples]

    ###########################################################################
    # Craft adversarial examples using generic approach
    ###########################################################################
    if targeted:
        att_batch_size = np.clip(
            nb_samples * (nb_classes - 1), a_max=MAX_BATCH_SIZE, a_min=1)
        nb_adv_per_sample = nb_classes - 1
        yname = "y_target"

    else:
        att_batch_size = np.minimum(nb_samples, MAX_BATCH_SIZE)
        nb_adv_per_sample = 1
        adv_ys = None
        yname = "y"

    print('Crafting ' + str(nb_samples) + ' * ' + str(nb_adv_per_sample) +
          ' adversarial examples')
    print("This could take some time ...")

    if attack == ATTACK_CARLINI_WAGNER_L2:
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model, back='tf', sess=sess)
        attack_params = {'binary_search_steps': 1,
                         'max_iterations': attack_iterations,
                         'learning_rate': 0.1,
                         'batch_size': att_batch_size,
                         'initial_const': 10,
                         }
    elif attack == ATTACK_JSMA:
        from cleverhans.attacks import SaliencyMapMethod
        attacker = SaliencyMapMethod(model, back='tf', sess=sess)
        attack_params = {'theta': 1., 'gamma': 0.1}
    elif attack == ATTACK_FGSM:
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model, back='tf', sess=sess)
        attack_params = {'eps': eps}
    elif attack == ATTACK_MADRYETAL:
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model, back='tf', sess=sess)
        # 'nb_iter' is not defined in this function; use the attack_iterations
        # flag as the PGD step count
        attack_params = {'eps': eps, 'eps_iter': 0.01,
                         'nb_iter': attack_iterations}
    else:
        print("Attack undefined")
        sys.exit(1)

    attack_params.update({yname: adv_ys, 'clip_min': 0., 'clip_max': 1.})
    X_test_adv = attacker.generate_np(adv_inputs, phase, **attack_params)
    '''
    adv_x = attacker.generate(x, phase, **attack_params)
    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    eval_params = {'batch_size': att_batch_size}
    X_test_adv, = batch_eval(sess, [x], [adv_x], [adv_inputs], feed={
                             phase: False}, args=eval_params)
    '''

    if targeted:
        assert X_test_adv.shape[0] == nb_samples * \
            (nb_classes - 1), X_test_adv.shape
        # Evaluate the accuracy of the CIFAR10 model on adversarial examples
        print("Evaluating targeted results")
        adv_accuracy = model_eval(sess, x, y, preds, X_test_adv, true_labels,
                                  phase=phase, args=eval_params)
    else:
        assert X_test_adv.shape[0] == nb_samples, X_test_adv.shape
        # Evaluate the accuracy of the CIFAR10 model on adversarial examples
        print("Evaluating un-targeted results")
        adv_accuracy = model_eval(sess, x, y, preds, X_test_adv, Y_test,
                                  phase=phase, args=eval_params)

    # Compute the number of adversarial examples that were successfully found
    print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy))

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(np.sum((X_test_adv - adv_inputs)**2,
                                       axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Friendly output for pasting into spreadsheet
    print('{0:.4f},'.format(accuracy))
    print('{0:.4f},'.format(adv_accuracy))
    print('{0:.4f},'.format(percent_perturbed))

    sess.close()

    '''
    print("Repeating the process, using adversarial training")

    def evaluate_2():
        # Evaluate the accuracy of the adversarialy trained CIFAR10 model on
        # legitimate test examples
        eval_params = {'batch_size': batch_size}
        accuracy = model_eval(sess, x, y, preds, X_test, Y_test,
                              phase=phase,
                              args=eval_params)
        print('Test accuracy on legitimate test examples: ' + str(accuracy))

        # Evaluate the accuracy of the adversarially trained CIFAR10 model on
        # adversarial examples
        accuracy_adv = model_eval(sess, x, y, preds_adv, X_test,
                                  Y_test, phase=phase, args=eval_params)
        print('Test accuracy on adversarial examples: ' + str(accuracy_adv))

    # Perform adversarial training
    train_params.update({'reuse_global_step': True})
    model_train(sess, x, y, preds, X_train, Y_train, phase=phase,
                predictions_adv=preds_adv, evaluate=evaluate_2,
                args=train_params)
    '''
Example #13
def main(_):
    tf.logging.set_verbosity(tf.logging.DEBUG)

    # Images for the Inception classifier are normalized to the [-1, 1] interval.
    num_classes = 1001
    batch_shape = [FLAGS.batch_size, FLAGS.image_height, FLAGS.image_width, 3]

    # Load ImageNet Class Labels
    with open('labels.json') as f:
        labels = json.load(f)

    # Prepare Graph
    with tf.Graph().as_default():

        # Build Model
        if FLAGS.model_arch.lower() == 'resnet_v2_101':
            model = models.Resnet_V2_101_Model(num_classes)
            exceptions = []

        elif FLAGS.model_arch.lower() == 'inception_v3':
            model = models.Inception_V3_Model(num_classes)
            exceptions = ['InceptionV3/AuxLogits.*']

        else:
            raise ValueError('Invalid model architecture specified: {}'.format(
                FLAGS.model_arch))

        # Define Model Variables
        x_input = tf.placeholder(tf.float32, shape=batch_shape)
        FastGradientMethod(model).generate(x_input)
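        # a throwaway generate() call builds the model graph so that its
        # variables exist before they are filtered and restored below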
        model_variables = tf.contrib.framework.filter_variables(
            slim.get_model_variables(), exclude_patterns=exceptions)

        # Load Session
        saver = tf.train.Saver(model_variables)
        with tf.train.SessionManager().prepare_session(
                master=FLAGS.master,
                checkpoint_filename_with_path=FLAGS.checkpoint_path,
                saver=saver) as sess:

            # For Targeted Attacks
            target_idx = 0  # This will vary
            # one-hot float targets; the original tf.constant(0, ...) gave an
            # all-zero (invalid) label tensor
            target = tf.one_hot(tf.fill([FLAGS.batch_size], target_idx),
                                num_classes, dtype=tf.float32)

            # Build Attack
            if FLAGS.attack_type.lower() == 'fgsm':
                fgsm_opts = {
                    'eps': 0.3,
                    'clip_min': 0,
                    'clip_max': 1.,
                    'y_target': None
                }
                fgsm = FastGradientMethod(model)
                x_adv = fgsm.generate(x_input, **fgsm_opts)

            elif FLAGS.attack_type.lower() == 'bim':
                bim_opts = {
                    'eps': 0.3,
                    'clip_min': 0.,
                    'clip_max': 1.,
                    'y_target': None
                }
                bim = BasicIterativeMethod(model)
                x_adv = bim.generate(x_input, **bim_opts)

            elif FLAGS.attack_type.lower() == 'mim':
                mim_opts = {'eps': 0.3, 'clip_min': 0, 'clip_max': 1.}
                mim = MomentumIterativeMethod(model)
                x_adv = mim.generate(x_input, **mim_opts)

            elif FLAGS.attack_type.lower() == 'pgd':
                pgd_opts = {'eps': 0.3, 'clip_min': 0, 'clip_max': 1.}
                pgd = MadryEtAl(model)
                x_adv = pgd.generate(x_input, **pgd_opts)

            # Broken
            elif FLAGS.attack_type.lower() == 'jsma':
                jsma_opts = {
                    'theta': 1.,
                    'gamma': 0.1,
                    'clip_min': 0.,  # was 'clip-min', an invalid parameter name
                    'clip_max': 1.,  # was 'clip-max', an invalid parameter name
                    'y_target': None
                }
                jsma = SaliencyMapMethod(model)
                x_adv = jsma.generate(x_input, **jsma_opts)

            elif FLAGS.attack_type.lower() == 'lbfgs':
                lbfgs_opts = {'y_target': target}
                lbfgs = LBFGS(model)
                x_adv = lbfgs.generate(x_input, **lbfgs_opts)

            else:
                raise ValueError('Invalid attack type specified: {}'.format(
                    FLAGS.attack_type))

            start_time, batch_time, num_processed = time.time(), time.time(), 0
            for filenames, images in load_images(FLAGS.input_dir, batch_shape):
                adv_images = sess.run(x_adv, feed_dict={x_input: images})
                save_images(adv_images, filenames, FLAGS.output_dir)

                if FLAGS.show_predictions:
                    preds = sess.run(model(np.float32(images)))
                    probs = np.amax(preds, axis=1)
                    classes = np.argmax(preds, axis=1)
                    adv_preds = sess.run(model(adv_images))
                    adv_probs = np.amax(adv_preds, axis=1)
                    adv_classes = np.argmax(adv_preds, axis=1)

                    for i, _ in enumerate(filenames):
                        print('\nOriginal: {:.2f}% ({})\nAdversarial: {:.2f}% ({})'.format( \
                          probs[i]*100, labels[str(classes[i])], adv_probs[i]*100, labels[str(adv_classes[i])]))

                time_delta = time.time() - batch_time
                batch_time = time.time()
                num_processed += len(filenames)
                print('[SPEED ESTIMATION] BatchRate={:.4f} Hz; AverageRate={:.4f} Hz'.format( \
                  (len(filenames) / time_delta * 1.0), ((num_processed * 1.0) / (batch_time - start_time))))
Example #14
def mnist_attack(train_start=0,
                 train_end=60000,
                 test_start=0,
                 test_end=10000,
                 viz_enabled=True,
                 nb_epochs=6,
                 batch_size=128,
                 nb_filters=64,
                 nb_samples=10,
                 learning_rate=0.001,
                 eps=0.3,
                 attack=0,
                 attack_iterations=100,
                 model_path=None,
                 targeted=False,
                 binary=False,
                 scale=False,
                 rand=False,
                 debug=None,
                 test=False,
                 data_dir=None,
                 delay=0,
                 adv=0,
                 nb_iter=40):
    """
    MNIST tutorial for generic attack
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param viz_enabled: (boolean) activate plots of adversarial examples
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param nb_filters: number of convolutional filters
    :param nb_samples: number of test inputs to attack
    :param learning_rate: learning rate for training
    :param model_path: path to the model file
    :param targeted: should we run a targeted attack? or untargeted?
    :return: an AccuracyReport object
    """
    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # MNIST-specific dimensions
    img_rows = 28
    img_cols = 28
    channels = 1
    nb_classes = 10

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1237)

    # Create TF session
    sess = tf.Session()
    print("Created TensorFlow session.")

    if debug:
        set_log_level(logging.DEBUG)
    else:
        set_log_level(logging.WARNING)  # for running on sharcnet

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_mnist(datadir=data_dir,
                                                  train_start=train_start,
                                                  train_end=train_end,
                                                  test_start=test_start,
                                                  test_end=test_end)

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    phase = tf.placeholder(tf.bool, name='phase')

    # for attempting to break the unscaled network.
    logits_scalar = tf.placeholder_with_default(INIT_T,
                                                shape=(),
                                                name="logits_temperature")

    save = False
    train_from_scratch = False
    if model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                binary, scale, nb_filters, batch_size, learning_rate, nb_epochs, adv = parse_model_settings(
                    model_path)
                train_from_scratch = False
            else:
                model_path = build_model_save_path(model_path, binary,
                                                   batch_size, nb_filters,
                                                   learning_rate, nb_epochs,
                                                   adv, delay, scale)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given

    # Define TF model graph
    if binary:
        print('binary=True')
        if scale:
            print('scale=True')
            if rand:
                print('rand=True')
                from cleverhans_tutorials.tutorial_models import make_scaled_binary_rand_cnn
                model = make_scaled_binary_rand_cnn(
                    phase,
                    logits_scalar,
                    'binsc_',
                    input_shape=(None, img_rows, img_cols, channels),
                    nb_filters=nb_filters)
            else:
                from cleverhans_tutorials.tutorial_models import make_scaled_binary_cnn
                model = make_scaled_binary_cnn(phase,
                                               logits_scalar,
                                               'binsc_',
                                               input_shape=(None, img_rows,
                                                            img_cols,
                                                            channels),
                                               nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_binary_cnn
            model = make_basic_binary_cnn(phase,
                                          logits_scalar,
                                          'bin_',
                                          nb_filters=nb_filters)
    else:
        if rand:
            print('rand=True')
            from cleverhans_tutorials.tutorial_models import make_scaled_rand_cnn
            model = make_scaled_rand_cnn(phase,
                                         logits_scalar,
                                         'fp_rand',
                                         nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_cnn
            model = make_basic_cnn(phase,
                                   logits_scalar,
                                   'fp_',
                                   nb_filters=nb_filters)

    preds = model(x, reuse=False)  # * logits_scalar
    print("Defined TensorFlow model graph.")

    ###########################################################################
    # Training the model using TensorFlow
    ###########################################################################
    rng = np.random.RandomState([2017, 8, 30])

    # Train an MNIST model
    train_params = {
        'binary': binary,
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'train loss',
        'filename': 'model',
        'reuse_global_step': False,
        'train_scope': 'train',
        'is_training': True
    }

    if adv != 0:
        if adv == ADVERSARIAL_TRAINING_MADRYETAL:
            from cleverhans.attacks import MadryEtAl
            train_attack_params = {
                'eps': MAX_EPS,
                'eps_iter': 0.01,
                'nb_iter': nb_iter
            }
            train_attacker = MadryEtAl(model, sess=sess)

        elif adv == ADVERSARIAL_TRAINING_FGSM:
            from cleverhans.attacks import FastGradientMethod
            stddev = int(np.ceil((MAX_EPS * 255) // 2))
            train_attack_params = {
                'eps':
                tf.abs(
                    tf.truncated_normal(shape=(batch_size, 1, 1, 1),
                                        mean=0,
                                        stddev=stddev))
            }
            train_attacker = FastGradientMethod(model, back='tf', sess=sess)
        # create the adversarial trainer
        train_attack_params.update({'clip_min': 0., 'clip_max': 1.})
        adv_x_train = train_attacker.generate(x, phase, **train_attack_params)
        preds_adv_train = model.get_probs(adv_x_train)

        eval_attack_params = {'eps': MAX_EPS, 'clip_min': 0., 'clip_max': 1.}
        adv_x_eval = train_attacker.generate(x, phase, **eval_attack_params)
        preds_adv_eval = model.get_probs(adv_x_eval)  # * logits_scalar

    def evaluate():
        # Evaluate the accuracy of the MNIST model on clean test examples
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess,
                         x,
                         y,
                         preds,
                         X_test,
                         Y_test,
                         phase=phase,
                         args=eval_params)
        report.clean_train_clean_eval = acc
        assert X_test.shape[0] == test_end - test_start, X_test.shape
        print('Test accuracy on legitimate examples: %0.4f' % acc)

        if adv != 0:
            # Accuracy of the adversarially trained model on adversarial
            # examples
            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv_eval,
                             X_test,
                             Y_test,
                             phase=phase,
                             args=eval_params)
            print('Test accuracy on adversarial examples: %0.4f' % acc)

            acc = model_eval(sess,
                             x,
                             y,
                             preds_adv_eval,
                             X_test,
                             Y_test,
                             phase=phase,
                             args=eval_params,
                             feed={logits_scalar: ATTACK_T})
            print('Test accuracy on adversarial examples (scaled): %0.4f' %
                  acc)

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
            if adv and delay > 0:
                train_params.update({'nb_epochs': delay})

        # do clean training for 'nb_epochs' or 'delay' epochs
        if test:
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        phase=phase,
                        evaluate=evaluate,
                        args=train_params,
                        save=save,
                        rng=rng)
        else:
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        phase=phase,
                        args=train_params,
                        save=save,
                        rng=rng)

        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            if test:
                model_train(sess,
                            x,
                            y,
                            preds,
                            X_train,
                            Y_train,
                            phase=phase,
                            predictions_adv=preds_adv_train,
                            evaluate=evaluate,
                            args=train_params,
                            save=save,
                            rng=rng)
            else:
                model_train(sess,
                            x,
                            y,
                            preds,
                            X_train,
                            Y_train,
                            phase=phase,
                            predictions_adv=preds_adv_train,
                            args=train_params,
                            save=save,
                            rng=rng)
    else:
        tf_model_load(sess, model_path)
        print('Restored model from %s' % model_path)
        evaluate()

    # Evaluate the accuracy of the MNIST model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    accuracy = model_eval(sess,
                          x,
                          y,
                          preds,
                          X_test,
                          Y_test,
                          phase=phase,
                          feed={phase: False},
                          args=eval_params)
    assert X_test.shape[0] == test_end - test_start, X_test.shape
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))
    report.clean_train_clean_eval = accuracy

    ###########################################################################
    # Build dataset
    ###########################################################################
    if viz_enabled:
        assert nb_samples == nb_classes
        idxs = [
            np.where(np.argmax(Y_test, axis=1) == i)[0][0]
            for i in range(nb_classes)
        ]
        viz_rows = nb_classes if targeted else 2
        # Initialize our array for grid visualization
        grid_shape = (nb_classes, viz_rows, img_rows, img_cols, channels)
        grid_viz_data = np.zeros(grid_shape, dtype='f')

    if targeted:
        from cleverhans.utils import build_targeted_dataset
        if viz_enabled:
            from cleverhans.utils import grid_visual
            adv_inputs, true_labels, adv_ys = build_targeted_dataset(
                X_test, Y_test, idxs, nb_classes, img_rows, img_cols, channels)
        else:
            adv_inputs, true_labels, adv_ys = build_targeted_dataset(
                X_test, Y_test, np.arange(nb_samples), nb_classes, img_rows,
                img_cols, channels)
    else:
        if viz_enabled:
            from cleverhans.utils import pair_visual
            adv_inputs = X_test[idxs]
        else:
            adv_inputs = X_test[:nb_samples]

    ###########################################################################
    # Craft adversarial examples using generic approach
    ###########################################################################
    if targeted:
        att_batch_size = np.clip(nb_samples * (nb_classes - 1),
                                 a_max=MAX_BATCH_SIZE,
                                 a_min=1)
        nb_adv_per_sample = nb_classes - 1
        yname = "y_target"

    else:
        att_batch_size = np.minimum(nb_samples, MAX_BATCH_SIZE)
        nb_adv_per_sample = 1
        adv_ys = None
        yname = "y"

    print('Crafting ' + str(nb_samples) + ' * ' + str(nb_adv_per_sample) +
          ' adversarial examples')
    print("This could take some time ...")

    if attack == ATTACK_CARLINI_WAGNER_L2:
        print('Attack: CarliniWagnerL2')
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model, back='tf', sess=sess)
        attack_params = {
            'binary_search_steps': 1,
            'max_iterations': attack_iterations,
            'learning_rate': 0.1,
            'batch_size': att_batch_size,
            'initial_const': 10,
        }
    elif attack == ATTACK_JSMA:
        print('Attack: SaliencyMapMethod')
        from cleverhans.attacks import SaliencyMapMethod
        attacker = SaliencyMapMethod(model, back='tf', sess=sess)
        attack_params = {'theta': 1., 'gamma': 0.1}
    elif attack == ATTACK_FGSM:
        print('Attack: FastGradientMethod')
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model, back='tf', sess=sess)
        attack_params = {'eps': eps}
    elif attack == ATTACK_MADRYETAL:
        print('Attack: MadryEtAl')
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model, back='tf', sess=sess)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    elif attack == ATTACK_BASICITER:
        print('Attack: BasicIterativeMethod')
        from cleverhans.attacks import BasicIterativeMethod
        attacker = BasicIterativeMethod(model, back='tf', sess=sess)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    else:
        print("Attack undefined")
        sys.exit(1)

    attack_params.update({yname: adv_ys, 'clip_min': 0., 'clip_max': 1.})
    adv_np = attacker.generate_np(adv_inputs, phase, **attack_params)
    '''
    name = 'm_fgsm_eps%s_n%s.npy' % (eps, nb_samples)
    fpath = os.path.join(
        '/scratch/gallowaa/mnist/adversarial_examples/cleverhans/', name)
    np.savez(fpath, x=adv_np, y=Y_test[:nb_samples])
    '''
    '''
    adv_x = attacker.generate(x, phase, **attack_params)
    adv_np, = batch_eval(sess, [x], [adv_x], [adv_inputs], feed={
                         phase: False}, args=eval_params)
    '''
    eval_params = {'batch_size': att_batch_size}
    if targeted:
        print("Evaluating targeted results")
        adv_accuracy = model_eval(sess,
                                  x,
                                  y,
                                  preds,
                                  adv_np,
                                  true_labels,
                                  phase=phase,
                                  args=eval_params)

    else:
        print("Evaluating untargeted results")
        if viz_enabled:
            adv_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      adv_np,
                                      Y_test[idxs],
                                      phase=phase,
                                      args=eval_params)
        else:
            adv_accuracy = model_eval(sess,
                                      x,
                                      y,
                                      preds,
                                      adv_np,
                                      Y_test[:nb_samples],
                                      phase=phase,
                                      args=eval_params)

    if viz_enabled:
        n = nb_classes - 1
        for i in range(nb_classes):
            if targeted:
                for j in range(nb_classes):
                    if i != j:
                        if j != 0 and i != n:
                            grid_viz_data[i, j] = adv_np[j * n + i]
                        if j == 0 and i > 0 or i == n and j > 0:
                            grid_viz_data[i, j] = adv_np[j * n + i - 1]
                    else:
                        grid_viz_data[i, j] = adv_inputs[j * n]
            else:
                # untargeted case: index by class 'i' ('j' is undefined here)
                grid_viz_data[i, 0] = adv_inputs[i]
                grid_viz_data[i, 1] = adv_np[i]
        print(grid_viz_data.shape)

    print('--------------------------------------')

    # Compute the number of adversarial examples that were successfully found
    print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy))
    report.clean_train_adv_eval = 1. - adv_accuracy

    # Compute the average distortion introduced by the algorithm
    percent_perturbed = np.mean(
        np.sum((adv_np - adv_inputs)**2, axis=(1, 2, 3))**.5)
    print('Avg. L_2 norm of perturbations {0:.4f}'.format(percent_perturbed))

    # Compute number of modified features (L_0 norm)
    nb_changed = np.where(adv_np != adv_inputs)[0].shape[0]
    percent_perturb = np.mean(float(nb_changed) / adv_np.reshape(-1).shape[0])

    # Compute the average distortion introduced by the algorithm
    print('Avg. rate of perturbed features {0:.4f}'.format(percent_perturb))

    # Friendly output for pasting into spreadsheet
    print('{0:.4f}'.format(accuracy))
    print('{0:.4f}'.format(adv_accuracy))
    print('{0:.4f}'.format(percent_perturbed))
    print('{0:.4f}'.format(percent_perturb))

    # Close TF session
    sess.close()

    # Finally, block & display a grid of all the adversarial examples
    if viz_enabled:
        import matplotlib.pyplot as plt
        _ = grid_visual(grid_viz_data)

    return report
Example #15
def train(fgsm_eps, _dp_epsilon, _attack_norm_bound, log_filename, ratio):
    FLAGS = None

    #ratio = 16
    #target_eps = [0.125,0.25,0.5,1,2,4,8]
    #target_eps = [0.25 + 0.25*ratio]
    target_eps = [0.2 + 0.2 * ratio]
    #print(target_eps[0])
    #fgsm_eps = 0.1
    dp_epsilon = _dp_epsilon
    image_size = 28
    _log_filename = log_filename + str(target_eps[0]) + '_fgsm_' + str(
        fgsm_eps) + '_dpeps_' + str(dp_epsilon) + '_attack_norm_bound_' + str(
            _attack_norm_bound) + '.txt'

    clip_bound = 0.001  # clip bound of the gradients
    clip_bound_2 = 1 / 1.5  # clip bound for r_kM

    small_num = 1e-5  # a small number
    large_num = 1e5  # a large number
    num_images = 50000  # number of images N

    batch_size = 125  # batch size L
    sample_rate = batch_size / 50000  # sample rate q = L / N
    num_steps = 1800000  # number of steps T = E * N / L = E / q
    num_epoch = 24  # number of epochs E

    sigma = 5  # noise scale sigma
    delta = 1e-5  # privacy parameter delta

    lambd = 1e3  # exponential distribution parameter

    iterative_clip_step = 2  # iterative clip step

    clip = 1  # whether to clip the gradient
    noise = 0  # whether to add noise (shadowed later by the noise placeholder)
    redistribute = 0  # whether to redistribute the noise

    D = 50000  # total number of training examples, for the moments accountant

    sess = tf.InteractiveSession()

    # Create the model
    x = tf.placeholder(tf.float32, [None, 784])
    y_ = tf.placeholder(tf.float32, [None, 10])
    keep_prob = tf.placeholder(tf.float32)

    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    W_fc1 = weight_variable([7 * 7 * 64, 25])
    b_fc1 = bias_variable([25])
    W_fc2 = weight_variable([25, 10])
    b_fc2 = bias_variable([10])

    def inference(x, dp_mult):
        x_image = tf.reshape(x, [-1, 28, 28, 1])
        h_conv1 = tf.nn.relu((conv2d(x_image, W_conv1) + b_conv1) + dp_mult)
        h_pool1 = max_pool_2x2(h_conv1)
        h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
        h_pool2 = max_pool_2x2(h_conv2)
        h_pool2_flat = tf.reshape(h_pool2, [-1, 7 * 7 * 64])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
        h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

        y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
        return y_conv, h_conv1

    def inference_prob(x):
        logits, _ = inference(x, 0)
        y_prob = tf.nn.softmax(logits)
        return y_prob

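    # L2 sensitivity of the first conv layer, measured as the spectral norm
    # (largest singular value) of the reshaped kernel W_conv1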
    shape = W_conv1.get_shape().as_list()
    w_t = tf.reshape(W_conv1, [-1, shape[-1]])
    w = tf.transpose(w_t)
    sing_vals = tf.svd(w, compute_uv=False)
    sensitivityW = tf.reduce_max(sing_vals)
    dp_delta = 0.05
    attack_norm_bound = _attack_norm_bound
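    # standard Gaussian-mechanism calibration: C * sqrt(2 * ln(1.25 / delta)) / epsilon,
    # with the attack norm bound C playing the role of the L2 sensitivity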
    dp_mult = attack_norm_bound * math.sqrt(
        2 * math.log(1.25 / dp_delta)) / dp_epsilon
    noise = tf.placeholder(tf.float32, [None, 28, 28, 32])

    #y_conv, h_conv1 = inference(x, dp_mult * noise)
    y_conv, h_conv1 = inference(x, attack_norm_bound * noise)
    softmax_y = tf.nn.softmax(y_conv)
    # Define loss and optimizer

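    # moments accountant: tracks the cumulative (epsilon, delta) privacy spending per step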
    priv_accountant = accountant.GaussianMomentsAccountant(D)
    privacy_accum_op = priv_accountant.accumulate_privacy_spending(
        [None, None], sigma, batch_size)

    # sess.run(tf.initialize_all_variables())
    sess.run(tf.global_variables_initializer())

    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y_conv))
    #train_step = tf.train.AdamOptimizer(1e-5).minimize(cross_entropy);
    #train_step = tf.train.AdamOptimizer(1e-5).minimize(cross_entropy)

    # noise redistribution #
    grad, = tf.gradients(cross_entropy, h_conv1)
    normalized_grad = tf.sign(grad)
    normalized_grad = tf.stop_gradient(normalized_grad)
    normalized_grad_r = tf.abs(tf.reduce_mean(normalized_grad, axis=(0)))
    #print(normalized_grad_r)
    sum_r = tf.reduce_sum(normalized_grad_r, axis=(0, 1, 2), keepdims=False)
    #print(sum_r)
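    # rescale the per-unit redistribution weights so they sum to a fixed total (256 * 32)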
    normalized_grad_r = 256 * 32 * normalized_grad_r / sum_r
    print(normalized_grad_r)

    shape_grad = normalized_grad_r.get_shape().as_list()
    grad_t = tf.reshape(normalized_grad_r, [-1, shape_grad[-1]])
    g = tf.transpose(grad_t)
    sing_g_vals = tf.svd(g, compute_uv=False)
    sensitivity_2 = tf.reduce_max(sing_g_vals)
    ########################

    opt = GradientDescentOptimizer(learning_rate=1e-1)

    # compute gradient
    gw_W1 = tf.gradients(cross_entropy, W_conv1)[0]  # gradient of W1
    gb1 = tf.gradients(cross_entropy, b_conv1)[0]  # gradient of b1

    gw_W2 = tf.gradients(cross_entropy, W_conv2)[0]  # gradient of W2
    gb2 = tf.gradients(cross_entropy, b_conv2)[0]  # gradient of b2

    gw_Wf1 = tf.gradients(cross_entropy, W_fc1)[0]  # gradient of W_fc1
    gbf1 = tf.gradients(cross_entropy, b_fc1)[0]  # gradient of b_fc1

    gw_Wf2 = tf.gradients(cross_entropy, W_fc2)[0]  # gradient of W_fc2
    gbf2 = tf.gradients(cross_entropy, b_fc2)[0]  # gradient of b_fc2

    # clip gradient
    gw_W1 = tf.clip_by_norm(gw_W1, clip_bound)
    gw_W2 = tf.clip_by_norm(gw_W2, clip_bound)
    gw_Wf1 = tf.clip_by_norm(gw_Wf1, clip_bound)
    gw_Wf2 = tf.clip_by_norm(gw_Wf2, clip_bound)

    # sigma = FLAGS.sigma # when comp_eps(lmbda,q,sigma,T,delta)==epsilon

    # sensitivity = 2 * FLAGS.clip_bound #adjacency matrix with one tuple different
    sensitivity = clip_bound  # adjacency matrix with one more tuple

    # Add calibrated Gaussian noise to every clipped gradient.  As in the
    # original code, (sigma * sensitivity)**2 -- the variance of the Gaussian
    # mechanism -- is passed as the stddev argument.
    gw_W1, gb1, gw_W2, gb2, gw_Wf1, gbf1, gw_Wf2, gbf2 = [
        g + tf.random_normal(shape=tf.shape(g),
                             mean=0.0,
                             stddev=(sigma * sensitivity)**2,
                             dtype=tf.float32)
        for g in (gw_W1, gb1, gw_W2, gb2, gw_Wf1, gbf1, gw_Wf2, gbf2)
    ]

    train_step = opt.apply_gradients([(gw_W1, W_conv1), (gb1, b_conv1),
                                      (gw_W2, W_conv2), (gb2, b_conv2),
                                      (gw_Wf1, W_fc1), (gbf1, b_fc1),
                                      (gw_Wf2, W_fc2), (gbf2, b_fc2)])

    # craft adversarial samples from x for testing
    #softmax_y_test = tf.nn.softmax(y_conv)

    #====================== attack =========================

    attack_switch = {
        'fgsm': True,
        'ifgsm': True,
        'deepfool': False,
        'mim': True,
        'spsa': False,
        'cwl2': False,
        'madry': True,
        'stm': False
    }
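    # only the attacks switched on above get graph tensors built below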

    # define cleverhans abstract models for using cleverhans attacks
    ch_model_logits = CallableModelWrapper(callable_fn=inference,
                                           output_layer='logits')
    ch_model_probs = CallableModelWrapper(callable_fn=inference_prob,
                                          output_layer='probs')

    # define each attack method's tensor
    attack_tensor_dict = {}
    # FastGradientMethod
    if attack_switch['fgsm']:
        print('creating attack tensor of FastGradientMethod')
        fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
        x_adv_test_fgsm = fgsm_obj.generate(x=x,
                                            eps=fgsm_eps,
                                            clip_min=0.0,
                                            clip_max=1.0)  # testing now
        attack_tensor_dict['fgsm'] = x_adv_test_fgsm

    # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
    # default: eps_iter=0.05, nb_iter=10
    if attack_switch['ifgsm']:
        print('creating attack tensor of BasicIterativeMethod')
        ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
        x_adv_test_ifgsm = ifgsm_obj.generate(x=x,
                                              eps=fgsm_eps,
                                              eps_iter=fgsm_eps / 10,
                                              nb_iter=10,
                                              clip_min=0.0,
                                              clip_max=1.0)
        attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm

    # MomentumIterativeMethod
    # default: eps_iter=0.06, nb_iter=10
    if attack_switch['mim']:
        print('creating attack tensor of MomentumIterativeMethod')
        mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
        x_adv_test_mim = mim_obj.generate(x=x,
                                          eps=fgsm_eps,
                                          eps_iter=fgsm_eps / 10,
                                          nb_iter=10,
                                          decay_factor=1.0,
                                          clip_min=0.0,
                                          clip_max=1.0)
        attack_tensor_dict['mim'] = x_adv_test_mim

    # MadryEtAl (Projected Gradient with random init, same as rand+fgsm)
    # default: eps_iter=0.01, nb_iter=40
    if attack_switch['madry']:
        print('creating attack tensor of MadryEtAl')
        madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
        x_adv_test_madry = madry_obj.generate(x=x,
                                              eps=fgsm_eps,
                                              eps_iter=fgsm_eps / 10,
                                              nb_iter=10,
                                              clip_min=0.0,
                                              clip_max=1.0)
        attack_tensor_dict['madry'] = x_adv_test_madry

    #====================== attack =========================

    #Define the correct prediction and accuracy#
    correct_prediction_x = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy_x = tf.reduce_mean(tf.cast(correct_prediction_x, tf.float32))

    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    s = math.log(sqrt(2.0 / math.pi) * 1e+5)
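    # closed-form noise scale for the Gaussian mechanism at privacy budget dp_epsilon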
    sigmaEGM = sqrt(2.0) * 1.0 * (sqrt(s) +
                                  sqrt(s + dp_epsilon)) / (2.0 * dp_epsilon)
    print(sigmaEGM)
    __noiseE = np.random.normal(0.0, sigmaEGM**2,
                                28 * 28 * 32).astype(np.float32)
    __noiseE = np.reshape(__noiseE, [-1, 28, 28, 32])

    start_time = time.time()
    logfile = open(_log_filename, 'w')
    last_eval_time = -1
    accum_time = 0
    accum_epoch = 0
    max_benign_acc = -1
    max_adv_acc_dict = {}
    test_size = len(mnist.test.images)
    print("Computing The Noise Redistribution Vector")
    for i in range(4000):
        batch = mnist.train.next_batch(batch_size)
        sess.run([train_step],
                 feed_dict={
                     x: batch[0],
                     y_: batch[1],
                     keep_prob: 0.5,
                     noise: __noiseE * 0
                 })
    batch = mnist.train.next_batch(batch_size * 10)
    grad_redis = sess.run([normalized_grad_r],
                          feed_dict={
                              x: batch[0],
                              y_: batch[1],
                              keep_prob: 1.0,
                              noise: __noiseE * 0
                          })
    #print(grad_redis)
    _sensitivity_2 = sess.run([sensitivity_2],
                              feed_dict={
                                  x: batch[0],
                                  y_: batch[1],
                                  keep_prob: 1.0,
                                  noise: __noiseE * 0
                              })
    #print(_sensitivity_2)

    _sensitivityW = sess.run(sensitivityW)
    #print(_sensitivityW)
    Delta_redis = _sensitivityW / sqrt(_sensitivity_2[0])
    #print(Delta_redis)
    sigmaHGM = sqrt(2.0) * Delta_redis * (sqrt(s) + sqrt(s + dp_epsilon)) / (
        2.0 * dp_epsilon)
    #print(sigmaHGM)
    __noiseH = np.random.normal(0.0, sigmaHGM**2,
                                28 * 28 * 32).astype(np.float32)
    __noiseH = np.reshape(__noiseH, [-1, 28, 28, 32]) * grad_redis

    sess.run(tf.global_variables_initializer())
    print("Training")
    for i in range(num_steps):
        batch = mnist.train.next_batch(batch_size)
        sess.run(
            [train_step],
            feed_dict={
                x: batch[0],
                y_: batch[1],
                keep_prob: 0.5,
                noise: (__noiseE + __noiseH) / 2
            })
        sess.run([privacy_accum_op])
        spent_eps_deltas = priv_accountant.get_privacy_spent(
            sess, target_eps=target_eps)
        if i % 1000 == 0:
            print(i, spent_eps_deltas)
        # stop once the accountant's spent delta reaches the target delta
        _break = False
        for _eps, _delta in spent_eps_deltas:
            if _delta >= delta:
                _break = True
                break
        if _break:
            break
    print("Testing")
    benign_acc = accuracy_x.eval(
        feed_dict={
            x: mnist.test.images,
            y_: mnist.test.labels,
            keep_prob: 1.0,
            noise: (__noiseE + __noiseH) / 2
        })
    ### PixelDP Robustness ###
    adv_acc_dict = {}
    robust_adv_acc_dict = {}
    robust_adv_utility_dict = {}
    for atk in attack_switch.keys():
        if atk not in adv_acc_dict:
            adv_acc_dict[atk] = -1
            robust_adv_acc_dict[atk] = -1
            robust_adv_utility_dict[atk] = -1

        if attack_switch[atk]:
            adv_images_dict = sess.run(attack_tensor_dict[atk],
                                       feed_dict={
                                           x: mnist.test.images,
                                           y_: mnist.test.labels,
                                           keep_prob: 1.0
                                       })
            #grad_redis = sess.run([normalized_grad_r], feed_dict={x: adv_images_dict, y_: mnist.test.labels, keep_prob: 1.0, noise:__noise})
            ### Robustness ###
            predictions_from_argmax = np.zeros([test_size, 10])
            softmax_predictions = softmax_y.eval(
                feed_dict={
                    x: adv_images_dict,
                    keep_prob: 1.0,
                    noise: (__noiseE + __noiseH) / 2
                })
            argmax_predictions = np.argmax(softmax_predictions, axis=1)
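            # Monte Carlo voting: redraw the DP noise 2000 times and tally the
            # argmax prediction for each test point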
            for n_draws in range(0, 2000):
                if n_draws % 1000 == 0:
                    print(n_draws)
                _noiseE = np.random.normal(0.0, sigmaEGM**2,
                                           28 * 28 * 32).astype(np.float32)
                _noiseE = np.reshape(_noiseE, [-1, 28, 28, 32])
                _noise = np.random.normal(0.0, sigmaHGM**2,
                                          28 * 28 * 32).astype(np.float32)
                _noise = np.reshape(_noise, [-1, 28, 28, 32]) * grad_redis
                for j in range(test_size):
                    pred = argmax_predictions[j]
                    predictions_from_argmax[j, pred] += 1
                softmax_predictions = softmax_y.eval(
                    feed_dict={
                        x: adv_images_dict,
                        keep_prob: 1.0,
                        noise: (__noiseE + __noiseH) / 2 +
                        (_noiseE + _noise) / 4
                    })
                argmax_predictions = np.argmax(softmax_predictions, axis=1)
            final_predictions = predictions_from_argmax
            is_correct = []
            is_robust = []
            for j in range(test_size):
                is_correct.append(
                    np.argmax(mnist.test.labels[j]) == np.argmax(
                        final_predictions[j]))
                robustness_from_argmax = robustnessGGaussian.robustness_size_argmax(
                    counts=predictions_from_argmax[j],
                    eta=0.05,
                    dp_attack_size=fgsm_eps,
                    dp_epsilon=dp_epsilon,
                    dp_delta=1e-5,
                    dp_mechanism='gaussian') / dp_mult
                is_robust.append(robustness_from_argmax >= fgsm_eps)
            adv_acc_dict[atk] = np.sum(is_correct) * 1.0 / test_size
            robust_adv_acc_dict[atk] = np.sum([
                a and b for a, b in zip(is_robust, is_correct)
            ]) * 1.0 / np.sum(is_robust)
            robust_adv_utility_dict[atk] = np.sum(is_robust) * 1.0 / test_size
            print(" {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(
                atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
                robust_adv_utility_dict[atk],
                robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk]))
            ##############################
    log_str = "step: {}\t target_epsilon: {}\t dp_epsilon: {:.1f}\t attack_norm_bound: {:.1f}\t benign_acc: {:.4f}\t".format(
        i, target_eps, dp_epsilon, attack_norm_bound, benign_acc)
    for atk in attack_switch.keys():
        if attack_switch[atk]:
            log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(
                atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
                robust_adv_utility_dict[atk],
                robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk])
    print(log_str)
    logfile.write(log_str + '\n')
    ##############################
    duration = time.time() - start_time
    logfile.write(str(duration) + '\n')
    logfile.flush()
    logfile.close()
Example #16
    def setUp(self):
        super(TestMadryEtAl, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = MadryEtAl(self.model, sess=self.sess)
Example #17
def attack_batch(model, in_im, net_name, attack_name, im_list, gt_labels, sample_size, batch_size):
    logging.basicConfig(filename='Logs/' + net_name + "_" + attack_name + '.log',
                        level=logging.INFO,
                        format='%(asctime)s:%(levelname)s:%(message)s')
    config = tf.ConfigProto(device_count={'GPU': 2})
    imgs = open(im_list).readlines()  # [::10]
    gt_labels = open(gt_labels).readlines()  # [::10]
    top_1 = 0
    top_1_real = 0
    fool_rate = 0
    isotropic, size = get_params(net_name)
    imageModel = CallableModelWrapper(model, 'logits')

    
    with tf.Session(config=config) as sess:
        if attack_name=='fgsm':
            attack = FastGradientMethod(imageModel, back='tf')
            adv_x = attack.generate(in_im,eps=8,clip_min=-124, clip_max=155)
        if attack_name=='ifgsm':
            attack = BasicIterativeMethod(imageModel, back='tf')
            adv_x = attack.generate(in_im,eps=8,eps_iter=1,nb_iter=12,clip_min=-124, clip_max=155)
        if attack_name=='cw2':
            attack = CarliniWagnerL2(imageModel, back='tf')
            adv_x = attack.generate(in_im,clip_min=-124, clip_max=155)
        if attack_name=='jsma':
            attack = SaliencyMapMethod(imageModel, back='tf')
            adv_x = attack.generate(in_im)
        if attack_name=='pgd':
            attack = MadryEtAl(imageModel, back='tf')
            adv_x = attack.generate(in_im,eps=8,eps_iter=1,nb_iter=12,clip_min=-124, clip_max=155)
        if attack_name=='deepfool':
            attack = DeepFool(imageModel, back='tf')
            adv_x = attack.generate(in_im, sess=sess, clip_min=-124, clip_max=155)
        
        sess.run(tf.global_variables_initializer())
        img_loader = loader_func(net_name, sess, isotropic, size)
        batch_im = np.zeros((batch_size, size, size, 3))
        
        for i in range(sample_size // batch_size):  # integer division (Python 3)
            lim = min(batch_size, len(imgs)-i*batch_size)
            for j in range(lim):
                im = img_loader(imgs[i*batch_size+j].strip())
                batch_im[j] = np.copy(im)
            gt = np.array([int(gt_labels[i*batch_size+j].strip())
                       for j in range(lim)])
            adv_x_np = adv_x.eval(feed_dict={in_im: batch_im})

            # Compute softmax probabilities and predicted labels for the
            # adversarial and clean batches
            y_adv_prob = tf.nn.softmax(model(in_im), name="yadv").eval(feed_dict={in_im: adv_x_np})
            y_adv = np.argmax(y_adv_prob, 1)
            y_true_prob = tf.nn.softmax(model(in_im), name="ypred").eval(feed_dict={in_im: batch_im})
            y_true = np.argmax(y_true_prob, 1)

            # Accumulate top-1 accuracy (adversarial and clean) and the fooling rate
            top_1 += np.sum(y_adv == gt)
            top_1_real += np.sum(y_true == gt)
            fool_rate += np.sum(y_true != y_adv)
            

            if i != 0 and i % 2 == 0:
                logging.info("batch: {} ==================================================================".format(i))
                logging.info("fooling rate {}".format((fool_rate)/float((i+1)*batch_size)*100))
            

    logging.info("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++")           
    logging.info('Real Top-1 Accuracy = {}'.format(top_1_real / float(sample_size) * 100))
    logging.info('Top-1 Accuracy = {}'.format(top_1 / float(sample_size) * 100))
    logging.info('Top-1 Fooling Rate = {}'.format(fool_rate / float(sample_size) * 100))
    logging.info("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++") 
Example #18
# # cw_params = {'binary_search_steps': 1,
# #              'max_iterations': attack_iterations,
# #              'learning_rate': 0.1,
# #              'batch_size': n_attack,
# #              'initial_const': 10}
# cw = CarliniWagnerL2(wrap_clf, back='tf', sess=sess)
# adv = cw.generate_np(X_atk, **cw_params)

from cleverhans.attacks import MadryEtAl
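# untargeted L_inf PGD: eps-ball of 0.3, 40 steps of size 0.01, random start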
pgd_params = {'eps': 0.3,
              'eps_iter': 0.01,
              'nb_iter': 40,
              'clip_min': 0.,
              'clip_max': 1.,
              'rand_init': True}
pgd = MadryEtAl(wrap_clf, sess=sess)
adv = pgd.generate_np(X_atk, **pgd_params)

# adv_x = cw.generate(x, **cw_params)
# preds_adv = clf(adv_x)
# acc = model_eval(sess, x, y, preds_adv, X_test[:n_attack],
#                  y_test[:n_attack], args={'batch_size': n_attack})
# print('Test accuracy on CW adversarial examples: %0.4f\n' % acc)

pred = clf.predict(adv)
# print(np.sum(np.argmax(pred, axis=1) != np.argmax(y_test[:n_attack], axis=1)))
# pred_orig = clf.predict(X_atk)
# print(np.sum(np.argmax(pred, axis=1) != np.argmax(pred_orig, axis=1)))
print(np.sum(np.argmax(pred, axis=1) == np.argmax(y_target, axis=1)))

# Save some images
Example #19
def prep_bbox(sess,
              logits_scalar,
              x,
              y,
              X_train,
              Y_train,
              X_test,
              Y_test,
              img_rows,
              img_cols,
              channels,
              nb_epochs,
              batch_size,
              learning_rate,
              rng,
              phase=None,
              binary=False,
              scale=False,
              nb_filters=64,
              model_path=None,
              adv=0,
              delay=0,
              eps=0.3):
    """
    Define and train a model that simulates the "remote"
    black-box oracle described in the original paper.
    :param sess: the TF session
    :param x: the input placeholder for MNIST
    :param y: the output placeholder for MNIST
    :param X_train: the training data for the oracle
    :param Y_train: the training labels for the oracle
    :param X_test: the testing data for the oracle
    :param Y_test: the testing labels for the oracle
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param rng: numpy.random.RandomState
    :return:
    """

    # Define TF model graph (for the black-box model)
    save = False
    train_from_scratch = False

    if model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                binary, scale, nb_filters, batch_size, learning_rate, nb_epochs, adv = parse_model_settings(
                    model_path)
                train_from_scratch = False
            else:
                model_path = build_model_save_path(model_path, binary,
                                                   batch_size, nb_filters,
                                                   learning_rate, nb_epochs,
                                                   adv, delay, scale)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given

    if binary:
        if scale:
            #from cleverhans_tutorials.tutorial_models import make_scaled_binary_cnn
            # model = make_scaled_binary_cnn(phase, 'bb_binsc_', input_shape=(
            from cleverhans_tutorials.tutorial_models import make_scaled_binary_rand_cnn
            model = make_scaled_binary_rand_cnn(phase,
                                                logits_scalar,
                                                'bb_binsc_',
                                                input_shape=(None, img_rows,
                                                             img_cols,
                                                             channels),
                                                nb_filters=nb_filters)
        else:
            from cleverhans_tutorials.tutorial_models import make_basic_binary_cnn
            model = make_basic_binary_cnn(phase,
                                          logits_scalar,
                                          'bb_bin_',
                                          input_shape=(None, img_rows,
                                                       img_cols, channels),
                                          nb_filters=nb_filters)
    else:
        from cleverhans_tutorials.tutorial_models import make_basic_cnn
        model = make_basic_cnn(phase,
                               logits_scalar,
                               'bb_fp_',
                               input_shape=(None, img_rows, img_cols,
                                            channels),
                               nb_filters=nb_filters)

    preds = model(x, reuse=False)
    print("Defined TensorFlow model graph.")

    def evaluate():
        # Print and return the accuracy on legitimate data
        eval_params = {'batch_size': batch_size}
        acc = model_eval(sess,
                         x,
                         y,
                         preds,
                         X_test,
                         Y_test,
                         phase=phase,
                         args=eval_params)
        print('Test accuracy of black-box on legitimate test '
              'examples: %.4f' % acc)
        return acc

    # Train an MNIST model
    train_params = {
        'binary': binary,
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'bb train loss',
        'filename': 'bb_model',
        'train_scope': 'bb_model',
        'reuse_global_step': False,
        'is_training': True
    }

    if adv != 0:
        if adv == ADVERSARIAL_TRAINING_MADRYETAL:
            from cleverhans.attacks import MadryEtAl
            nb_iter = 20
            train_attack_params = {
                'eps': MAX_EPS,
                'eps_iter': 0.01,
                'nb_iter': nb_iter
            }
            train_attacker = MadryEtAl(model, sess=sess)

        if adv == ADVERSARIAL_TRAINING_FGSM:
            from cleverhans.attacks import FastGradientMethod
            # without this, train_attack_params would be undefined in the FGSM
            # branch and the update below would raise a NameError
            train_attack_params = {'eps': MAX_EPS}
            train_attacker = FastGradientMethod(model, back='tf', sess=sess)

        # create the adversarial trainer
        train_attack_params.update({'clip_min': 0., 'clip_max': 1.})
        adv_x_train = train_attacker.generate(x, phase, **train_attack_params)
        preds_adv = model.get_probs(adv_x_train)

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
            if adv and delay > 0:
                train_params.update({'nb_epochs': delay})

        # do clean training for 'nb_epochs' or 'delay' epochs
        model_train(sess,
                    x,
                    y,
                    preds,
                    X_train,
                    Y_train,
                    phase=phase,
                    evaluate=evaluate,
                    args=train_params,
                    save=save,
                    rng=rng)

        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            model_train(sess,
                        x,
                        y,
                        preds,
                        X_train,
                        Y_train,
                        phase=phase,
                        predictions_adv=preds_adv,
                        evaluate=evaluate,
                        args=train_params,
                        save=save,
                        rng=rng)
    else:
        tf_model_load(sess, model_path)
        print('Restored model from %s' % model_path)

    accuracy = evaluate()

    return model, preds, accuracy, model_path
Example #20
def PDP_resnet_with_pretrain_adv(TIN_data, resnet_params, train_params, params_to_save):
  # dict for encoding layer variables and output layer variables
  pre_define_vars = {}

  # list of variables to train
  train_vars = []
  pretrain_vars = []

  with tf.Graph().as_default(), tf.device('/cpu:0'):
    global_step = tf.Variable(0, trainable=False)
    
    # Parameters Declarification
    ######################################
    
    # encoding (pretrain) layer variables
    with tf.variable_scope('enc_layer', reuse=tf.AUTO_REUSE) as scope:
      kernel1 = tf.get_variable('kernel1', shape=[train_params.enc_kernel_size, train_params.enc_kernel_size, 
                                3, train_params.enc_filters], dtype=tf.float32, 
                                initializer=tf.contrib.layers.xavier_initializer_conv2d())
      biases1 = tf.get_variable('biases1', shape=[train_params.enc_filters], dtype=tf.float32, 
                                initializer=tf.constant_initializer(0.0))
    pre_define_vars['kernel1'] = kernel1
    pre_define_vars['biases1'] = biases1 
    train_vars.append(kernel1)
    train_vars.append(biases1)
    pretrain_vars.append(kernel1)
    pretrain_vars.append(biases1)

    shape     = kernel1.get_shape().as_list()
    w_t       = tf.reshape(kernel1, [-1, shape[-1]])
    w         = tf.transpose(w_t)
    sing_vals = tf.svd(w, compute_uv=False)
    sensitivity = tf.reduce_max(sing_vals)
    gamma = 2*train_params.Delta2/(train_params.effective_batch_size * sensitivity)
    print('gamma: {}'.format(gamma))
    
    # output layer variables
    with tf.variable_scope('fc2', reuse=tf.AUTO_REUSE) as scope:
      stdv = 1.0 / math.sqrt(train_params.hk)
      final_w = tf.get_variable('kernel', shape=[train_params.hk, train_params.num_classes], dtype=tf.float32, 
                                initializer=tf.random_uniform_initializer(-stdv, stdv))
      final_b = tf.get_variable('bias', shape=[train_params.num_classes], dtype=tf.float32, 
                                initializer=tf.constant_initializer(0.0))
    pre_define_vars['final_w'] = final_w
    pre_define_vars['final_b'] = final_b 
    train_vars.append(final_w)
    train_vars.append(final_b)
    ######################################
    
    # Build a Graph that computes the logits predictions from the inputs
    ######################################
    # input placeholders
    x_sb = tf.placeholder(tf.float32, [None,train_params.image_size,train_params.image_size,3], name='x_sb') # input is the bunch of n_batchs
    x_sb_adv = tf.placeholder(tf.float32, [None,train_params.image_size,train_params.image_size,3], name='x_sb_adv')
    x_test = tf.placeholder(tf.float32, [None,train_params.image_size,train_params.image_size,3], name='x_test')

    y_sb = tf.placeholder(tf.float32, [None, train_params.num_classes], name='y_sb') # input is the bunch of n_batchs (super batch)
    y_sb_adv = tf.placeholder(tf.float32, [None, train_params.num_classes], name='y_sb_adv')
    y_test = tf.placeholder(tf.float32, [None, train_params.num_classes], name='y_test')

    FM_h = tf.placeholder(tf.float32, [None, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters], name='FM_h') # one time
    noise = tf.placeholder(tf.float32, [None, train_params.image_size, train_params.image_size, 3], name='noise') # one time
    adv_noise = tf.placeholder(tf.float32, [None, train_params.image_size, train_params.image_size, 3], name='adv_noise') # one time

    learning_rate = tf.placeholder(tf.float32, shape=(), name='learning_rate')
    keep_prob = tf.placeholder(tf.float32, shape=(), name='keep_prob')

    # list of grads for each GPU
    tower_pretrain_grads = []
    tower_train_grads = []
    all_train_loss = []

    # optimizers
    pretrain_opt = tf.train.AdamOptimizer(learning_rate)
    train_opt = tf.train.AdamOptimizer(learning_rate)

    # model and loss on one GPU
    with tf.device('/gpu:{}'.format(GPU_IDX[0])):
      # setup encoding layer training
      with tf.variable_scope('enc_layer', reuse=tf.AUTO_REUSE) as scope:
        Enc_Layer2 = EncLayer(inpt=x_sb, n_filter_in=None, n_filter_out=None, filter_size=None, 
                              W=kernel1, b=biases1, activation=tf.nn.relu)
        pretrain_adv = Enc_Layer2.get_train_ops2(xShape=tf.shape(x_sb_adv)[0], Delta=train_params.Delta2, 
                                                epsilon=train_params.epsilon2, batch_size=None, learning_rate=None,
                                                W=kernel1, b=biases1, perturbFMx=adv_noise, perturbFM_h=FM_h)
        Enc_Layer3 = EncLayer(inpt=x_sb, n_filter_in=None, n_filter_out=None, filter_size=None, 
                              W=kernel1, b=biases1, activation=tf.nn.relu)
        pretrain_benign = Enc_Layer3.get_train_ops2(xShape=tf.shape(x_sb)[0], Delta=train_params.Delta2, 
                                                    epsilon=train_params.epsilon2, batch_size=None, learning_rate=None,
                                                    W=kernel1, b=biases1, perturbFMx=noise, perturbFM_h=FM_h)
        pretrain_cost = tf.reduce_mean(pretrain_adv + pretrain_benign)
      print_var('pretrain_cost', pretrain_cost)
      
      # use standard loss first
      y_logits = inference(x_sb + noise, FM_h, keep_prob, pre_define_vars, resnet_params, train_params)
      y_softmax = tf.nn.softmax(y_logits)

      y_logits_adv = inference(x_sb_adv + adv_noise, FM_h, keep_prob, pre_define_vars, resnet_params, train_params)
      y_softmax_adv = tf.nn.softmax(y_logits_adv)

      # taylor exp
      # TODO: use noise here
      perturbW = train_params.perturbFM * final_w
      # train_loss = TaylorExp_no_noise(y_softmax, y_sb, y_softmax_adv, y_sb_adv, 
      #                        train_params.effective_batch_size, train_params.alpha)
      train_loss = TaylorExp(y_softmax, y_sb, y_softmax_adv, y_sb_adv, 
                             train_params.effective_batch_size, train_params.alpha, perturbW)
      print_var('train_loss', train_loss)
      all_train_loss.append(train_loss)
    
    # split testing in each gpu
    x_sb_tests = tf.split(x_sb, N_ALL_GPUS, axis=0)
    y_softmax_test_list = []
    for gpu in range(N_ALL_GPUS):
      with tf.device('/gpu:{}'.format(gpu)):
        # testing graph now in each gpu
        y_logits_test = test_inference(x_sb_tests[gpu] + noise, FM_h, keep_prob, pre_define_vars, resnet_params, train_params)
        y_softmax_test_list.append(tf.nn.softmax(y_logits_test))
    y_softmax_test_concat = tf.concat(y_softmax_test_list, axis=0)

    print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
    all_vars = tf.global_variables()
    print_var_list('all vars', all_vars)
    print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')

    # add selected vars into trainable variable list
    # ('res4' in var.name and ('gamma' in var.name or 'beta' in var.name)) or
    for var in tf.global_variables():
      if 'resnet_model' in var.name and \
        ('conv0' in var.name or 
        'fc' in var.name or 
        'res3' in var.name or 
        'res4' in var.name or 
        'res1' in var.name or 
        'res2' in var.name) and \
          ('gamma' in var.name or 
            'beta' in var.name or 
            'kernel' in var.name or
            'bias' in var.name):
        if var not in train_vars:
          train_vars.append(var)
      elif 'enc_layer' in var.name and \
        ('kernel' in var.name or
          'bias' in var.name):
        if var not in pretrain_vars:
          pretrain_vars.append(var)
        if var not in train_vars:
          train_vars.append(var)
      elif 'enc_layer' in var.name and \
        ('gamma' in var.name or 
          'beta' in var.name):
        if var not in pretrain_vars:
          pretrain_vars.append(var)
    
    print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
    print_var_list('train_vars', train_vars)
    print_var_list('pretrain_vars', pretrain_vars)
    print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')

    # op for compute grads on one gpu
    with tf.device('/gpu:{}'.format(GPU_IDX[0])):
      # get all update_ops (updates of moving averageand std) for batch normalizations
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
      print_op_list('update ops', update_ops)
      enc_update_ops = [op for op in update_ops if 'enc_layer' in op.name]
      print_op_list('enc layer update ops', enc_update_ops)

      # when the gradients are computed, update the batch_norm
      with tf.control_dependencies(enc_update_ops):
        pretrain_grads = pretrain_opt.compute_gradients(pretrain_cost, var_list=pretrain_vars)
        print('*********** pretrain_grads ***********')
        for x in pretrain_grads:
          print(x)
        print('**********************')
      with tf.control_dependencies(update_ops):
        train_grads = train_opt.compute_gradients(train_loss, var_list=train_vars)
        print('*********** train_grads ***********')
        for x in train_grads:
          print(x)
        print('**********************')
      avg_pretrain_grads = pretrain_grads
      avg_train_grads = train_grads
      
      # get averaged loss tensor for pretrain and train ops
      total_loss = tf.reduce_sum(tf.stack(all_train_loss))
      total_pretrain_loss = tf.reduce_mean(pretrain_cost)

    # prepare to save gradients for large batch
    pretrain_grads_save = [g for g,v in pretrain_grads]
    # print('*********** pretrain_grads_save ***********' + str(pretrain_grads_save) + '**********************')
    train_grads_save = [g for g,v in train_grads]
    # print('*********** train_grads_save ***********' + str(train_grads_save) + '**********************')
    pretrain_grads_shapes = [g.shape.as_list() for g in pretrain_grads_save]
    train_grads_shapes = [g.shape.as_list() for g in train_grads_save]

    # placeholders for importing saved gradients
    pretrain_grads_placeholders = []
    for g,v in pretrain_grads:
      pretrain_grads_placeholders.append(tf.placeholder(tf.float32, v.shape))

    train_grads_placeholders = []
    for g,v in train_grads:
      train_grads_placeholders.append(tf.placeholder(tf.float32, v.shape))

    # construct the (grad, var) list
    assemble_pretrain_grads = []
    for i in range(len(pretrain_vars)):
      assemble_pretrain_grads.append((pretrain_grads_placeholders[i], pretrain_vars[i]))
    
    assemble_train_grads = []
    for i in range(len(train_grads)):
      assemble_train_grads.append((train_grads_placeholders[i], train_vars[i]))
    
    # apply the saved gradients
    pretrain_op = pretrain_opt.apply_gradients(assemble_pretrain_grads, global_step=global_step)
    train_op = train_opt.apply_gradients(assemble_train_grads, global_step=global_step)
    ######################################

    # Create a saver.
    saver = tf.train.Saver(var_list=tf.global_variables(), max_to_keep=1000)
    
    # start a session with memory growth
    config = tf.ConfigProto(log_device_placement=False)
    config.gpu_options.allow_growth=True
    sess = tf.Session(config=config)
    print("session created")

    # get some initial values
    sess.run(kernel1.initializer)
    _gamma = sess.run(gamma)
    _gamma_x = train_params.Delta2 / train_params.effective_batch_size
    epsilon2_update = train_params.epsilon2/(1.0 + 1.0/_gamma + 1/_gamma_x)
    delta_r = train_params.fgsm_eps * (train_params.image_size ** 2)
    _sensitivityW = sess.run(sensitivity)
    delta_h = _sensitivityW*(train_params.enc_h_size ** 2)
    #dp_mult = (train_params.Delta2 / (train_params.effective_batch_size * epsilon2_update)) / (delta_r / train_params.dp_epsilon) + \
    #  (2 * train_params.Delta2 / (train_params.effective_batch_size * epsilon2_update))/(delta_h / train_params.dp_epsilon)
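    # pixel-level DP multiplier combining the feature-map (delta_h) and input (delta_r) sensitivities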
    dp_mult = (train_params.Delta2*train_params.dp_epsilon) / (train_params.effective_batch_size*epsilon2_update * (delta_h / 2 + delta_r))
    # save some values for testing
    params_to_save['epsilon2_update'] = epsilon2_update
    params_to_save['dp_mult'] = dp_mult

    #######################################
    # ADV attacks
    #######################################

    # split input for attacks
    x_attacks = tf.split(x_sb, 3, axis=0)  # one sub-batch per enabled attack
    
    # currently only ifgsm, mim, and madry attacks are available
    attack_switch = {'fgsm':False, 'ifgsm':True, 'deepfool':False, 'mim':True, 'spsa':False, 'cwl2':False, 'madry':True, 'stm':False}
    
    # wrap the inference
    ch_model_probs = CustomCallableModelWrapper(callable_fn=inference_test_output_probs, output_layer='probs', 
                                                adv_noise=adv_noise, keep_prob=keep_prob, pre_define_vars=pre_define_vars, 
                                                resnet_params=resnet_params, train_params=train_params)
    
    # to save the reference to the attack tensors
    attack_tensor_training_dict = {}
    attack_tensor_testing_dict = {}

    # placeholder for eps parameter
    mu_alpha = tf.placeholder(tf.float32, [1])
      
    # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
    # place on specific GPU
    with tf.device('/gpu:{}'.format(AUX_GPU_IDX[0])):
      print('ifgsm GPU placement')
      print('/gpu:{}'.format(AUX_GPU_IDX[0]))
      if attack_switch['ifgsm']:
          print('creating attack tensor of BasicIterativeMethod')
          ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
          attack_tensor_training_dict['ifgsm'] = ifgsm_obj.generate(
              x=x_attacks[0], eps=mu_alpha,
              eps_iter=mu_alpha / train_params.iter_step_training,
              nb_iter=train_params.iter_step_training,
              clip_min=-1.0, clip_max=1.0)
          attack_tensor_testing_dict['ifgsm'] = ifgsm_obj.generate(
              x=x_sb, eps=mu_alpha,
              eps_iter=mu_alpha / train_params.iter_step_testing,
              nb_iter=train_params.iter_step_testing,
              clip_min=-1.0, clip_max=1.0)

    # MomentumIterativeMethod
    # place on specific GPU
    with tf.device('/gpu:{}'.format(AUX_GPU_IDX[1])):
      print('mim GPU placement')
      print('/gpu:{}'.format(AUX_GPU_IDX[1]))
      if attack_switch['mim']:
          print('creating attack tensor of MomentumIterativeMethod')
          mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
          attack_tensor_training_dict['mim'] = mim_obj.generate(
              x=x_attacks[1], eps=mu_alpha,
              eps_iter=mu_alpha / train_params.iter_step_training,
              nb_iter=train_params.iter_step_training,
              decay_factor=1.0, clip_min=-1.0, clip_max=1.0)
          attack_tensor_testing_dict['mim'] = mim_obj.generate(
              x=x_sb, eps=mu_alpha,
              eps_iter=mu_alpha / train_params.iter_step_testing,
              nb_iter=train_params.iter_step_testing,
              decay_factor=1.0, clip_min=-1.0, clip_max=1.0)
      
    # MadryEtAl (Projected Gradient with random init, same as rand+fgsm)
    # place on specific GPU
    with tf.device('/gpu:{}'.format(AUX_GPU_IDX[2])):
      print('madry GPU placement')
      print('/gpu:{}'.format(AUX_GPU_IDX[2]))
      if attack_switch['madry']:
          print('creating attack tensor of MadryEtAl')
          madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
          attack_tensor_training_dict['madry'] = madry_obj.generate(
              x=x_attacks[2], eps=mu_alpha,
              eps_iter=mu_alpha / train_params.iter_step_training,
              nb_iter=train_params.iter_step_training,
              clip_min=-1.0, clip_max=1.0)
          attack_tensor_testing_dict['madry'] = madry_obj.generate(
              x=x_sb, eps=mu_alpha,
              eps_iter=mu_alpha / train_params.iter_step_testing,
              nb_iter=train_params.iter_step_testing,
              clip_min=-1.0, clip_max=1.0)

    # combine the tensors
    adv_tensors_concat = tf.concat([attack_tensor_training_dict[x] for x in train_params.attacks], axis=0)
    #######################################

    # init op (use global_variables_initializer; initialize_all_variables is deprecated)
    print('initializing all variables')
    init = tf.global_variables_initializer()
    sess.run(init)

    # load pretrained variables of RESNET
    if train_params.load_weights:
      # first we need to load variable name convert table
      tgt_var_name_dict = {}
      with open(train_params.weight_table_path, 'r', encoding='utf-8') as inf:
        lines = inf.readlines()
        for line in lines:
          var_names = line.strip().split(' ')
          if var_names[1] == 'NONE':
            continue
          else:
            tgt_var_name_dict[var_names[0]] = var_names[1]

      # load variables dict from checkpoint
      pretrained_var_dict = load_pretrained_vars()

      # load pre-trained vars using name convert table
      for var in tf.global_variables():
        if var.name in tgt_var_name_dict:
          # print('var \"{}\" found'.format(var.name))
          try:
            var.load(pretrained_var_dict[tgt_var_name_dict[var.name]], session=sess)
            print('{} loaded'.format(var.name))
          except Exception:
            print('var {} not loaded since shape changed'.format(var.name))
        else:
          if 'Adam' not in var.name:
            print('var \"{}\" NOT FOUND'.format(var.name))
    else:
      print('Training model from scratch')


    #####################################
    # init noise and save for testing
    perturbH_test = np.random.laplace(0.0, 0, train_params.enc_h_size*train_params.enc_h_size*train_params.enc_filters)
    perturbH_test = np.reshape(perturbH_test, [-1, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters])
    params_to_save['perturbH_test'] = perturbH_test
    
    perturbFM_h = np.random.laplace(0.0, 2*train_params.Delta2/(epsilon2_update*train_params.effective_batch_size), 
                                        train_params.enc_h_size*train_params.enc_h_size*train_params.enc_filters)
    perturbFM_h = np.reshape(perturbFM_h, [-1, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters])
    params_to_save['perturbFM_h'] = perturbFM_h

    Noise = generateIdLMNoise(train_params.image_size, train_params.Delta2, epsilon2_update, train_params.effective_batch_size)
    params_to_save['Noise'] = Noise

    Noise_test = generateIdLMNoise(train_params.image_size, 0, epsilon2_update, train_params.effective_batch_size)
    params_to_save['Noise_test'] = Noise_test

    # save params for testing
    with open(os.getcwd() + train_params.params_save_path, 'wb') as outf:
      pickle.dump(params_to_save, outf)
      print('params saved')

    ####################################
    print('start pretrain')
    start_time = time.time()
    lr_schedule_list = sorted(train_params.lr_schedule_pretrain.keys())
    attacks_and_benign = train_params.attacks + ['benign']
    # build zeros numpy arrays for accumulate grads
    accumu_pretrain_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in pretrain_grads_shapes]
    total_pretrain_loss_value = 0.0
    step = 0
    # pretrain loop
    while True:
      # if enough steps, break
      if step > train_params.pretrain_steps:
        break
      # add steps here so not forgot
      else:
        step += 1

      # manual schedule learning rate
      current_epoch = step // (train_params.epoch_steps)
      current_lr = train_params.lr_schedule_pretrain[get_lr(current_epoch, lr_schedule_list)]

      # benign and adv batch
      super_batch = TIN_data.train.next_super_batch(N_GPUS, ensemble=False, random=True)
      adv_super_batch = TIN_data.train.next_super_batch(N_GPUS, ensemble=False, random=True)

      # get pretrain grads
      pretrain_grads_save_np, _pretrain_loss_value = sess.run(
          [pretrain_grads_save, total_pretrain_loss],
          feed_dict={x_sb: super_batch[0],
                     x_sb_adv: adv_super_batch[0],
                     learning_rate: current_lr,
                     adv_noise: Noise_test,
                     noise: Noise,
                     FM_h: perturbFM_h})
      # accumulate grads
      for i in range(len(accumu_pretrain_grads)):
        accumu_pretrain_grads[i] = accumu_pretrain_grads[i] + pretrain_grads_save_np[i]
      
      # accumulate loss values
      total_pretrain_loss_value = total_pretrain_loss_value + _pretrain_loss_value

      # use accumulated gradients to update variables
      if step % train_params.batch_multi == 0 and step > 0:
        # print('effective batch reached at step: {}, epoch: {}'.format(step, step / train_params.epoch_steps))
        # compute the average grads and build the feed dict
        pretrain_feed_dict = {}
        for i in range(len(accumu_pretrain_grads)):
          pretrain_feed_dict[pretrain_grads_placeholders[i]] = accumu_pretrain_grads[i] / train_params.batch_multi
        pretrain_feed_dict[learning_rate] = current_lr

        # run train ops by feeding the gradients
        sess.run(pretrain_op, feed_dict=pretrain_feed_dict)

        # get loss value
        avg_pretrain_loss_value = total_pretrain_loss_value / train_params.batch_multi

        # reset the average grads
        accumu_pretrain_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in pretrain_grads_shapes]
        total_pretrain_loss_value = 0.0

      # print loss
      if step % train_params.epoch_steps == 0 and step >= train_params.epoch_steps:
        print('pretrain report at step: {}, epoch: {}'.format(step, step / train_params.epoch_steps))
        dt = time.time() - start_time
        avg_epoch_time = dt / (step / train_params.epoch_steps)
        print('epoch: {:.4f}, avg epoch time: {:.4f}, current_lr: {}'.format(step/train_params.epoch_steps, avg_epoch_time, current_lr), flush=True)
        print('pretrain_loss: {:.6f}'.format(avg_pretrain_loss_value))

    ####################################
    print('start train')
    start_time = time.time()
    lr_schedule_list = sorted(train_params.lr_schedule.keys())
    # train whole model
    # build zeros numpy arrays for accumulate grads
    accumu_pretrain_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in pretrain_grads_shapes]
    accumu_train_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in train_grads_shapes]
    total_pretrain_loss_value = 0.0
    total_train_loss_value = 0.0
    step = 0
    # train loop
    while True:
      # if enough steps, break
      if step > train_params.train_steps:
        break
      # add steps here so not forgot
      else:
        step += 1

      # compute the grads every step
      # random eps value for training
      d_eps = random.random()*train_params.random_eps_range

      # manual schedule learning rate
      current_epoch = step // (train_params.epoch_steps)
      current_lr = train_params.lr_schedule[get_lr(current_epoch, lr_schedule_list)]
      
      # benign and adv batch
      super_batch = TIN_data.train.next_super_batch(N_GPUS, ensemble=False, random=True)
      adv_super_batch = TIN_data.train.next_super_batch(N_GPUS, ensemble=False, random=True)

      # create adv samples
      super_batch_adv_images = sess.run(adv_tensors_concat, 
                                        feed_dict={x_sb:adv_super_batch[0], keep_prob:1.0,
                                                    adv_noise: Noise, mu_alpha:[d_eps]})   

      # get pretrain and train grads
      pretrain_grads_save_np, _pretrain_loss_value = sess.run(
          [pretrain_grads_save, total_pretrain_loss],
          feed_dict={x_sb: super_batch[0],
                     x_sb_adv: super_batch_adv_images,
                     learning_rate: current_lr,
                     adv_noise: Noise_test,
                     noise: Noise,
                     FM_h: perturbFM_h})
      train_grads_save_np, _train_loss_value = sess.run(
          [train_grads_save, total_loss],
          feed_dict={x_sb: super_batch[0], y_sb: super_batch[1],
                     x_sb_adv: super_batch_adv_images, y_sb_adv: adv_super_batch[1],
                     keep_prob: train_params.keep_prob, learning_rate: current_lr,
                     noise: Noise, adv_noise: Noise_test, FM_h: perturbFM_h})

      # accumulate grads
      for i in range(len(accumu_pretrain_grads)):
        accumu_pretrain_grads[i] = accumu_pretrain_grads[i] + pretrain_grads_save_np[i]

      for i in range(len(accumu_train_grads)):
        accumu_train_grads[i] = accumu_train_grads[i] + train_grads_save_np[i]

      # accumulate loss values
      total_pretrain_loss_value = total_pretrain_loss_value + _pretrain_loss_value
      total_train_loss_value = total_train_loss_value + _train_loss_value
      
      # use accumulated gradients to update variables
      if step % train_params.batch_multi == 0 and step > 0:
        # compute the average grads and build the feed dict
        pretrain_feed_dict = {}
        for i in range(len(accumu_pretrain_grads)):
          pretrain_feed_dict[pretrain_grads_placeholders[i]] = accumu_pretrain_grads[i] / train_params.batch_multi
        pretrain_feed_dict[learning_rate] = current_lr
        # pretrain_feed_dict[keep_prob] = 0.5

        train_feed_dict = {}
        for i in range(len(accumu_train_grads)):
          train_feed_dict[train_grads_placeholders[i]] = accumu_train_grads[i] / train_params.batch_multi
        train_feed_dict[learning_rate] = current_lr
        # train_feed_dict[keep_prob] = 0.5

        # run train ops
        sess.run(pretrain_op, feed_dict=pretrain_feed_dict)
        sess.run(train_op, feed_dict=train_feed_dict)

        # get loss value
        avg_pretrain_loss_value = total_pretrain_loss_value / train_params.batch_multi
        avg_train_loss_value = total_train_loss_value / train_params.batch_multi

        # reset the average grads
        accumu_pretrain_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in pretrain_grads_shapes]
        accumu_train_grads = [np.zeros(g_shape, dtype=np.float32) for g_shape in train_grads_shapes]
        total_pretrain_loss_value = 0.0
        total_train_loss_value = 0.0

      # print status every epoch
      if step % int(train_params.epoch_steps) == 0:
        dt = time.time() - start_time
        avg_epoch_time = dt / (step / train_params.epoch_steps)
        print('epoch: {:.4f}, avg epoch time: {:.4f}s, current_lr: {}'.format(step/train_params.epoch_steps, avg_epoch_time, current_lr), flush=True)

      # save model
      if step % int(train_params.epoch_steps) == 0 and int(step / train_params.epoch_steps) in train_params.epochs_to_save:
        print('saving model at epoch {}'.format(step / train_params.epoch_steps))
        checkpoint_path = os.path.join(os.getcwd() + train_params.check_point_dir, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)
        
      # testing during training
      if step % int(train_params.epoch_steps) == 0 and int(step / train_params.epoch_steps) in train_params.epochs_to_test:
        test_start = time.time()
        print('train test reported at step: {}, epoch: {}'.format(step, step / train_params.epoch_steps))
        dt = time.time() - start_time
        avg_epoch_time = dt / (step / train_params.epoch_steps)
        print('epoch: {:.4f}, avg epoch time: {:.4f}s, current_lr: {}'.format(step/train_params.epoch_steps, avg_epoch_time, current_lr), flush=True)
        print('pretrain_loss: {:.6f}, train_loss: {:.6f}'.format(avg_pretrain_loss_value, avg_train_loss_value))
        # print('output layer: \n\t{}'.format(output_layer_value))

        #===================adv samples=====================
        adv_acc_dict = {}
        robust_adv_acc_dict = {}
        robust_adv_utility_dict = {}
        log_str = ''
        # cover all test data
        for i in range(train_params.test_epochs):
          test_batch = TIN_data.test.next_batch(train_params.test_batch_size)
          # if more GPUs available, generate testing adv samples at once
          if N_AUX_GPUS > 1:
            adv_images_dict = sess.run(attack_tensor_testing_dict, feed_dict ={x_sb: test_batch[0], 
                                                                               adv_noise: Noise_test, 
                                                                               mu_alpha: [train_params.fgsm_eps],
                                                                               keep_prob: 1.0})
          else:
            adv_images_dict = {}
          # test for each attack
          for atk in attacks_and_benign:
            if atk not in adv_acc_dict:
              adv_acc_dict[atk] = 0.0
              robust_adv_acc_dict[atk] = 0.0
              robust_adv_utility_dict[atk] = 0.0
            if atk == 'benign':
              testing_img = test_batch[0]
            elif attack_switch[atk]:
              # if only one gpu available, generate adv samples in-place
              if atk not in adv_images_dict:
                adv_images_dict[atk] = sess.run(attack_tensor_testing_dict[atk], feed_dict ={x_sb:test_batch[0], 
                                                                                             adv_noise: Noise_test, 
                                                                                             mu_alpha:[train_params.fgsm_eps],
                                                                                             keep_prob: 1.0})
              testing_img = adv_images_dict[atk]
            else:
              continue
            ### PixelDP Robustness ###
            predictions_form_argmax = np.zeros([train_params.test_batch_size, train_params.num_classes])
            softmax_predictions = sess.run(y_softmax_test_concat, feed_dict={x_sb: testing_img, noise: Noise, FM_h: perturbFM_h, keep_prob: 1.0})
            argmax_predictions = np.argmax(softmax_predictions, axis=1)
            for n_draws in range(0, train_params.num_samples):
              _BenignLNoise = generateIdLMNoise(train_params.image_size, train_params.Delta2, epsilon2_update, train_params.effective_batch_size)
              _perturbFM_h = np.random.laplace(0.0, 2*train_params.Delta2/(epsilon2_update*train_params.effective_batch_size), 
                                              train_params.enc_h_size*train_params.enc_h_size*train_params.enc_filters)
              _perturbFM_h = np.reshape(_perturbFM_h, [-1, train_params.enc_h_size, train_params.enc_h_size, train_params.enc_filters])
              for j in range(train_params.test_batch_size):
                pred = argmax_predictions[j]
                predictions_form_argmax[j, pred] += 1
              softmax_predictions = sess.run(y_softmax_test_concat, feed_dict={x_sb: testing_img, noise: (_BenignLNoise/10 + Noise), FM_h: perturbFM_h, keep_prob: 1.0}) * \
                sess.run(y_softmax_test_concat, feed_dict={x_sb: testing_img, noise: Noise, FM_h: (_perturbFM_h/10 + perturbFM_h), keep_prob: 1.0})
              argmax_predictions = np.argmax(softmax_predictions, axis=1)
            final_predictions = predictions_form_argmax
            is_correct = []
            is_robust = []
            for j in range(train_params.test_batch_size):
              is_correct.append(np.argmax(test_batch[1][j]) == np.argmax(final_predictions[j]))
              robustness_from_argmax = robustness.robustness_size_argmax(counts=predictions_form_argmax[j],
                                                                        eta=0.05, dp_attack_size=train_params.fgsm_eps, 
                                                                        dp_epsilon=train_params.dp_epsilon, dp_delta=0.05, 
                                                                        dp_mechanism='laplace') / dp_mult
              is_robust.append(robustness_from_argmax >= train_params.fgsm_eps)
            adv_acc_dict[atk] += np.sum(is_correct)*1.0/train_params.test_batch_size
            robust_adv_acc_dict[atk] += np.sum([a and b for a,b in zip(is_robust, is_correct)])*1.0/np.sum(is_robust)
            robust_adv_utility_dict[atk] += np.sum(is_robust)*1.0/train_params.test_batch_size
        ##############################
        # average all acc for whole test data
        for atk in attacks_and_benign:
          adv_acc_dict[atk] = adv_acc_dict[atk] / train_params.test_epochs
          robust_adv_acc_dict[atk] = robust_adv_acc_dict[atk] / train_params.test_epochs
          robust_adv_utility_dict[atk] = robust_adv_utility_dict[atk] / train_params.test_epochs
          # added robust prediction
          log_str += " {}: {:.6f} {:.6f} {:.6f} {:.6f}\n".format(atk, adv_acc_dict[atk], robust_adv_acc_dict[atk], robust_adv_utility_dict[atk], robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk])
        dt = time.time() - test_start
        print('testing time: {}'.format(dt))
        print(log_str, flush=True)
        print('*******************')
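
# A minimal, self-contained sketch of the gradient-accumulation pattern used
# above, assuming the TF1-style API of these examples: symbolic gradients are
# fetched as NumPy arrays, summed on the host over several micro-batches, and
# the average is fed back through placeholders to a single apply_gradients op.
# All names and shapes here are illustrative, not from the original project.
import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 2])
y = tf.placeholder(tf.float32, [None, 1])
w = tf.Variable(tf.zeros([2, 1]))
loss = tf.reduce_mean(tf.square(tf.matmul(x, w) - y))

opt = tf.train.GradientDescentOptimizer(0.1)
grads = tf.gradients(loss, [w])               # grads fetched every step
grad_ph = tf.placeholder(tf.float32, [2, 1])  # averaged grads fed back in
apply_op = opt.apply_gradients([(grad_ph, w)])

batch_multi = 4  # micro-batches accumulated per parameter update
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    accum = np.zeros((2, 1), dtype=np.float32)
    for _ in range(batch_multi):
        xb = np.random.rand(8, 2).astype(np.float32)
        yb = xb.sum(axis=1, keepdims=True)
        g, = sess.run(grads, feed_dict={x: xb, y: yb})
        accum += g
    sess.run(apply_op, feed_dict={grad_ph: accum / batch_multi})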
Example #21
def train(cifar10_data, logfile):
    """Train CIFAR-10 for a number of steps."""
    logfile.write("fgsm_eps \t %g, epsilon \t %d \n" %
                  (fgsm_eps, target_eps[0]))
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Parameters Declarification
        #with tf.variable_scope('conv1') as scope:
        kernel1 = _variable_with_weight_decay(
            'kernel1',
            shape=[3, 3, 3, 128],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0)
        biases1 = cifar10._variable_on_cpu('biases1', [128],
                                           tf.constant_initializer(0.0))
        #with tf.variable_scope('conv2') as scope:
        kernel2 = _variable_with_weight_decay(
            'kernel2',
            shape=[5, 5, 128, 128],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0)
        biases2 = cifar10._variable_on_cpu('biases2', [128],
                                           tf.constant_initializer(0.1))
        #with tf.variable_scope('conv3') as scope:
        kernel3 = _variable_with_weight_decay(
            'kernel3',
            shape=[5, 5, 256, 256],
            stddev=np.sqrt(2.0 / (5 * 5 * 256)) / math.ceil(5 / 2),
            wd=0.0)
        biases3 = cifar10._variable_on_cpu('biases3', [256],
                                           tf.constant_initializer(0.1))
        #with tf.variable_scope('local4') as scope:
        kernel4 = cifar10._variable_with_weight_decay(
            'kernel4',
            shape=[int(image_size / 4)**2 * 256, hk],
            stddev=0.04,
            wd=0.004)
        biases4 = cifar10._variable_on_cpu('biases4', [hk],
                                           tf.constant_initializer(0.1))
        #with tf.variable_scope('local5') as scope:
        kernel5 = cifar10._variable_with_weight_decay(
            'kernel5', [hk, 10],
            stddev=np.sqrt(2.0 /
                           (int(image_size / 4)**2 * 256)) / math.ceil(5 / 2),
            wd=0.0)
        biases5 = cifar10._variable_on_cpu('biases5', [10],
                                           tf.constant_initializer(0.1))

        scale2 = tf.Variable(tf.ones([hk]))
        beta2 = tf.Variable(tf.zeros([hk]))

        params = [
            kernel1, biases1, kernel2, biases2, kernel3, biases3, kernel4,
            biases4, kernel5, biases5, scale2, beta2
        ]
        ########

        # Build a Graph that computes the logits predictions from the
        # inference model.
        shape = kernel1.get_shape().as_list()
        w_t = tf.reshape(kernel1, [-1, shape[-1]])
        w = tf.transpose(w_t)
        sing_vals = tf.svd(w, compute_uv=False)
        sensitivityW = tf.reduce_max(sing_vals)
        dp_delta = 0.05
        dp_mult = attack_norm_bound * math.sqrt(
            2 * math.log(1.25 / dp_delta)) / dp_epsilon
        noise = tf.placeholder(tf.float32, [None, 14, 14, 128])
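        # Gaussian mechanism: noise with std attack_norm_bound *
        # sqrt(2 * ln(1.25 / dp_delta)) / dp_epsilon satisfies
        # (dp_epsilon, dp_delta)-DP for an L2 sensitivity of
        # attack_norm_bound, so dp_mult is the multiplier applied
        # to the injected noise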
        sigma = tf.placeholder(tf.float32)
        x = tf.placeholder(tf.float32, [None, image_size, image_size, 3])
        #y_conv, h_conv1 = inference(x, params, dp_mult**2 * noise);
        y_conv, h_conv1 = inference(x, params, attack_norm_bound * noise)
        softmax_y_conv = tf.nn.softmax(y_conv)
        y_ = tf.placeholder(tf.float32, [None, 10])

        #logits = inference(images)

        # Calculate loss. Apply Taylor Expansion for the output layer
        loss = cifar10.lossDPSGD(y_conv, y_)

        # noise redistribution #
        grad, = tf.gradients(loss, h_conv1)
        normalized_grad = tf.sign(grad)
        normalized_grad = tf.stop_gradient(normalized_grad)
        normalized_grad_r = tf.abs(tf.reduce_mean(normalized_grad,
                                                  axis=(0)))**2
        sum_r = tf.reduce_sum(normalized_grad_r,
                              axis=(0, 1, 2),
                              keepdims=False)
        normalized_grad_r = 14 * 14 * 128 * normalized_grad_r / sum_r
        print(normalized_grad_r)

        shape_grad = normalized_grad_r.get_shape().as_list()
        grad_t = tf.reshape(normalized_grad_r, [-1, shape_grad[-1]])
        g = tf.transpose(grad_t)
        sing_g_vals = tf.svd(g, compute_uv=False)
        sensitivity_2 = tf.reduce_max(sing_g_vals)
        ########################
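        # the block above derives the noise-redistribution vector: the batch-
        # averaged sign of d(loss)/d(h_conv1) is squared and rescaled to sum
        # to 14*14*128, and its largest singular value (sensitivity_2) feeds
        # Delta_redis, which calibrates sigmaHGM further below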

        opt = tf.train.GradientDescentOptimizer(lr)

        gw_K1 = tf.gradients(loss, kernel1)[0]
        gb1 = tf.gradients(loss, biases1)[0]

        gw_K2 = tf.gradients(loss, kernel2)[0]
        gb2 = tf.gradients(loss, biases2)[0]

        gw_K3 = tf.gradients(loss, kernel3)[0]
        gb3 = tf.gradients(loss, biases3)[0]

        gw_K4 = tf.gradients(loss, kernel4)[0]
        gb4 = tf.gradients(loss, biases4)[0]

        gw_K5 = tf.gradients(loss, kernel5)[0]
        gb5 = tf.gradients(loss, biases5)[0]

        #clip gradient
        gw_K1 = tf.clip_by_norm(gw_K1, clip_bound)
        gw_K2 = tf.clip_by_norm(gw_K2, clip_bound)
        gw_K3 = tf.clip_by_norm(gw_K3, clip_bound)
        gw_K4 = tf.clip_by_norm(gw_K4, clip_bound)
        gw_K5 = tf.clip_by_norm(gw_K5, clip_bound)

        #perturb
        gw_K1 += tf.random_normal(shape=tf.shape(gw_K1),
                                  mean=0.0,
                                  stddev=(sigma * sensitivity),
                                  dtype=tf.float32) / batch_size
        gw_K2 += tf.random_normal(shape=tf.shape(gw_K2),
                                  mean=0.0,
                                  stddev=(sigma * sensitivity),
                                  dtype=tf.float32) / batch_size
        gw_K3 += tf.random_normal(shape=tf.shape(gw_K3),
                                  mean=0.0,
                                  stddev=(sigma * sensitivity),
                                  dtype=tf.float32) / batch_size
        gw_K4 += tf.random_normal(shape=tf.shape(gw_K4),
                                  mean=0.0,
                                  stddev=(sigma * sensitivity),
                                  dtype=tf.float32) / batch_size
        gw_K5 += tf.random_normal(shape=tf.shape(gw_K5),
                                  mean=0.0,
                                  stddev=(sigma * sensitivity),
                                  dtype=tf.float32) / batch_size
        gb1 += tf.random_normal(shape=tf.shape(gb1),
                                mean=0.0,
                                stddev=(sigma * sensitivity),
                                dtype=tf.float32) / batch_size
        gb2 += tf.random_normal(shape=tf.shape(gb2),
                                mean=0.0,
                                stddev=(sigma * sensitivity),
                                dtype=tf.float32) / batch_size
        gb3 += tf.random_normal(shape=tf.shape(gb3),
                                mean=0.0,
                                stddev=(sigma * sensitivity),
                                dtype=tf.float32) / batch_size
        gb4 += tf.random_normal(shape=tf.shape(gb4),
                                mean=0.0,
                                stddev=(sigma * sensitivity),
                                dtype=tf.float32) / batch_size
        gb5 += tf.random_normal(shape=tf.shape(gb5),
                                mean=0.0,
                                stddev=(sigma * sensitivity),
                                dtype=tf.float32) / batch_size

        # apply gradients and keep tracking moving average of the parameters
        apply_gradient_op = opt.apply_gradients([(gw_K1, kernel1),
                                                 (gb1, biases1),
                                                 (gw_K2, kernel2),
                                                 (gb2, biases2),
                                                 (gw_K3, kernel3),
                                                 (gb3, biases3),
                                                 (gw_K4, kernel4),
                                                 (gb4, biases4),
                                                 (gw_K5, kernel5),
                                                 (gb5, biases5)],
                                                global_step=global_step)
        variable_averages = tf.train.ExponentialMovingAverage(
            MOVING_AVERAGE_DECAY, global_step)
        variables_averages_op = variable_averages.apply(
            tf.trainable_variables())
        with tf.control_dependencies(
            [apply_gradient_op, variables_averages_op]):
            train_op = tf.no_op(name='train')
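        # the no_op under control_dependencies bundles the gradient update and
        # the moving-average update into a single train_op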

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        #train_op = cifar10.trainDPSGD(loss, global_step, clip_bound, sigma, sensitivity)

        sess = tf.Session(config=tf.ConfigProto(log_device_placement=False))

        attack_switch = {
            'fgsm': True,
            'ifgsm': True,
            'deepfool': False,
            'mim': True,
            'spsa': False,
            'cwl2': False,
            'madry': True,
            'stm': False
        }

        ch_model_probs = CustomCallableModelWrapper(
            callable_fn=inference_test_input_probs,
            output_layer='probs',
            params=params,
            image_size=image_size)

        # define each attack method's tensor
        attack_tensor_dict = {}
        # FastGradientMethod
        if attack_switch['fgsm']:
            print('creating attack tensor of FastGradientMethod')
            fgsm_obj = FastGradientMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_fgsm = fgsm_obj.generate(x=x, eps=fgsm_eps, clip_min=-1.0, clip_max=1.0, ord=2) # testing now
            x_adv_test_fgsm = fgsm_obj.generate(x=x,
                                                eps=fgsm_eps,
                                                clip_min=-1.0,
                                                clip_max=1.0)  # testing now
            attack_tensor_dict['fgsm'] = x_adv_test_fgsm

        # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
        # default: eps_iter=0.05, nb_iter=10
        if attack_switch['ifgsm']:
            print('creating attack tensor of BasicIterativeMethod')
            ifgsm_obj = BasicIterativeMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_ifgsm = ifgsm_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_ifgsm = ifgsm_obj.generate(x=x,
                                                  eps=fgsm_eps,
                                                  eps_iter=fgsm_eps / 3,
                                                  nb_iter=3,
                                                  clip_min=-1.0,
                                                  clip_max=1.0)
            attack_tensor_dict['ifgsm'] = x_adv_test_ifgsm

        # MomentumIterativeMethod
        # default: eps_iter=0.06, nb_iter=10
        if attack_switch['mim']:
            print('creating attack tensor of MomentumIterativeMethod')
            mim_obj = MomentumIterativeMethod(model=ch_model_probs, sess=sess)
            #x_adv_test_mim = mim_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, decay_factor=1.0, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_mim = mim_obj.generate(x=x,
                                              eps=fgsm_eps,
                                              eps_iter=fgsm_eps / 3,
                                              nb_iter=3,
                                              decay_factor=1.0,
                                              clip_min=-1.0,
                                              clip_max=1.0)
            attack_tensor_dict['mim'] = x_adv_test_mim

        # MadryEtAl (projected gradient with random init, same as rand+fgsm)
        # default: eps_iter=0.01, nb_iter=40
        if attack_switch['madry']:
            print('creating attack tensor of MadryEtAl')
            madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
            #x_adv_test_madry = madry_obj.generate(x=x, eps=fgsm_eps, eps_iter=fgsm_eps/10, nb_iter=10, clip_min=-1.0, clip_max=1.0, ord=2)
            x_adv_test_madry = madry_obj.generate(x=x,
                                                  eps=fgsm_eps,
                                                  eps_iter=fgsm_eps / 3,
                                                  nb_iter=3,
                                                  clip_min=-1.0,
                                                  clip_max=1.0)
            attack_tensor_dict['madry'] = x_adv_test_madry
        #====================== attack =========================

        correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Privacy accountant
        priv_accountant = accountant.GaussianMomentsAccountant(D)
        privacy_accum_op = priv_accountant.accumulate_privacy_spending(
            [None, None], sigma, batch_size)
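        # the moments accountant aggregates per-step (epsilon, delta)
        # spending; the training loop below stops once any target delta
        # is exhausted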

        # Build the summary operation based on the TF collection of Summaries.
        #summary_op = tf.summary.merge_all()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        summary_writer = tf.summary.FileWriter(os.getcwd() + path, sess.graph)

        # load the most recent models
        _global_step = 0
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
            _global_step = int(
                ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        else:
            print('No checkpoint file found')

        T = int(int(math.ceil(D / batch_size)) * epochs + 1)  # total number of steps
        step_for_epoch = int(math.ceil(D / batch_size))  # steps per epoch

        s = math.log(sqrt(2.0 / math.pi) * 1e+5)
        sigmaEGM = sqrt(2.0) * 1.0 * (sqrt(s) + sqrt(s + dp_epsilon)) / (
            2.0 * dp_epsilon)
        #print(sigmaEGM)
        __noiseE = np.random.normal(0.0, sigmaEGM,
                                    14 * 14 * 128).astype(np.float32)
        __noiseE = np.reshape(__noiseE, [-1, 14, 14, 128])
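        # __noiseE is a single fixed draw at scale sigmaEGM; the warm-up loop
        # below feeds it zeroed out so the redistribution vector is estimated
        # on a noise-free model first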
        print("Compute The Noise Redistribution Vector")
        for step in xrange(_global_step, 100 * step_for_epoch):
            batch = cifar10_data.train.next_batch(batch_size)
            #Get a random batch.
            _, loss_value = sess.run(
                [train_op, loss],
                feed_dict={
                    x: batch[0],
                    y_: batch[1],
                    noise: __noiseE * 0,
                    sigma: sigma_value * 0
                })
            if step % (5 * step_for_epoch) == 0:
                print(loss_value)
        batch = cifar10_data.train.next_batch(40 * batch_size)
        grad_redis = sess.run([normalized_grad_r],
                              feed_dict={
                                  x: batch[0],
                                  y_: batch[1],
                                  noise: __noiseE * 0
                              })
        _sensitivity_2 = sess.run([sensitivity_2],
                                  feed_dict={
                                      x: batch[0],
                                      y_: batch[1],
                                      noise: __noiseE * 0
                                  })
        #print(_sensitivity_2)

        _sensitivityW = sess.run(sensitivityW)
        #print(_sensitivityW)
        Delta_redis = _sensitivityW / sqrt(_sensitivity_2[0])
        #print(Delta_redis)
        sigmaHGM = sqrt(2.0) * Delta_redis * (
            sqrt(s) + sqrt(s + dp_epsilon)) / (2.0 * dp_epsilon)
        #print(sigmaHGM)
        __noiseH = np.random.normal(0.0, sigmaHGM,
                                    14 * 14 * 128).astype(np.float32)
        __noiseH = np.reshape(__noiseH, [-1, 14, 14, 128]) * grad_redis

        sess.run(init)
        print("Training")
        for step in xrange(_global_step, _global_step + T):
            start_time = time.time()
            batch = cifar10_data.train.next_batch(batch_size)
            #Get a random batch.
            #grad_redis = sess.run([normalized_grad_r], feed_dict = {x: batch[0], y_: batch[1], noise: (__noise + grad_redis)/2})
            _, loss_value = sess.run(
                [train_op, loss],
                feed_dict={
                    x: batch[0],
                    y_: batch[1],
                    noise: (__noiseE + __noiseH) / 2,
                    sigma: sigma_value
                })
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            sess.run([privacy_accum_op])
            spent_eps_deltas = priv_accountant.get_privacy_spent(
                sess, target_eps=target_eps)
            if step % (5 * step_for_epoch) == 0:
                print(loss_value)
                print(spent_eps_deltas)
            _break = False
            for _eps, _delta in spent_eps_deltas:
                if _delta >= delta:
                    _break = True
                    break
            if _break:
                break

        ## Robustness
        print("Testing")
        adv_acc_dict = {}
        robust_adv_acc_dict = {}
        robust_adv_utility_dict = {}
        test_batch_size = 5000
        for atk in attack_switch.keys():
            if atk not in adv_acc_dict:
                adv_acc_dict[atk] = -1
                robust_adv_acc_dict[atk] = -1
                robust_adv_utility_dict[atk] = -1
            if attack_switch[atk]:
                test_batch = cifar10_data.test.next_batch(test_batch_size)
                adv_images_dict = sess.run(attack_tensor_dict[atk],
                                           feed_dict={x: test_batch[0]})
                ### PixelDP Robustness ###
                predictions_form_argmax = np.zeros([test_batch_size, 10])
                softmax_predictions = sess.run(softmax_y_conv,
                                               feed_dict={
                                                   x: adv_images_dict,
                                                   noise:
                                                   (__noiseE + __noiseH) / 2
                                               })
                argmax_predictions = np.argmax(softmax_predictions, axis=1)
                for n_draws in range(0, 1000):
                    _noiseE = np.random.normal(0.0, sigmaEGM, 14 * 14 *
                                               128).astype(np.float32)
                    _noiseE = np.reshape(_noiseE, [-1, 14, 14, 128])
                    _noise = np.random.normal(0.0, sigmaHGM,
                                              14 * 14 * 128).astype(np.float32)
                    _noise = np.reshape(_noise, [-1, 14, 14, 128]) * grad_redis
                    for j in range(test_batch_size):
                        pred = argmax_predictions[j]
                        predictions_form_argmax[j, pred] += 1
                    softmax_predictions = sess.run(
                        softmax_y_conv,
                        feed_dict={
                            x:
                            adv_images_dict,
                            noise:
                            (__noiseE + __noiseH) / 2 + (_noiseE + _noise) / 4
                        })
                    argmax_predictions = np.argmax(softmax_predictions, axis=1)
                final_predictions = predictions_form_argmax
                is_correct = []
                is_robust = []
                for j in range(test_batch_size):
                    is_correct.append(
                        np.argmax(test_batch[1][j]) == np.argmax(
                            final_predictions[j]))
                    robustness_from_argmax = robustnessGGaussian.robustness_size_argmax(
                        counts=predictions_form_argmax[j],
                        eta=0.05,
                        dp_attack_size=fgsm_eps,
                        dp_epsilon=dp_epsilon,
                        dp_delta=0.05,
                        dp_mechanism='gaussian') / dp_mult
                    is_robust.append(robustness_from_argmax >= fgsm_eps)
                adv_acc_dict[atk] = np.sum(is_correct) * 1.0 / test_batch_size
                robust_adv_acc_dict[atk] = np.sum([
                    a and b for a, b in zip(is_robust, is_correct)
                ]) * 1.0 / np.sum(is_robust)
                robust_adv_utility_dict[atk] = np.sum(
                    is_robust) * 1.0 / test_batch_size
                ##############################
        log_str = ""
        for atk in attack_switch.keys():
            if attack_switch[atk]:
                # added robust prediction
                log_str += " {}: {:.4f} {:.4f} {:.4f} {:.4f}".format(
                    atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
                    robust_adv_utility_dict[atk],
                    robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk])
        print(log_str)
        logfile.write(log_str + '\n')
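
# A compact sketch of the clip-then-perturb step the training graph above
# applies to every gradient: clip to a fixed L2 norm, then add Gaussian noise
# scaled by sigma * sensitivity and divided by the batch size. TF1 API
# assumed; the default values here are illustrative only.
import tensorflow as tf

def sanitize_gradient(g, clip_bound=3.0, sigma=4.0, sensitivity=1.0,
                      batch_size=128):
    """Clip a gradient tensor and add calibrated Gaussian noise."""
    g = tf.clip_by_norm(g, clip_bound)
    noise = tf.random_normal(shape=tf.shape(g), mean=0.0,
                             stddev=sigma * sensitivity, dtype=tf.float32)
    return g + noise / batch_size

# usage (illustrative): noisy_grads = [sanitize_gradient(g)
#                                      for g in tf.gradients(loss, params)]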
def SSGD_resnet_testing(TIN_data, resnet_params, train_params, test_params,
                        all_params):
    # dict for encoding layer variables and output layer variables
    pre_define_vars = {}

    # list of variables to train
    train_vars = []

    with tf.Graph().as_default(), tf.device('/cpu:0'):
        global_step = tf.Variable(0, trainable=False)

        # Parameters Declarification
        ######################################

        # encoding (pretrain) layer variables
        with tf.variable_scope('enc_layer', reuse=tf.AUTO_REUSE) as scope:
            kernel1 = tf.get_variable(
                'kernel1',
                shape=[
                    train_params.enc_kernel_size, train_params.enc_kernel_size,
                    3, train_params.enc_filters
                ],
                dtype=tf.float32,
                initializer=tf.contrib.layers.xavier_initializer_conv2d())
            biases1 = tf.get_variable('biases1',
                                      shape=[train_params.enc_filters],
                                      dtype=tf.float32,
                                      initializer=tf.constant_initializer(0.0))
        pre_define_vars['kernel1'] = kernel1
        pre_define_vars['biases1'] = biases1
        train_vars.append(kernel1)
        train_vars.append(biases1)

        dp_mult = all_params['dp_mult']

        # output layer variables
        with tf.variable_scope('fc2', reuse=tf.AUTO_REUSE) as scope:
            stdv = 1.0 / math.sqrt(train_params.hk)
            final_w = tf.get_variable(
                'kernel',
                shape=[train_params.hk, train_params.num_classes],
                dtype=tf.float32,
                initializer=tf.random_uniform_initializer(-stdv, stdv))
            final_b = tf.get_variable('bias',
                                      shape=[train_params.num_classes],
                                      dtype=tf.float32,
                                      initializer=tf.constant_initializer(0.0))
        pre_define_vars['final_w'] = final_w
        pre_define_vars['final_b'] = final_b
        train_vars.append(final_w)
        train_vars.append(final_b)
        ######################################

        # Build a Graph that computes the logits predictions from the inputs
        ######################################
        # input placeholders
        x_sb = tf.placeholder(
            tf.float32,
            [None, train_params.image_size, train_params.image_size, 3],
            name='x_sb')  # input holds the super batch (n_batchs batches stacked)
        x_test = tf.placeholder(
            tf.float32,
            [None, train_params.image_size, train_params.image_size, 3],
            name='x_test')

        y_sb = tf.placeholder(
            tf.float32, [None, train_params.num_classes],
            name='y_sb')  # labels for the super batch (n_batchs batches stacked)
        y_test = tf.placeholder(tf.float32, [None, train_params.num_classes],
                                name='y_test')

        noise = tf.placeholder(tf.float32, [
            None, train_params.enc_h_size, train_params.enc_h_size,
            train_params.enc_filters
        ],
                               name='noise')  # one time

        keep_prob = tf.placeholder(tf.float32, shape=(), name='keep_prob')

        with tf.device('/gpu:0'):
            # the model for testing
            y_logits_test, _ = test_inference(
                x_sb, train_params.attack_norm_bound * noise, keep_prob,
                pre_define_vars, resnet_params, train_params)
            y_softmax_test = tf.nn.softmax(y_logits_test)
        correct_prediction = tf.equal(tf.argmax(y_logits_test, 1),
                                      tf.argmax(y_sb, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # print all variables
        print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
        all_vars = tf.global_variables()
        print_var_list('all vars', all_vars)
        print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')

        # add selected vars into list
        # ('res4' in var.name and ('gamma' in var.name or 'beta' in var.name)) or
        for var in tf.global_variables():
            if 'resnet_model' in var.name and \
              ('conv0' in var.name or
              'fc' in var.name or
              'res3' in var.name or
              'res4' in var.name or
              'res1' in var.name or
              'res2' in var.name) and \
                ('gamma' in var.name or
                  'beta' in var.name or
                  'kernel' in var.name or
                  'bias' in var.name):
                if var not in train_vars:
                    train_vars.append(var)
            elif 'enc_layer' in var.name and \
              ('kernel' in var.name or
                'bias' in var.name or
                'gamma' in var.name or
                'beta' in var.name):
                if var not in train_vars:
                    train_vars.append(var)

        print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
        print_var_list('train_vars', train_vars)
        print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')

        ######################################

        # Create a saver.
        saver = tf.train.Saver(var_list=tf.all_variables(), max_to_keep=1000)

        # start a session with memory growth
        config = tf.ConfigProto(log_device_placement=False)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        print("session created")

        # list all checkpoints in ckpt_path
        checkpoint_path_read = os.path.join(os.getcwd() +
                                            test_params.check_point_dir)
        ckpts = tf.train.get_checkpoint_state(checkpoint_path_read)
        print(ckpts)
        # find the ckpt we need to load and load it
        for ckpt in ckpts.all_model_checkpoint_paths:
            # print(ckpt)
            ckpt_step = int(ckpt.split('-')[-1])
            if ckpt_step == test_params.step_to_load:
                saver.restore(sess, ckpt)
                print('model loaded from {}'.format(ckpt))

        # #######################################

        # # setup all attacks
        attack_switch = {
            'fgsm': False,
            'ifgsm': True,
            'deepfool': False,
            'mim': True,
            'spsa': False,
            'cwl2': False,
            'madry': True,
            'stm': False
        }

        ch_model_probs = CustomCallableModelWrapper(
            callable_fn=inference_test_output_probs,
            output_layer='probs',
            keep_prob=keep_prob,
            pre_define_vars=pre_define_vars,
            resnet_params=resnet_params,
            train_params=train_params)
        attack_tensor_testing_dict = {}

        # define each attack method's tensor
        mu_alpha = tf.placeholder(tf.float32, [1])

        # Iterative FGSM (BasicIterativeMethod/ProjectedGradientMethod with no random init)
        with tf.device('/gpu:0'):
            if attack_switch['ifgsm']:
                print('creating attack tensor of BasicIterativeMethod')
                ifgsm_obj = BasicIterativeMethod(model=ch_model_probs,
                                                 sess=sess)
                attack_tensor_testing_dict['ifgsm'] = ifgsm_obj.generate(
                    x=x_sb,
                    eps=mu_alpha,
                    eps_iter=mu_alpha / train_params.iter_step_testing,
                    nb_iter=train_params.iter_step_testing,
                    clip_min=-1.0,
                    clip_max=1.0)

        # MomentumIterativeMethod
        with tf.device('/gpu:0'):
            if attack_switch['mim']:
                print('creating attack tensor of MomentumIterativeMethod')
                mim_obj = MomentumIterativeMethod(model=ch_model_probs,
                                                  sess=sess)
                attack_tensor_testing_dict['mim'] = mim_obj.generate(
                    x=x_sb,
                    eps=mu_alpha,
                    eps_iter=mu_alpha / train_params.iter_step_testing,
                    nb_iter=train_params.iter_step_testing,
                    decay_factor=1.0,
                    clip_min=-1.0,
                    clip_max=1.0)

        # MadryEtAl (projected gradient with random init, same as rand+fgsm)
        with tf.device('/gpu:0'):
            if attack_switch['madry']:
                print('creating attack tensor of MadryEtAl')
                madry_obj = MadryEtAl(model=ch_model_probs, sess=sess)
                attack_tensor_testing_dict['madry'] = madry_obj.generate(
                    x=x_sb,
                    eps=mu_alpha,
                    eps_iter=mu_alpha / train_params.iter_step_testing,
                    nb_iter=train_params.iter_step_testing,
                    clip_min=-1.0,
                    clip_max=1.0)
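
        # eps is supplied at run time through the mu_alpha placeholder, so the
        # same attack graphs serve every eps value in fgsm_eps_list below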

        # #######################################

        sigmaEGM = all_params['sigmaEGM']

        __noiseE = all_params['__noiseE']

        grad_redis = all_params['grad_redis']

        _sensitivity_2 = all_params['_sensitivity_2']

        _sensitivityW = all_params['_sensitivityW']

        Delta_redis = all_params['Delta_redis']

        sigmaHGM = all_params['sigmaHGM']

        __noiseH = all_params['__noiseH']

        __noise_zero = all_params['__noise_zero']

        ####################################

        ####################################
        print('start testing')
        start_time = time.time()
        log_file_path = os.getcwd() + test_params.log_file_path
        log_file = open(log_file_path, 'a', encoding='utf-8')
        attacks_and_benign = test_params.attacks + ['benign']
        #===================adv samples=====================
        # for each eps setting
        for fgsm_eps in test_params.fgsm_eps_list:
            adv_acc_dict = {}
            robust_adv_acc_dict = {}
            robust_adv_utility_dict = {}
            log_str = ''
            eps_start_time = time.time()
            # cover all test data
            for i in range(test_params.test_epochs):
                test_batch = TIN_data.test.next_batch(
                    test_params.test_batch_size)
                adv_images_dict = {}
                # test for each attack
                for atk in attacks_and_benign:
                    start_time = time.time()
                    if atk not in adv_acc_dict:
                        adv_acc_dict[atk] = 0.0
                        robust_adv_acc_dict[atk] = 0.0
                        robust_adv_utility_dict[atk] = 0.0
                    if atk == 'benign':
                        testing_img = test_batch[0]
                    elif attack_switch[atk]:
                        # if only one gpu available, generate adv samples in-place
                        if atk not in adv_images_dict:
                            adv_images_dict[atk] = sess.run(
                                attack_tensor_testing_dict[atk],
                                feed_dict={
                                    x_sb: test_batch[0],
                                    mu_alpha: [fgsm_eps],
                                    keep_prob: 1.0
                                })
                        testing_img = adv_images_dict[atk]
                    else:
                        continue
                    print('adv gen time: {}s'.format(time.time() - start_time))
                    start_time = time.time()

                    ### PixelDP Robustness ###
                    predictions_form_argmax = np.zeros([
                        test_params.test_batch_size, train_params.num_classes
                    ])
                    softmax_predictions = sess.run(
                        y_softmax_test,
                        feed_dict={
                            x_sb: testing_img,
                            noise: (__noiseE + __noiseH) / 2,
                            keep_prob: 1.0
                        })
                    argmax_predictions = np.argmax(softmax_predictions, axis=1)
                    for n_draws in range(1, test_params.num_samples + 1):
                        if n_draws % 100 == 0:
                            print(
                                'current draws: {}, avg draw time: {}s'.format(
                                    n_draws,
                                    (time.time() - start_time) / n_draws))
                        _noiseE = np.random.normal(
                            0.0, sigmaEGM**2,
                            train_params.enc_h_size * train_params.enc_h_size *
                            train_params.enc_filters).astype(np.float32)
                        _noiseE = np.reshape(_noiseE, [
                            -1, train_params.enc_h_size,
                            train_params.enc_h_size, train_params.enc_filters
                        ])
                        _noise = np.random.normal(
                            0.0, sigmaHGM**2,
                            train_params.enc_h_size * train_params.enc_h_size *
                            train_params.enc_filters).astype(np.float32)
                        _noise = np.reshape(_noise, [
                            -1, train_params.enc_h_size,
                            train_params.enc_h_size, train_params.enc_filters
                        ]) * grad_redis
                        for j in range(test_params.test_batch_size):
                            pred = argmax_predictions[j]
                            predictions_form_argmax[j, pred] += 1
                        softmax_predictions = sess.run(
                            y_softmax_test,
                            feed_dict={
                                x_sb:
                                testing_img,
                                noise: (__noiseE + __noiseH) / 2 +
                                (_noiseE + _noise) / 4,
                                keep_prob:
                                1.0
                            })
                        argmax_predictions = np.argmax(softmax_predictions,
                                                       axis=1)
                    final_predictions = predictions_form_argmax
                    is_correct = []
                    is_robust = []
                    for j in range(test_params.test_batch_size):
                        is_correct.append(
                            np.argmax(test_batch[1][j]) == np.argmax(
                                final_predictions[j]))
                        robustness_from_argmax = robustnessGGaussian.robustness_size_argmax(
                            counts=predictions_form_argmax[j],
                            eta=0.05,
                            dp_attack_size=fgsm_eps,
                            dp_epsilon=train_params.dp_epsilon,
                            dp_delta=0.05,
                            dp_mechanism='gaussian') / dp_mult
                        is_robust.append(robustness_from_argmax >= fgsm_eps)
                    adv_acc_dict[atk] += np.sum(
                        is_correct) * 1.0 / test_params.test_batch_size
                    robust_adv_acc_dict[atk] += np.sum([
                        a and b for a, b in zip(is_robust, is_correct)
                    ]) * 1.0 / np.sum(is_robust)
                    robust_adv_utility_dict[atk] += np.sum(
                        is_robust) * 1.0 / test_params.test_batch_size

                    dt = time.time() - start_time
                    print('atk test time: {}s'.format(dt), flush=True)
            ##############################
            # average all acc for whole test data
            log_str += datetime.now().strftime("%Y-%m-%d_%H:%M:%S\n")
            log_str += 'model trained epoch: {}\n'.format(
                test_params.epoch_to_test)
            log_str += 'fgsm_eps: {}\n'.format(fgsm_eps)
            log_str += 'iter_step_testing: {}\n'.format(
                test_params.iter_step_testing)
            log_str += 'num_samples: {}\n'.format(test_params.num_samples)
            for atk in attacks_and_benign:
                adv_acc_dict[atk] = adv_acc_dict[atk] / test_params.test_epochs
                robust_adv_acc_dict[
                    atk] = robust_adv_acc_dict[atk] / test_params.test_epochs
                robust_adv_utility_dict[atk] = robust_adv_utility_dict[
                    atk] / test_params.test_epochs
                # added robust prediction
                log_str += " {}: {:.6f} {:.6f} {:.6f} {:.6f}\n".format(
                    atk, adv_acc_dict[atk], robust_adv_acc_dict[atk],
                    robust_adv_utility_dict[atk],
                    robust_adv_acc_dict[atk] * robust_adv_utility_dict[atk])
            dt = time.time() - eps_start_time
            print('total test time: {}s'.format(dt), flush=True)
            print(log_str, flush=True)
            print('*******************')

            log_file.write(log_str)
            log_file.write('*******************\n')
            log_file.flush()

            dt = time.time() - start_time
        log_file.close()
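
# A minimal NumPy sketch of the PixelDP voting loop above: the noisy model is
# queried num_samples times, each draw's argmax votes into a per-class count
# matrix, and those counts are what robustness_size_argmax certifies.
# predict_fn stands in for a sess.run of the softmax with a fresh noise draw;
# it is an assumption of this sketch, not part of the original project.
import numpy as np

def vote_predictions(predict_fn, images, num_classes, num_samples=100):
    """Accumulate argmax votes over repeated noisy forward passes."""
    counts = np.zeros([images.shape[0], num_classes])
    for _ in range(num_samples):
        probs = predict_fn(images)      # one stochastic forward pass
        preds = np.argmax(probs, axis=1)
        counts[np.arange(images.shape[0]), preds] += 1
    return counts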
Example #23
class TestMadryEtAl(CleverHansTest):
    def setUp(self):
        super(TestMadryEtAl, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = MadryEtAl(self.model, sess=self.sess)

    def test_attack_strength(self):
        """
        If clipping is not done at each iteration (not using clip_min and
        clip_max), this attack fails by
        np.mean(orig_labels == new_labels) == .5
        """
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val,
                                        eps=1.0,
                                        eps_iter=0.05,
                                        clip_min=0.5,
                                        clip_max=0.7,
                                        nb_iter=5,
                                        sanity_checks=False)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertLess(np.mean(orig_labs == new_labs), 0.1)

    def test_clip_eta(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val,
                                        eps=1.0,
                                        eps_iter=0.1,
                                        nb_iter=5)

        delta = np.max(np.abs(x_adv - x_val), axis=1)
        self.assertTrue(np.all(delta <= 1.))

    def test_generate_np_gives_clipped_adversarial_examples(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val,
                                        eps=1.0,
                                        eps_iter=0.1,
                                        nb_iter=5,
                                        clip_min=-0.2,
                                        clip_max=0.3,
                                        sanity_checks=False)

        self.assertLess(-0.201, np.min(x_adv))
        self.assertLess(np.max(x_adv), .301)

    def test_multiple_initial_random_step(self):
        """
        This test generates multiple adversarial examples until an adversarial
        example is generated with a different label compared to the original
        label. This is the procedure suggested in Madry et al. (2017).

        This test will fail if an initial random step is not taken (error>0.5).
        """
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs_multi = orig_labs.copy()

        # Generate multiple adversarial examples
        for i in range(10):
            x_adv = self.attack.generate_np(x_val,
                                            eps=.5,
                                            eps_iter=0.05,
                                            clip_min=0.5,
                                            clip_max=0.7,
                                            nb_iter=2,
                                            sanity_checks=False)
            new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

            # Examples for which we have not found adversarial examples
            I = (orig_labs == new_labs_multi)
            new_labs_multi[I] = new_labs[I]

        self.assertLess(np.mean(orig_labs == new_labs_multi), 0.5)
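
# A short sketch of the multi-restart procedure the last test exercises:
# MadryEtAl starts from a random point in the eps-ball, so rerunning the
# attack and keeping the first label-flipping example per input makes it
# stronger. Assumes an existing MadryEtAl `attack`, callable `model`, and
# `sess`, as in the test class above; the helper name is illustrative.
import numpy as np

def pgd_with_restarts(attack, model, sess, x_val, restarts=10, **params):
    orig = np.argmax(sess.run(model(x_val)), axis=1)
    best = x_val.copy()
    found = np.zeros(len(x_val), dtype=bool)
    for _ in range(restarts):
        x_adv = attack.generate_np(x_val, **params)
        new = np.argmax(sess.run(model(x_adv)), axis=1)
        flip = (~found) & (new != orig)  # inputs fooled for the first time
        best[flip] = x_adv[flip]
        found |= flip
    return best, found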
Example #24
def craft_one_type(sess,
                   model,
                   X,
                   Y,
                   dataset,
                   attack,
                   batch_size,
                   log_path=None,
                   fp_path=None,
                   model_logits=None):
    """
    TODO
    :param sess:
    :param model:
    :param X:
    :param Y:
    :param dataset:
    :param attack:
    :param batch_size:
    :return:
    """
    print("entered")
    if log_path is not None:
        PATH_DATA = log_path

    if attack == 'fgsm':
        # FGSM attack
        print('Crafting fgsm adversarial samples...')
        X_adv = fast_gradient_sign_method(sess,
                                          model,
                                          X,
                                          Y,
                                          eps=ATTACK_PARAMS[dataset]['eps'],
                                          clip_min=CLIP_MIN,
                                          clip_max=CLIP_MAX,
                                          batch_size=batch_size)
    elif attack == 'adapt-fgsm':
        # Adaptive FGSM attack
        print('Crafting fgsm adversarial samples...')

        X_adv = adaptive_fast_gradient_sign_method(
            sess,
            model,
            X,
            Y,
            eps=ATTACK_PARAMS[dataset]['eps'],
            clip_min=CLIP_MIN,
            clip_max=CLIP_MAX,
            batch_size=batch_size,
            log_dir=fp_path,
            model_logits=model_logits,
            dataset=dataset)
    elif attack == 'adapt-bim-b':
        # BIM attack
        print('Crafting %s adversarial samples...' % attack)
        X_adv = adaptive_basic_iterative_method(
            sess,
            model,
            X,
            Y,
            eps=ATTACK_PARAMS[dataset]['eps'],
            eps_iter=ATTACK_PARAMS[dataset]['eps_iter'],
            clip_min=CLIP_MIN,
            clip_max=CLIP_MAX,
            batch_size=batch_size,
            log_dir=fp_path,
            model_logits=model_logits,
            dataset=dataset)
    elif attack in ['bim-a', 'bim-b']:
        # BIM attack
        print('Crafting %s adversarial samples...' % attack)
        its, results = basic_iterative_method(
            sess,
            model,
            X,
            Y,
            eps=ATTACK_PARAMS[dataset]['eps'],
            eps_iter=ATTACK_PARAMS[dataset]['eps_iter'],
            clip_min=CLIP_MIN,
            clip_max=CLIP_MAX,
            batch_size=batch_size)
        if attack == 'bim-a':
            # BIM-A
            # For each sample, select the time step where that sample first
            # became misclassified
            X_adv = np.asarray([results[its[i], i] for i in range(len(Y))])
        else:
            # BIM-B
            # For each sample, select the very last time step
            X_adv = results[-1]
    elif attack == 'jsma':
        # JSMA attack
        print('Crafting jsma adversarial samples. This may take > 5 hours')
        X_adv = saliency_map_method(sess,
                                    model,
                                    X,
                                    Y,
                                    theta=1,
                                    gamma=0.1,
                                    clip_min=CLIP_MIN,
                                    clip_max=CLIP_MAX)
    elif attack == 'cw-l2':
        # C&W attack
        print(
            'Crafting %s examples. This takes > 5 hours due to internal grid search'
            % attack)
        image_size = ATTACK_PARAMS[dataset]['image_size']
        num_channels = ATTACK_PARAMS[dataset]['num_channels']
        num_labels = ATTACK_PARAMS[dataset]['num_labels']
        cw_attack = CarliniL2(sess,
                              model,
                              image_size,
                              num_channels,
                              num_labels,
                              batch_size=batch_size)
        X_adv = cw_attack.attack(X, Y)
    elif attack == 'cw-fp':
        # C&W attack to break LID detector
        print(
            'Crafting %s examples. This takes > 5 hours due to internal grid search'
            % attack)
        image_size = ATTACK_PARAMS[dataset]['image_size']
        num_channels = ATTACK_PARAMS[dataset]['num_channels']
        num_labels = ATTACK_PARAMS[dataset]['num_labels']
        cw_attack = CarliniFP_2vars(sess,
                                    model,
                                    image_size,
                                    num_channels,
                                    num_labels,
                                    batch_size=batch_size,
                                    fp_dir=fp_path)
        X_adv = cw_attack.attack(X, Y)

    elif attack == 'spsa':
        binary_steps = 1
        batch_shape = X.shape
        X_input = tf.placeholder(tf.float32, shape=(1, ) + batch_shape[1:])
        Y_label = tf.placeholder(tf.int32, shape=(1, ))
        alpha = tf.placeholder(tf.float32, shape=(1, ))

        num_samples = np.shape(X)[0]
        # X = (X - np.argmin(X))/(np.argmax(X)-np.argmin(X))
        _min = np.min(X)
        _max = np.max(X)
        print(_max, _min)
        print(tf.trainable_variables())
        filters = sess.run('conv1/kernel:0')
        biases = 0.0 * sess.run('conv1/bias:0')
        shift_model = Sequential()
        if (dataset == 'mnist'):
            shift_model.add(
                Conv2D(32,
                       kernel_size=(3, 3),
                       activation=None,
                       input_shape=(1, 28, 28)))
        else:
            shift_model.add(
                Conv2D(32,
                       kernel_size=(3, 3),
                       activation=None,
                       input_shape=(3, 32, 32)))

        X_input_2 = tf.placeholder(tf.float32,
                                   shape=(None, ) + batch_shape[1:])

        correction_term = shift_model(X_input_2)
        if (dataset == 'mnist'):
            X_correction = -0.5 * np.ones(
                (1, 1, 28, 28)
            )  # We will shift the image up by 0.5, so this is the correction
        else:
            X_correction = -0.5 * np.ones(
                (1, 3, 32, 32)
            )  # We will shift the image up by 0.5, so this is the correction

        # for PGD

        shift_model.layers[0].set_weights([filters, biases])
        bias_correction_terms = (sess.run(correction_term,
                                          feed_dict={X_input_2: X_correction}))
        for i in range(32):
            biases[i] = bias_correction_terms[0, i, 0, 0]
        _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0)
        print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc))
        original_biases = model.layers[0].get_weights()[1]
        original_weights = model.layers[0].get_weights()[0]
        model.layers[0].set_weights(
            [original_weights, original_biases + biases])
        #Correct model for input shift

        X = X + 0.5  #shift input to make it >=0
        _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0)
        print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc))
        # check accuracy post correction of input and model
        print('Crafting %s examples. Using Cleverhans' % attack)
        image_size = ATTACK_PARAMS[dataset]['image_size']
        num_channels = ATTACK_PARAMS[dataset]['num_channels']
        num_labels = ATTACK_PARAMS[dataset]['num_labels']

        from cleverhans.utils_keras import KerasModelWrapper
        wrapped_model = KerasModelWrapper(model)

        if dataset == "mnist":
            wrapped_model.nb_classes = 10
        elif dataset == "cifar":
            wrapped_model.nb_classes = 10
        else:
            wrapped_model.nb_classes = 10

        real_batch_size = X.shape[0]
        X_adv = None

        spsa = SPSA(wrapped_model, back='tf', sess=sess)
        spsa_params = {
            "epsilon": ATTACK_PARAMS[dataset]['eps'],
            'num_steps': 100,
            'spsa_iters': 1,
            'early_stop_loss_threshold': None,
            'is_targeted': False,
            'is_debug': False
        }
        X_adv_spsa = spsa.generate(X_input,
                                   alpha=alpha,
                                   y=Y_label,
                                   fp_path=fp_path,
                                   **spsa_params)

        for i in range(num_samples):

            # rescale to format TF wants

            #X_i_norm = (X[i] - _min)/(_max-_min)

            X_i_norm = X[i]
            # Run attack
            best_res = None
            ALPHA = np.ones(1) * 0.1
            lb = 1.0e-2
            ub = 1.0e2
            for j in range(binary_steps):
                res = sess.run(X_adv_spsa,
                               feed_dict={
                                   X_input: np.expand_dims(X_i_norm, axis=0),
                                   Y_label: np.array([np.argmax(Y[i])]),
                                   alpha: ALPHA
                               })
                if (dataset == 'mnist'):
                    X_place = tf.placeholder(tf.float32, shape=[1, 1, 28, 28])
                else:
                    X_place = tf.placeholder(tf.float32, shape=[1, 3, 32, 32])
                pred = model(X_place)
                model_op = sess.run(pred, feed_dict={X_place: res})

                if (not np.argmax(model_op) == np.argmax(Y[i, :])):
                    lb = ALPHA[0]
                else:
                    ub = ALPHA[0]
                ALPHA[0] = 0.5 * (lb + ub)
                print(ALPHA)
                if (best_res is None):
                    best_res = res
                else:
                    if (not np.argmax(model_op) == np.argmax(Y[i, :])):
                        best_res = res
                        pass

            # Rescale result back to our scale

            if (i == 0):
                X_adv = best_res
            else:
                X_adv = np.concatenate((X_adv, best_res), axis=0)

        _, acc = model.evaluate(X_adv, Y, batch_size=batch_size, verbose=0)
        print("Model accuracy on the adversarial test set: %0.2f%%" %
              (100.0 * acc))
        _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0)
        print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc))

        #Revert model to original
        model.layers[0].set_weights([original_weights, original_biases])
        #Revert adv shift
        X_adv = X_adv - 0.5
        X = X - 0.5  #Not used but just for logging purposes
    elif attack == 'adapt-pgd':
        binary_steps = 1
        rand_starts = 2
        batch_shape = X.shape
        X_input = tf.placeholder(tf.float32, shape=(1, ) + batch_shape[1:])
        Y_label = tf.placeholder(tf.int32, shape=(1, ))
        alpha = tf.placeholder(tf.float32, shape=(1, ))

        num_samples = np.shape(X)[0]
        # X = (X - np.argmin(X))/(np.argmax(X)-np.argmin(X))
        _min = np.min(X)
        _max = np.max(X)
        print(_max, _min)
        print(tf.trainable_variables())
        filters = sess.run('conv1/kernel:0')
        biases = 0.0 * sess.run('conv1/bias:0')
        shift_model = Sequential()
        if (dataset == 'mnist'):
            shift_model.add(
                Conv2D(32,
                       kernel_size=(3, 3),
                       activation=None,
                       input_shape=(1, 28, 28)))
        else:
            shift_model.add(
                Conv2D(32,
                       kernel_size=(3, 3),
                       activation=None,
                       input_shape=(3, 32, 32)))

        X_input_2 = tf.placeholder(tf.float32,
                                   shape=(None, ) + batch_shape[1:])

        correction_term = shift_model(X_input_2)
        if (dataset == 'mnist'):
            X_correction = -0.5 * np.ones(
                (1, 1, 28, 28)
            )  # We will shift the image up by 0.5, so this is the correction
        else:
            X_correction = -0.5 * np.ones(
                (1, 3, 32, 32)
            )  # We will shift the image up by 0.5, so this is the correction

        # for PGD

        shift_model.layers[0].set_weights([filters, biases])
        bias_correction_terms = (sess.run(correction_term,
                                          feed_dict={X_input_2: X_correction}))
        for i in range(32):
            biases[i] = bias_correction_terms[0, i, 0, 0]
        _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0)
        print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc))
        original_biases = model.layers[0].get_weights()[1]
        original_weights = model.layers[0].get_weights()[0]
        model.layers[0].set_weights(
            [original_weights, original_biases + biases])
        #Correct model for input shift

        X = X + 0.5  #shift input to make it >=0

        _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0)
        print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc))
        # check accuracy post correction of input and model
        print('Crafting %s examples. Using Cleverhans' % attack)
        image_size = ATTACK_PARAMS[dataset]['image_size']
        num_channels = ATTACK_PARAMS[dataset]['num_channels']
        num_labels = ATTACK_PARAMS[dataset]['num_labels']

        from cleverhans.utils_keras import KerasModelWrapper
        wrapped_model = KerasModelWrapper(model)

        if dataset == "mnist":
            wrapped_model.nb_classes = 10
        elif dataset == "cifar":
            wrapped_model.nb_classes = 10
        else:
            wrapped_model.nb_classes = 10

        real_batch_size = X.shape[0]
        X_adv = None

        pgd = MadryEtAl(wrapped_model, back='tf', sess=sess)
        X_adv_pgd, adv_loss_fp = pgd.generate(X_input,
                                              eps=0.3,
                                              eps_iter=0.02,
                                              clip_min=0.0,
                                              clip_max=1.0,
                                              nb_iter=20,
                                              rand_init=True,
                                              fp_path=fp_path,
                                              alpha=alpha)

        for i in range(num_samples):
            # rescale to format TF wants

            #X_i_norm = (X[i] - _min)/(_max-_min)

            X_i_norm = X[i]
            # Run attack
            best_res = None
            best_res_loss = 1000000.0
            ALPHA = np.ones(1) * 0.1
            lb = 1.0e-2
            ub = 1.0e2
            for j in range(binary_steps):
                bin_flag = 0
                for jj in range(rand_starts):

                    [res, res_loss] = sess.run(
                        [X_adv_pgd, adv_loss_fp],
                        feed_dict={
                            X_input: np.expand_dims(X[i], axis=0),
                            Y_label: np.array([np.argmax(Y[i])]),
                            alpha: ALPHA
                        })

                    if (dataset == 'mnist'):
                        X_place = tf.placeholder(tf.float32,
                                                 shape=[1, 1, 28, 28])
                    else:
                        X_place = tf.placeholder(tf.float32,
                                                 shape=[1, 3, 32, 32])

                    pred = model(X_place)
                    model_op = sess.run(pred, feed_dict={X_place: res})

                    if (best_res is None):
                        best_res = res
                    else:
                        if ((not np.argmax(model_op) == np.argmax(Y[i, :]))
                                and res_loss < best_res_loss):
                            best_res = res
                            best_res_loss = res_loss
                            bin_flag = 1
                            pass
                if (bin_flag == 1):
                    lb = ALPHA[0]
                else:
                    ub = ALPHA[0]
                ALPHA[0] = 0.5 * (lb + ub)
                print(ALPHA)
            # Rescale result back to our scale

            if (i == 0):
                X_adv = best_res
            else:
                X_adv = np.concatenate((X_adv, best_res), axis=0)

        _, acc = model.evaluate(X_adv, Y, batch_size=batch_size, verbose=0)
        print("Model accuracy on the adversarial test set: %0.2f%%" %
              (100.0 * acc))
        _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0)
        print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc))

        #Revert model to original
        model.layers[0].set_weights([original_weights, original_biases])
        #Revert adv shift
        X_adv = X_adv - 0.5
        X = X - 0.5  #Not used but just for logging purposes

        pass

    if ("adapt" in attack or "fp" in attack or "spsa" in attack):
        [m, _, _, _] = (np.shape(X_adv))
        cropped_X_adv = []
        cropped_Y = []
        cropped_X = []
        if (dataset == 'mnist'):
            X_place = tf.placeholder(tf.float32, shape=[1, 1, 28, 28])
            pred = model(X_place)
        else:
            X_place = tf.placeholder(tf.float32, shape=[1, 3, 32, 32])
            pred = model(X_place)
        for i in range(m):
            logits_op = sess.run(pred,
                                 feed_dict={X_place: X_adv[i:i + 1, :, :, :]})
            if (not np.argmax(logits_op) == np.argmax(Y[i, :])):
                cropped_Y.append(Y[i, :])
                cropped_X_adv.append(X_adv[i, :, :, :])
                cropped_X.append(X[i, :, :, :])
        X_adv = np.array(cropped_X_adv)
        X = np.array(cropped_X)
        Y = np.array(cropped_Y)

        f = open(
            os.path.join(log_path, 'Random_Test_%s_%s.p' % (dataset, attack)),
            'wb')  # binary mode, as pickle requires

        pickle.dump({"adv_input": X, "adv_labels": Y}, f)
        f.close()

    #np.save(os.path.join(PATH_DATA, 'Adv_%s_%s.npy' % (dataset, attack)), X_adv)
    f = open(os.path.join(log_path, 'Adv_%s_%s.p' % (dataset, attack)), 'wb')

    pickle.dump({"adv_input": X_adv, "adv_labels": Y}, f)
    f.close()
    _, acc = model.evaluate(X, Y, batch_size=batch_size, verbose=0)
    print("Model accuracy on the test set: %0.2f%%" % (100.0 * acc))
    l2_diff = np.linalg.norm(X_adv.reshape((len(X), -1)) - X.reshape(
        (len(X), -1)),
                             axis=1).mean()
    print("Average L-2 perturbation size of the %s attack: %0.2f" %
          (attack, l2_diff))
    if (("adapt" in attack) or ("cw-fp" in attack)):
        return (X, X_adv, Y)
    else:
        print(Y.shape)
        return (X_adv, Y)
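# The adaptive attacks above tune the detector-loss weight alpha by bisection:
# a successful attack raises the lower bound, a failed one lowers the upper
# bound. Below is a minimal standalone sketch of that schedule; run_attack is
# a hypothetical stand-in for the per-sample sess.run calls above.
import numpy as np

def bisect_alpha(run_attack, lb=1.0e-2, ub=1.0e2, steps=5):
    """run_attack(alpha) -> (adv_example, fooled) for one attack run."""
    alpha = 0.1
    best = None
    for _ in range(steps):
        adv, fooled = run_attack(alpha)
        if fooled:
            lb = alpha   # attack still succeeds: try a larger detector weight
            best = adv   # keep the latest successful example
        else:
            ub = alpha   # attack failed: back off
        alpha = 0.5 * (lb + ub)
    return best, alpha

# Example with a toy oracle that only succeeds below alpha = 1.0:
adv, alpha = bisect_alpha(lambda a: (np.zeros(4), a < 1.0))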
Example #25
def main(argv):

    model_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)

    if model_file is None:
        print('No model found')
        sys.exit()

    cifar = cifar10_input.CIFAR10Data(FLAGS.dataset_dir)

    nb_classes = 10
    # Note: despite the X_test/Y_test names, this script loads the training split
    X_test = cifar.train_data.xs
    Y_test = to_categorical(cifar.train_data.ys, nb_classes)
    assert Y_test.shape[1] == 10.
    print("train data shape", X_test.shape)

    set_log_level(logging.DEBUG)

    with tf.Session() as sess:

        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        y = tf.placeholder(tf.float32, shape=(None, 10))

        from cleverhans.model_zoo.madry_lab_challenges.cifar10_model import make_wresnet
        model = make_wresnet()
        saver = tf.train.Saver()
        # Restore the checkpoint
        saver.restore(sess, model_file)
        SCOPE = "cifar10_challenge"
        model2 = make_wresnet(scope=SCOPE)
        assert len(model.get_vars()) == len(model2.get_vars())
        found = [False] * len(model2.get_vars())
        for var1 in model.get_vars():
            var1_found = False
            var2_name = SCOPE + "/" + var1.name
            for idx, var2 in enumerate(model2.get_vars()):
                if var2.name == var2_name:
                    var1_found = True
                    found[idx] = True
                    sess.run(tf.assign(var2, var1))
                    break
            assert var1_found, var1.name
        assert all(found)

        model = model2
        saver = tf.train.Saver()

        # Restore the checkpoint
        #saver.restore(sess, model_file)

        nb_samples = FLAGS.nb_samples

        attack_params = {
            'batch_size': FLAGS.batch_size,
            'clip_min': 0.,
            'clip_max': 255.
        }

        if FLAGS.attack_type == 'cwl2':
            from cleverhans.attacks import CarliniWagnerL2
            attacker = CarliniWagnerL2(model, sess=sess)
            attack_params.update({
                'binary_search_steps': 1,
                'confidence': 0,
                'max_iterations': 100,
                'learning_rate': 0.1,
                'initial_const': 10,
                'batch_size': 10
            })

        else:  # eps and eps_iter in range 0-255
            attack_params.update({'eps': 16, 'ord': np.inf})
            if FLAGS.attack_type == 'fgsm':
                from cleverhans.attacks import FastGradientMethod
                attacker = FastGradientMethod(model, sess=sess)

            elif FLAGS.attack_type == 'pgd':
                attack_params.update({
                    'eps': 8,
                    'eps_iter': .02,
                    'ord': np.inf,
                    'nb_iter': 10
                })
                from cleverhans.attacks import MadryEtAl
                attacker = MadryEtAl(model, sess=sess)
            elif FLAGS.attack_type == 'deepFool':
                print("here")
                attack_params.update({
                    'ord': np.inf,
                    'eps': 6.0,
                    'max_iter': 100
                })
                from CdeepFool_cleverhans import DeepFool

                attacker = DeepFool(model, sess=sess)

        eval_par = {'batch_size': FLAGS.batch_size}

        if FLAGS.sweep:
            max_eps = 16
            epsilons = np.linspace(1, max_eps, max_eps)
            for e in epsilons:
                t1 = time.time()
                attack_params.update({'eps': e})
                x_adv = attacker.generate(x, **attack_params)
                preds_adv = model.get_probs(x_adv)
                x1 = sess.run(x_adv,
                              feed_dict={x: X_test[0:1], y: Y_test[0:1]})
                print(x1.shape)
                l_inf = np.amax(np.abs(X_test[0] - x1))
                print('perturbation found: {}'.format(l_inf))

                acc = model_eval(sess,
                                 x,
                                 y,
                                 preds_adv,
                                 X_test[:nb_samples],
                                 Y_test[:nb_samples],
                                 args=eval_par)
                print('Epsilon %.2f, accuracy on adversarial' % e,
                      'examples %0.4f\n' % acc)
            t2 = time.time()
        else:
            t1 = time.time()
            x_adv = attacker.generate(x, **attack_params)
            preds_adv = model.get_probs(x_adv)
            logits = model.get_logits(x)
            #print (len(x_adv))

            num_eval_examples = 1000
            eval_batch_size = 100
            num_batches = int(math.ceil(num_eval_examples / eval_batch_size))

            x_adv_all = []  # adv accumulator
            y_adv_all = []
            y_true = []
            print('Iterating over {} batches'.format(num_batches))

            for ibatch in range(num_batches):
                bstart = ibatch * eval_batch_size
                bend = min(bstart + eval_batch_size, num_eval_examples)
                print('batch size: {}'.format(bend - bstart))

                x_batch = X_test[bstart:bend, :]
                y_batch = Y_test[bstart:bend]
                y_clean = np.argmax(sess.run(logits, feed_dict={x: x_batch}),
                                    axis=1)
                x_b_adv, pred = sess.run([x_adv, preds_adv],
                                         feed_dict={
                                             x: x_batch,
                                             y: y_batch
                                         })
                y_b_adv = np.argmax(sess.run(logits, feed_dict={x: x_b_adv}),
                                    axis=1)

                count = 0
                y_batch = np.argmax(y_batch, axis=1)
                for i in range(bend - bstart):

                    if (y_b_adv[i] != y_batch[i] and y_clean[i] == y_batch[i]):
                        l_inf = np.amax(np.abs(x_batch[i] - x_b_adv[i]))
                        print('perturbation found: {}'.format(l_inf))
                        #print (y_b_adv[i])
                        x_adv_all.append(x_b_adv[i])
                        y_adv_all.append(y_b_adv[i])
                        y_true.append(y_batch[i])
                        count += 1
                #print (y_adv_all[0:20])
                #print (y_true[0:20])
                print("Totat adversariak cound in this batch", count)

                #x_adv_all.extend(x_b_adv)
                #y_adv_all.extend(y_b_adv)

            x_adv_all = np.array(x_adv_all)
            y_true = np.array(y_true)
            y_adv_all = np.array(y_adv_all)

            print('Adv Label', y_adv_all[0:20])
            print('Ori Label', y_true[0:20])

            #y_adv = np.squeeze(y_adv)
            print(x_adv_all.shape)
            print(y_adv_all.shape)
            print(y_true.shape)

            count = 0
            for i in range(y_adv_all.shape[0]):
                if y_true[i] != y_adv_all[i]:
                    count += 1
            print("Total adversarial examples found", count)
            pickle.dump(
                (x_adv_all, y_true, y_adv_all),
                open(
                    '/scratch/kiani/Projects/CIFAR data/Adversarial/deepFool/iter_100/deepFool_E6_train.p',
                    'wb'))

            #from numpy import linalg as LA
            #l_2 = LA.norm(X_test[0] - x1[0])

            #l_inf = np.amax(np.abs(x - x_adv))

            t2 = time.time()
        print("Range of data should be 0-255 and actual is: ",
              str(np.min(x_adv_all)) + " " + str(np.max(x_adv_all)))
        image = x_adv_all[2].astype(np.uint8)
        img = Image.fromarray(image)
        img.save("deepFool_attack.jpeg")
        print("Took", t2 - t1, "seconds")
Example #26
def main(argv=None):
    model_path = FLAGS.model_path
    targeted = True if FLAGS.targeted else False
    scale = True if FLAGS.scale else False
    learning_rate = FLAGS.learning_rate
    nb_filters = FLAGS.nb_filters
    batch_size = FLAGS.batch_size
    nb_epochs = FLAGS.nb_epochs
    delay = FLAGS.delay
    eps = FLAGS.eps
    adv = FLAGS.adv

    attack = FLAGS.attack
    attack_iterations = FLAGS.attack_iterations
    nb_iter = FLAGS.nb_iter
    
    #### EMPIR extra flags
    lowprecision=FLAGS.lowprecision
    abits=FLAGS.abits
    wbits=FLAGS.wbits
    abitsList=FLAGS.abitsList
    wbitsList=FLAGS.wbitsList
    stocRound=True if FLAGS.stocRound else False
    rand=FLAGS.rand 
    model_path2 = FLAGS.model_path2
    model_path1 = FLAGS.model_path1
    model_path3 = FLAGS.model_path3
    ensembleThree=True if FLAGS.ensembleThree else False
    abits2=FLAGS.abits2
    wbits2=FLAGS.wbits2
    abits2List=FLAGS.abits2List
    wbits2List=FLAGS.wbits2List
    ####
   
    save = False
    train_from_scratch = False

    #### Imagenet flags
    imagenet_path = FLAGS.imagenet_path
    if imagenet_path is None:
        print("Error: Imagenet data path not specified")
        sys.exit(1)

    # Imagenet specific dimensions
    img_rows = _DEFAULT_IMAGE_SIZE
    img_cols = _DEFAULT_IMAGE_SIZE
    channels = _NUM_CHANNELS
    nb_classes = _NUM_CLASSES

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    if not hasattr(backend, "tf"):
        raise RuntimeError("This tutorial requires keras to be configured"
                           " to use the TensorFlow backend.")

    # Image dimensions ordering should follow the Theano convention
    if keras.backend.image_dim_ordering() != 'tf':
        keras.backend.set_image_dim_ordering('tf')
        print("INFO: '~/.keras/keras.json' sets 'image_dim_ordering' to "
              "'th', temporarily setting to 'tf'")

    # Create TF session and set as Keras backend session
    sess = tf.Session()
    keras.backend.set_session(sess)

    set_log_level(logging.WARNING)
    
    # Get imagenet datasets
    train_dataset, test_dataset = data_imagenet(nb_epochs, batch_size, imagenet_path)

    # Create initializable iterators
    train_iterator = train_dataset.make_initializable_iterator()
    test_iterator = test_dataset.make_initializable_iterator()

    # Getting next elements from the iterators
    next_test_element = test_iterator.get_next()
    next_train_element = train_iterator.get_next()
    
    train_x, train_y = train_iterator.get_next()
    test_x, test_y = test_iterator.get_next()

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, channels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))
    phase = tf.placeholder(tf.bool, name="phase")

    logits_scalar = tf.placeholder_with_default(
        INIT_T, shape=(), name="logits_temperature")
   
    if ensembleThree: 
        if (model_path1 is None or model_path2 is None or model_path3 is None):
            train_from_scratch = True
        else:
            train_from_scratch = False
    elif model_path is not None:
        if os.path.exists(model_path):
            # check for existing model in immediate subfolder
            if any(f.endswith('.meta') for f in os.listdir(model_path)):
                train_from_scratch = False
            else:
                model_path = build_model_save_path(
                    model_path, batch_size, nb_filters, learning_rate, nb_epochs, adv, delay)
                print(model_path)
                save = True
                train_from_scratch = True
    else:
        train_from_scratch = True  # train from scratch, but don't save since no path given
    
    if ensembleThree: 
       if (wbitsList is None) or (abitsList is None): # Layer wise separate quantization not specified for first model
           if (wbits==0) or (abits==0):
               print("Error: the number of bits for constant precision weights and activations across layers for the first model have to specified using wbits1 and abits1 flags")
               sys.exit(1)
           else:
               fixedPrec1 = 1
       elif (len(wbitsList) != 6) or (len(abitsList) != 6):
           print("Error: Need to specify the precisions for activations and weights for the atleast the four convolutional layers of alexnet excluding the first layer and 2 fully connected layers excluding the last layer of the first model")  
           sys.exit(1)
       else: 
           fixedPrec1 = 0
       
       if (wbits2List is None) or (abits2List is None): # Layer wise separate quantization not specified for second model
           if (wbits2==0) or (abits2==0):
               print("Error: the number of bits for constant precision weights and activations across layers for the second model have to specified using wbits1 and abits1 flags")
               sys.exit(1)
           else:
               fixedPrec2 = 1
       elif (len(wbits2List) != 6) or (len(abits2List) != 6):
           print("Error: Need to specify the precisions for activations and weights for the atleast the four convolutional layers of alexnet excluding the first layer and 2 fully connected layers excluding the last layer of the second model")  
           sys.exit(1)
       else: 
           fixedPrec2 = 0

       if (fixedPrec2 != 1) or (fixedPrec1 != 1): # At least one of the models has separate precisions per layer
           fixedPrec=0
           print("At least one of the models has separate precisions per layer")
           if (fixedPrec1 == 1): # first model has fixed precision
               abitsList = (abits, abits, abits, abits, abits, abits)
               wbitsList = (wbits, wbits, wbits, wbits, wbits, wbits)
           if (fixedPrec2 == 1): # second model has fixed precision
               abits2List = (abits2, abits2, abits2, abits2, abits2, abits2)
               wbits2List = (wbits2, wbits2, wbits2, wbits2, wbits2, wbits2)
       else:
           fixedPrec=1
       
       if (train_from_scratch):
           print ("The ensemble model cannot be trained from scratch")
           sys.exit(1)
       if fixedPrec == 1:
           from cleverhans_tutorials.tutorial_models import make_ensemble_three_alexnet
           model = make_ensemble_three_alexnet(
               phase, logits_scalar, 'lp1_', 'lp2_', 'fp_', wbits, abits, wbits2, abits2, input_shape=(None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes) 
       else:
           from cleverhans_tutorials.tutorial_models import make_layerwise_three_combined_alexnet
           model = make_layerwise_three_combined_alexnet(
               phase, logits_scalar, 'lp1_', 'lp2_', 'fp_', wbitsList, abitsList, wbits2List, abits2List, input_shape=(None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes) 
    elif lowprecision:
       if (wbitsList is None) or (abitsList is None): # Layer wise separate quantization not specified
           if (wbits==0) or (abits==0):
               print("Error: the number of bits for constant precision weights and activations across layers have to specified using wbits and abits flags")
               sys.exit(1)
           else:
               fixedPrec = 1
       elif (len(wbitsList) != 6) or (len(abitsList) != 6):
           print("Error: Need to specify precisions for activations and weights for at least the four convolutional layers of AlexNet (excluding the first) and the two fully connected layers (excluding the last)")
           sys.exit(1)
       else: 
           fixedPrec = 0
       
       if fixedPrec:
           
           ### For training from scratch
           from cleverhans_tutorials.tutorial_models import make_basic_lowprecision_alexnet
           model = make_basic_lowprecision_alexnet(phase, logits_scalar, 'lp_', wbits, abits, input_shape=(
            None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes)  
       else:
           from cleverhans_tutorials.tutorial_models import make_layerwise_lowprecision_alexnet
           model = make_layerwise_lowprecision_alexnet(phase, logits_scalar, 'lp_', wbitsList, abitsList, 
            input_shape=(None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes) 
    else:
        ### For training from scratch
        from cleverhans_tutorials.tutorial_models import make_basic_alexnet_from_scratch
        model = make_basic_alexnet_from_scratch(phase, logits_scalar, 'fp_', input_shape=(
        None, img_rows, img_cols, channels), nb_filters=nb_filters, nb_classes=nb_classes) 

    # separate calling function for ensemble models
    if ensembleThree:
        preds = model.ensemble_call(x, reuse=False)
    else:  # default
        preds = model(x, reuse=False)
    print("Defined TensorFlow model graph.")

    rng = np.random.RandomState([2017, 8, 30])

    def evaluate():
        # Evaluate the accuracy of the CIFAR10 model on legitimate test
        # examples
        eval_params = {'batch_size': batch_size}
        if ensembleThree:
            acc = model_eval_ensemble_imagenet(
                sess, x, y, preds, test_iterator, test_x, test_y, phase=phase, args=eval_params)
        else: #default below
            acc = model_eval_imagenet(
                sess, x, y, preds, test_iterator, test_x, test_y, phase=phase, args=eval_params)
        print('Test accuracy on legitimate examples: %0.4f' % acc)

    # Train an Imagenet model
    train_params = {
        'lowprecision': lowprecision,
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate,
        'loss_name': 'train loss',
        'filename': 'model',
        'reuse_global_step': False,
        'train_scope': 'train',
        'is_training': True
    }

    if adv != 0:
        if adv == ADVERSARIAL_TRAINING_MADRYETAL:
            from cleverhans.attacks import MadryEtAl
            train_attack_params = {'eps': MAX_EPS, 'eps_iter': 0.01,
                                   'nb_iter': nb_iter}
            train_attacker = MadryEtAl(model, sess=sess)

        elif adv == ADVERSARIAL_TRAINING_FGSM:
            from cleverhans.attacks import FastGradientMethod
            stddev = int(np.ceil((MAX_EPS * 255) // 2))
            train_attack_params = {'eps': tf.abs(tf.truncated_normal(
                shape=(batch_size, 1, 1, 1), mean=0, stddev=stddev))}
            train_attacker = FastGradientMethod(model, back='tf', sess=sess)
        # create the adversarial trainer
        train_attack_params.update({'clip_min': 0., 'clip_max': 1.})
        adv_x_train = train_attacker.generate(x, phase, **train_attack_params)
        preds_adv_train = model.get_probs(adv_x_train)

        eval_attack_params = {'eps': MAX_EPS, 'clip_min': 0., 'clip_max': 1.}
        adv_x_eval = train_attacker.generate(x, phase, **eval_attack_params)
        preds_adv_eval = model.get_probs(adv_x_eval)  # * logits_scalar
   #  if adv:
   #      from cleverhans.attacks import FastGradientMethod
   #      fgsm = FastGradientMethod(model, back='tf', sess=sess)
   #      fgsm_params = {'eps': eps, 'clip_min': 0., 'clip_max': 1.}
   #      adv_x_train = fgsm.generate(x, phase, **fgsm_params)
   #      preds_adv = model.get_probs(adv_x_train)

    if train_from_scratch:
        if save:
            train_params.update({'log_dir': model_path})
            if adv and delay > 0:
                train_params.update({'nb_epochs': delay})
        
        # do clean training for 'nb_epochs' or 'delay' epochs with learning rate reducing with time
        model_train_imagenet2(sess, x, y, preds, train_iterator, train_x, train_y, phase=phase,
                evaluate=evaluate, args=train_params, save=save, rng=rng)

        # optionally do additional adversarial training
        if adv:
            print("Adversarial training for %d epochs" % (nb_epochs - delay))
            train_params.update({'nb_epochs': nb_epochs - delay})
            train_params.update({'reuse_global_step': True})
            model_train_imagenet(sess, x, y, preds, train_iterator, train_x, train_y, phase=phase,
                    predictions_adv=preds_adv_train, evaluate=evaluate, args=train_params, save=save, rng=rng)
    else:
        if ensembleThree: ## ensembleThree models have to loaded from different paths
            variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
            # First 11 variables from path1
            stored_variables = [
                'lp_conv1_init/k', 'lp_conv1_init/b', 'lp_conv2_init/k',
                'lp_conv3_init/k', 'lp_conv4_init/k', 'lp_conv5_init/k',
                'lp_ip1init/W', 'lp_ip1init/b', 'lp_ip2init/W',
                'lp_logits_init/W', 'lp_logits_init/b']
            # build via OrderedDict so the name/variable pairing keeps its order
            variable_dict = dict(OrderedDict(zip(stored_variables, variables[:11])))
            # Restore the first set of variables from model_path1
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path1))
            # Restore the second set of variables from model_path2
            # Second 11 variables from path2
            variable_dict = dict(OrderedDict(zip(stored_variables, variables[11:22])))
            saver2 = tf.train.Saver(variable_dict)
            saver2.restore(sess, tf.train.latest_checkpoint(model_path2))
            # Third 11 variables from path3
            stored_variables = [
                'fp_conv1_init/k', 'fp_conv1_init/b', 'fp_conv2_init/k',
                'fp_conv3_init/k', 'fp_conv4_init/k', 'fp_conv5_init/k',
                'fp_ip1init/W', 'fp_ip1init/b', 'fp_ip2init/W',
                'fp_logits_init/W', 'fp_logits_init/b']
            variable_dict = dict(OrderedDict(zip(stored_variables, variables[22:33])))
            saver3 = tf.train.Saver(variable_dict)
            saver3.restore(sess, tf.train.latest_checkpoint(model_path3))
            # Next 24 batch norm variables from path1
            stored_variables = [
                'lp__batchNorm%d/batch_normalization/%s' % (i, p)
                for i in range(1, 7)
                for p in ('gamma', 'beta', 'moving_mean', 'moving_variance')]

            variable_dict = dict(OrderedDict(zip(stored_variables, variables[33:57])))
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path1))
            # Next 24 batch norm variables from path2
            variable_dict = dict(OrderedDict(zip(stored_variables, variables[57:81])))
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path2))
            # Final 24 batch norm variables from path3
            stored_variables = [
                'fp__batchNorm%d/batch_normalization/%s' % (i, p)
                for i in range(1, 7)
                for p in ('gamma', 'beta', 'moving_mean', 'moving_variance')]
            variable_dict = dict(OrderedDict(zip(stored_variables, variables[81:105])))
            saver = tf.train.Saver(variable_dict)
            saver.restore(sess, tf.train.latest_checkpoint(model_path3))
        else: # restoring the model trained using this setup, not a downloaded one
            tf_model_load(sess, model_path)
            print('Restored model from %s' % model_path)
            # evaluate()


    # Evaluate the accuracy of the model on legitimate test examples
    eval_params = {'batch_size': batch_size}
    if ensembleThree:
        accuracy = model_eval_ensemble_imagenet(sess, x, y, preds, test_iterator, test_x, test_y, phase=phase, feed={phase: False}, args=eval_params)
    else: #default below
        accuracy = model_eval_imagenet(sess, x, y, preds, test_iterator, test_x, test_y, phase=phase, feed={phase: False}, args=eval_params)
    print('Test accuracy on legitimate test examples: {0}'.format(accuracy))

    ###########################################################################
    # Build dataset
    ###########################################################################

    adv_inputs = test_x #adversarial inputs can be generated from any of the test examples 

    ###########################################################################
    # Craft adversarial examples using generic approach
    ###########################################################################
    nb_adv_per_sample = 1
    adv_ys = None
    yname = "y"

    print('Crafting adversarial examples')
    print("This could take some time ...")

    if ensembleThree:
        model_type = 'ensembleThree'
    else:
        model_type = 'default'

    if attack == ATTACK_CARLINI_WAGNER_L2:
        from cleverhans.attacks import CarliniWagnerL2
        attacker = CarliniWagnerL2(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes)
        attack_params = {'binary_search_steps': 1,
                         'max_iterations': attack_iterations,
                         'learning_rate': 0.1,
                         'batch_size': batch_size,
                         'initial_const': 10,
                         }
    elif attack == ATTACK_JSMA:
        from cleverhans.attacks import SaliencyMapMethod
        attacker = SaliencyMapMethod(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes)
        attack_params = {'theta': 1., 'gamma': 0.1}
    elif attack == ATTACK_FGSM:
        from cleverhans.attacks import FastGradientMethod
        attacker = FastGradientMethod(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes)
        attack_params = {'eps': eps}
    elif attack == ATTACK_MADRYETAL:
        from cleverhans.attacks import MadryEtAl
        attacker = MadryEtAl(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    elif attack == ATTACK_BASICITER:
        print('Attack: BasicIterativeMethod')
        from cleverhans.attacks import BasicIterativeMethod
        attacker = BasicIterativeMethod(model, back='tf', sess=sess, model_type=model_type, num_classes=nb_classes)
        attack_params = {'eps': eps, 'eps_iter': 0.01, 'nb_iter': nb_iter}
    else:
        print("Attack undefined")
        sys.exit(1)

    attack_params.update({'clip_min': -2.2, 'clip_max': 2.7}) # min/max over these ImageNet inputs are around -2.11 and 2.12
    eval_params = {'batch_size': batch_size}
    '''
    adv_x = attacker.generate(x, phase, **attack_params)
    # Craft adversarial examples using Fast Gradient Sign Method (FGSM)
    eval_params = {'batch_size': batch_size}
    X_test_adv, = batch_eval(sess, [x], [adv_x], [adv_inputs], feed={
                             phase: False}, args=eval_params)
    '''

    print("Evaluating un-targeted results")
    if ensembleThree:
        adv_accuracy = model_eval_ensemble_adv_imagenet(sess, x, y, preds, test_iterator, 
                        test_x, test_y, phase=phase, args=eval_params, attacker=attacker, attack_params=attack_params)
    else:
        adv_accuracy = model_eval_adv_imagenet(sess, x, y, preds, test_iterator, 
                        test_x, test_y, phase=phase, args=eval_params, attacker=attacker, attack_params=attack_params)
    
    # Report accuracy on the adversarial examples
    print('Test accuracy on adversarial examples {0:.4f}'.format(adv_accuracy))


    # Close TF session
    sess.close()
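# The ensemble restore above maps contiguous slices of the global variable
# list onto per-checkpoint names. A minimal sketch of that pattern under the
# same assumptions (TF1 graphs, order-aligned name lists); the checkpoint
# directory and name list are placeholders.
import tensorflow as tf

def restore_subset(sess, stored_names, graph_variables, ckpt_dir):
    """Restore graph_variables from ckpt_dir, where the checkpoint stored
    them under stored_names (one name per variable, in matching order)."""
    saver = tf.train.Saver(dict(zip(stored_names, graph_variables)))
    saver.restore(sess, tf.train.latest_checkpoint(ckpt_dir))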
def main(argv):

    model_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)

    if model_file is None:
        print('No model found')
        sys.exit()

    cifar = cifar10_input.CIFAR10Data(FLAGS.dataset_dir)

    nb_classes = 10
    X_test = cifar.eval_data.xs
    Y_test = to_categorical(cifar.eval_data.ys, nb_classes)
    assert Y_test.shape[1] == 10.

    set_log_level(logging.DEBUG)

    with tf.Session() as sess:

        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        y = tf.placeholder(tf.float32, shape=(None, 10))

        from madry_cifar10_model import make_madry_wresnet
        model = make_madry_wresnet()

        saver = tf.train.Saver()

        # Restore the checkpoint
        saver.restore(sess, model_file)

        nb_samples = FLAGS.nb_samples

        attack_params = {'batch_size': FLAGS.batch_size,
                         'clip_min': 0., 'clip_max': 255.}

        if FLAGS.attack_type == 'cwl2':
            from cleverhans.attacks import CarliniWagnerL2
            attacker = CarliniWagnerL2(model, sess=sess)
            attack_params.update({'binary_search_steps': 1,
                                  'max_iterations': 100,
                                  'learning_rate': 0.1,
                                  'initial_const': 10,
                                  'batch_size': 10
                                  })

        else:  # eps and eps_iter in range 0-255
            attack_params.update({'eps': 8, 'ord': np.inf})
            if FLAGS.attack_type == 'fgsm':
                from cleverhans.attacks import FastGradientMethod
                attacker = FastGradientMethod(model, sess=sess)

            elif FLAGS.attack_type == 'pgd':
                attack_params.update({'eps_iter': 2, 'nb_iter': 20})
                from cleverhans.attacks import MadryEtAl
                attacker = MadryEtAl(model, sess=sess)

        eval_par = {'batch_size': FLAGS.batch_size}

        if FLAGS.sweep:
            max_eps = 16
            epsilons = np.linspace(1, max_eps, max_eps)
            for e in epsilons:
                t1 = time.time()
                attack_params.update({'eps': e})
                x_adv = attacker.generate(x, **attack_params)
                preds_adv = model.get_probs(x_adv)
                acc = model_eval(sess, x, y, preds_adv, X_test[
                    :nb_samples], Y_test[:nb_samples], args=eval_par)
                print('Epsilon %.2f, accuracy on adversarial' % e,
                      'examples %0.4f\n' % acc)
            t2 = time.time()
        else:
            t1 = time.time()
            x_adv = attacker.generate(x, **attack_params)
            preds_adv = model.get_probs(x_adv)
            acc = model_eval(sess, x, y, preds_adv, X_test[
                :nb_samples], Y_test[:nb_samples], args=eval_par)
            t2 = time.time()
            print('Test accuracy on adversarial examples %0.4f\n' % acc)
        print("Took", t2 - t1, "seconds")
Example #28
def main(argv):

    model_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)

    if model_file is None:
        print("No model found")
        sys.exit()

    cifar = cifar10_input.CIFAR10Data(FLAGS.dataset_dir)

    nb_classes = 10
    X_test = cifar.eval_data.xs
    Y_test = to_categorical(cifar.eval_data.ys, nb_classes)
    assert Y_test.shape[1] == 10.0

    set_log_level(logging.DEBUG)

    with tf.Session() as sess:

        x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
        y = tf.placeholder(tf.float32, shape=(None, 10))

        from cleverhans.model_zoo.madry_lab_challenges.cifar10_model import make_wresnet

        model = make_wresnet()

        saver = tf.train.Saver()

        # Restore the checkpoint
        saver.restore(sess, model_file)

        nb_samples = FLAGS.nb_samples

        attack_params = {
            "batch_size": FLAGS.batch_size,
            "clip_min": 0.0,
            "clip_max": 255.0,
        }

        if FLAGS.attack_type == "cwl2":
            from cleverhans.attacks import CarliniWagnerL2

            attacker = CarliniWagnerL2(model, sess=sess)
            attack_params.update(
                {
                    "binary_search_steps": 1,
                    "max_iterations": 100,
                    "learning_rate": 0.1,
                    "initial_const": 10,
                    "batch_size": 10,
                }
            )

        else:  # eps and eps_iter in range 0-255
            attack_params.update({"eps": 8, "ord": np.inf})
            if FLAGS.attack_type == "fgsm":
                from cleverhans.attacks import FastGradientMethod

                attacker = FastGradientMethod(model, sess=sess)

            elif FLAGS.attack_type == "pgd":
                attack_params.update({"eps_iter": 2, "nb_iter": 20})
                from cleverhans.attacks import MadryEtAl

                attacker = MadryEtAl(model, sess=sess)

        eval_par = {"batch_size": FLAGS.batch_size}

        if FLAGS.sweep:
            max_eps = 16
            epsilons = np.linspace(1, max_eps, max_eps)
            for e in epsilons:
                t1 = time.time()
                attack_params.update({"eps": e})
                x_adv = attacker.generate(x, **attack_params)
                preds_adv = model.get_probs(x_adv)
                acc = model_eval(
                    sess,
                    x,
                    y,
                    preds_adv,
                    X_test[:nb_samples],
                    Y_test[:nb_samples],
                    args=eval_par,
                )
                print(
                    "Epsilon %.2f, accuracy on adversarial" % e,
                    "examples %0.4f\n" % acc,
                )
            t2 = time.time()
        else:
            t1 = time.time()
            x_adv = attacker.generate(x, **attack_params)
            preds_adv = model.get_probs(x_adv)
            acc = model_eval(
                sess,
                x,
                y,
                preds_adv,
                X_test[:nb_samples],
                Y_test[:nb_samples],
                args=eval_par,
            )
            t2 = time.time()
            print("Test accuracy on adversarial examples %0.4f\n" % acc)
        print("Took", t2 - t1, "seconds")
Example #29
def train_zero_knowledge_gandef_model(train_start=0,
                                      train_end=60000,
                                      test_start=0,
                                      test_end=10000,
                                      smoke_test=True,
                                      save=False,
                                      testing=False,
                                      backprop_through_attack=False,
                                      num_threads=None):
    """
    MNIST cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param train_batch_size: size of training batches
    :param test_batch_size: size of testing batches
    :param learning_rate: learning rate for training
    :param save: if true, the final model will be saved
    :param testing: if true, complete an AccuracyReport for unit tests
                    to verify that performance is adequate
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Set TF random seed to improve reproducibility
    tf.set_random_seed(1234)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=1)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get MNIST test data
    X_train, Y_train, X_test, Y_test = data_fashion_mnist()
    if smoke_test:
        X_train, Y_train, X_test, Y_test = (X_train[:256], Y_train[:256],
                                            X_test[:256], Y_test[:256])

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1))
    y_soft = tf.placeholder(tf.float32, shape=(None, 10))

    # Prepare optimizer
    learning_rate = 1e-4
    clf_opt = tf.train.AdamOptimizer(learning_rate)
    dic_opt = tf.train.AdamOptimizer(learning_rate * 10)

    # Train an MNIST model
    train_params = {
        'nb_epochs': 80,
        'batch_size': 128,
        'trade_off': 2,
        'inner_epochs': 1
    }
    rng = np.random.RandomState([2017, 8, 30])

    # Adversarial training
    print("Start adversarial training")
    zero_knowledge_gandef_model = make_zero_knowledge_gandef_model(
        name="model_zero_knowledge_gandef")
    aug_x = gaussian_augment(x, std=1)
    preds_clean = zero_knowledge_gandef_model(x)
    preds_aug = zero_knowledge_gandef_model(aug_x)

    def cross_entropy(truth, preds, mean=True):
        # Get the logits operator
        op = preds.op
        if op.type == "Softmax":
            logits, = op.inputs
        else:
            logits = preds

        # Calculate cross entropy loss
        out = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                      labels=truth)

        # Take average loss and return
        if mean:
            out = tf.reduce_mean(out)
        return out

    def sigmoid_entropy(truth, preds, mean=True):
        # Get the logits operator
        op = preds.op
        if op.type == "Softmax":
            logits, = op.inputs
        else:
            logits = preds

        # Calculate cross entropy loss
        out = tf.nn.sigmoid_cross_entropy_with_logits(logits=logits,
                                                      labels=truth)

        # Take average loss and return
        if mean:
            out = tf.reduce_mean(out)
        return out

    # Perform and evaluate adversarial training
    gan_train_v2(sess,
                 x,
                 y_soft,
                 preds_clean,
                 X_train,
                 Y_train,
                 loss_func=[cross_entropy, sigmoid_entropy],
                 optimizer=[clf_opt, dic_opt],
                 predictions_adv=preds_aug,
                 evaluate=None,
                 args=train_params,
                 rng=rng,
                 var_list=zero_knowledge_gandef_model.get_gan_params())

    # Evaluate the accuracy of the MNIST model on Clean examples
    preds_clean = zero_knowledge_gandef_model(x)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': True,
        'reject_threshold': 0.5
    }
    clean_acc = confident_model_eval(sess,
                                     x,
                                     y_soft,
                                     preds_clean,
                                     X_test,
                                     Y_test,
                                     args=eval_params)
    print('Test accuracy on Clean test examples: %0.4f\n' % clean_acc)
    report.adv_train_clean_eval = clean_acc

    # Evaluate the accuracy of the MNIST model on FGSM examples
    fgsm_params = {'eps': 0.6, 'clip_min': -1., 'clip_max': 1.}
    fgsm_att = FastGradientMethod(zero_knowledge_gandef_model, sess=sess)
    fgsm_adv = fgsm_att.generate(x, **fgsm_params)
    preds_fgsm_adv = zero_knowledge_gandef_model(fgsm_adv)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': False,
        'reject_threshold': 0.5
    }
    fgsm_acc = confident_model_eval(sess,
                                    x,
                                    y_soft,
                                    preds_fgsm_adv,
                                    X_test,
                                    Y_test,
                                    args=eval_params)
    print('Test accuracy on FGSM test examples: %0.4f\n' % fgsm_acc)
    report.adv_train_adv_eval = fgsm_acc

    # Evaluate the accuracy of the MNIST model on BIM examples
    bim_params = {'eps': 0.6, 'eps_iter': 0.1, 'clip_min': -1., 'clip_max': 1.}
    bim_att = BasicIterativeMethod(zero_knowledge_gandef_model, sess=sess)
    bim_adv = bim_att.generate(x, **bim_params)
    preds_bim_adv = zero_knowledge_gandef_model(bim_adv)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': False,
        'reject_threshold': 0.5
    }
    bim_acc = confident_model_eval(sess,
                                   x,
                                   y_soft,
                                   preds_bim_adv,
                                   X_test,
                                   Y_test,
                                   args=eval_params)
    print('Test accuracy on BIM test examples: %0.4f\n' % bim_acc)
    report.adv_train_adv_eval = bim_acc

    # Evaluate the accuracy of the MNIST model on PGD examples
    pgd_params = {
        'eps': 0.6,
        'eps_iter': 0.02,
        'nb_iter': 40,
        'clip_min': -1.,
        'clip_max': 1.,
        'rand_init': True
    }
    pgd_att = MadryEtAl(zero_knowledge_gandef_model, sess=sess)
    pgd_adv = pgd_att.generate(x, **pgd_params)  # use the PGD params defined above
    preds_pgd_adv = zero_knowledge_gandef_model(pgd_adv)
    eval_params = {
        'batch_size': 128,
        'use_dic': False,
        'is_clean': False,
        'reject_threshold': 0.5
    }
    pgd_acc = confident_model_eval(sess,
                                   x,
                                   y_soft,
                                   preds_pgd_adv,
                                   X_test,
                                   Y_test,
                                   args=eval_params)
    print('Test accuracy on PGD test examples: %0.4f\n' % pgd_acc)
    report.adv_train_adv_eval = pgd_acc

    # Save model
    if save:
        model_path = "models/zero_knowledge_gandef"
        vars_to_save = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                         scope='model_zero_knowledge_gandef*')
        assert len(vars_to_save) > 0
        saver = tf.train.Saver(var_list=vars_to_save)
        saver.save(sess, model_path)
        print('Model saved\n')
    else:
        print('Model not saved\n')
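# The evaluations above use a reject option: low-confidence predictions are
# treated as detected attacks. Below is a toy numpy version of an accuracy
# metric with rejection; the exact semantics of confident_model_eval may
# differ, so this is only an assumption-laden sketch.
import numpy as np

def accuracy_with_reject(probs, y_true, threshold=0.5, is_clean=True):
    """probs: (N, classes) softmax outputs; y_true: (N,) integer labels."""
    conf = probs.max(axis=1)
    pred = probs.argmax(axis=1)
    accepted = conf >= threshold
    if is_clean:
        # clean inputs must be accepted and correctly classified
        return np.mean(accepted & (pred == y_true))
    # adversarial inputs count as defended if rejected or still correct
    return np.mean(~accepted | (pred == y_true))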
def cifar10_tutorial(train_start=0,
                     train_end=60000,
                     test_start=0,
                     test_end=10000,
                     nb_epochs=NB_EPOCHS,
                     batch_size=BATCH_SIZE,
                     architecture=ARCHITECTURE,
                     load_model=LOAD_MODEL,
                     ckpt_dir='None',
                     learning_rate=LEARNING_RATE,
                     clean_train=CLEAN_TRAIN,
                     backprop_through_attack=BACKPROP_THROUGH_ATTACK,
                     nb_filters=NB_FILTERS,
                     num_threads=None,
                     label_smoothing=0.):
    """
    CIFAR10 cleverhans tutorial
    :param train_start: index of first training set example
    :param train_end: index of last training set example
    :param test_start: index of first test set example
    :param test_end: index of last test set example
    :param nb_epochs: number of epochs to train model
    :param batch_size: size of training batches
    :param learning_rate: learning rate for training
    :param clean_train: perform normal training on clean examples only
                        before performing adversarial training.
    :param backprop_through_attack: If True, backprop through adversarial
                                    example construction process during
                                    adversarial training.
    :param label_smoothing: float, amount of label smoothing for cross entropy
    :return: an AccuracyReport object
    """

    # Object used to keep track of (and return) key accuracies
    report = AccuracyReport()

    # Seed TF and NumPy from the wall clock, so each run draws a fresh seed
    tf.set_random_seed(int(time.time() * 1000) % 2**31)
    np.random.seed(int(time.time() * 1001) % 2**31)

    # Set logging level to see debug information
    set_log_level(logging.DEBUG)

    # Create TF session
    if num_threads:
        config_args = dict(intra_op_parallelism_threads=num_threads)
    else:
        config_args = {}
    sess = tf.Session(config=tf.ConfigProto(**config_args))

    # Get CIFAR10 data
    data = CIFAR10(train_start=train_start,
                   train_end=train_end,
                   test_start=test_start,
                   test_end=test_end)
    dataset_size = data.x_train.shape[0]
    dataset_train = data.to_tensorflow()[0]
    dataset_train = dataset_train.map(
        lambda x, y: (random_shift(random_horizontal_flip(x)), y), 4)
    dataset_train = dataset_train.batch(batch_size)
    dataset_train = dataset_train.prefetch(16)
    x_train, y_train = data.get_set('train')

    pgd_train = None
    if FLAGS.load_pgd_train_samples:
        pgd_path = os.path.expanduser('~/data/advhyp/{}/samples'.format(
            FLAGS.load_pgd_train_samples))
        x_train = np.load(os.path.join(pgd_path, 'train_clean.npy'))
        y_train = np.load(os.path.join(pgd_path, 'train_y.npy'))
        pgd_train = np.load(os.path.join(pgd_path, 'train_pgd.npy'))
        if x_train.shape[1] == 3:
            x_train = x_train.transpose((0, 2, 3, 1))
            pgd_train = pgd_train.transpose((0, 2, 3, 1))
        if len(y_train.shape) == 1:
            y_tmp = np.zeros((len(y_train), np.max(y_train) + 1),
                             y_train.dtype)
            y_tmp[np.arange(len(y_tmp)), y_train] = 1.
            y_train = y_tmp

    x_test, y_test = data.get_set('test')
    pgd_test = None
    if FLAGS.load_pgd_test_samples:
        pgd_path = os.path.expanduser('~/data/advhyp/{}/samples'.format(
            FLAGS.load_pgd_test_samples))
        x_test = np.load(os.path.join(pgd_path, 'test_clean.npy'))
        y_test = np.load(os.path.join(pgd_path, 'test_y.npy'))
        pgd_test = np.load(os.path.join(pgd_path, 'test_pgd.npy'))
        if x_test.shape[1] == 3:
            x_test = x_test.transpose((0, 2, 3, 1))
            pgd_test = pgd_test.transpose((0, 2, 3, 1))
        if len(y_test.shape) == 1:
            y_tmp = np.zeros((len(y_test), np.max(y_test) + 1), y_test.dtype)
            y_tmp[np.arange(len(y_tmp)), y_test] = 1.
            y_test = y_tmp

    train_idcs = np.arange(len(x_train))
    np.random.shuffle(train_idcs)
    x_train, y_train = x_train[train_idcs], y_train[train_idcs]
    if pgd_train is not None:
        pgd_train = pgd_train[train_idcs]
    test_idcs = np.arange(len(x_test))[:FLAGS.test_size]
    np.random.shuffle(test_idcs)
    x_test, y_test = x_test[test_idcs], y_test[test_idcs]
    if pgd_test is not None:
        pgd_test = pgd_test[test_idcs]

    # Use Image Parameters
    img_rows, img_cols, nchannels = x_test.shape[1:4]
    nb_classes = y_test.shape[1]

    # Define input TF placeholder
    x = tf.placeholder(tf.float32, shape=(None, img_rows, img_cols, nchannels))
    y = tf.placeholder(tf.float32, shape=(None, nb_classes))

    # Training, evaluation and attack parameters for the CIFAR10 model
    train_params = {
        'nb_epochs': nb_epochs,
        'batch_size': batch_size,
        'learning_rate': learning_rate
    }
    eval_params = {'batch_size': batch_size}
    pgd_params = {
        # 'ord' is left at its default (np.inf)
        'eps': FLAGS.eps,
        'eps_iter': (FLAGS.eps / 5),
        'nb_iter': 10,
        'clip_min': 0,
        'clip_max': 255
    }
    cw_params = {
        'binary_search_steps': FLAGS.cw_search_steps,
        'max_iterations': FLAGS.cw_steps,  #1000
        'abort_early': True,
        'learning_rate': FLAGS.cw_lr,
        'batch_size': batch_size,
        'confidence': 0,
        'initial_const': FLAGS.cw_c,
        'clip_min': 0,
        'clip_max': 255
    }
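    # Carlini-Wagner binary-searches the trade-off constant (starting from
    # initial_const, for binary_search_steps rounds), and abort_early stops
    # an unproductive optimization before max_iterations is reached.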

    # Madry doesn't divide by 255, so rescale inputs to the [0, 255] range
    x_train *= 255
    x_test *= 255
    if pgd_train is not None:
        pgd_train *= 255
    if pgd_test is not None:
        pgd_test *= 255

    print('x_train amin={} amax={}'.format(np.amin(x_train), np.amax(x_train)))
    print('x_test amin={} amax={}'.format(np.amin(x_test), np.amax(x_test)))

    print(
        'clip_min : {}, clip_max : {}  >> CHECK WITH WHICH VALUES THE CLASSIFIER WAS PRETRAINED !!! <<'
        .format(pgd_params['clip_min'], pgd_params['clip_max']))

    rng = np.random.RandomState()  # previously seeded with [2017, 8, 30]
    debug_dict = dict() if FLAGS.save_debug_dict else None

    def do_eval(preds,
                x_set,
                y_set,
                report_key,
                is_adv=None,
                predictor=None,
                x_adv=None):
        if predictor is None:
            acc = model_eval(sess, x, y, preds, x_set, y_set, args=eval_params)
        else:
            do_eval(preds, x_set, y_set, report_key, is_adv=is_adv)
            if x_adv is not None:
                x_set_adv, = batch_eval(sess, [x], [x_adv], [x_set],
                                        batch_size=batch_size)
                assert x_set.shape == x_set_adv.shape
                x_set = x_set_adv
            n_batches = math.ceil(x_set.shape[0] / batch_size)
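            # predictor.send(batch) yields one (corrected_label, detector_flag)
            # row per sample; concatenating over batches and transposing
            # separates the predictions (p_set) from the detections (p_det).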
            p_set, p_det = np.concatenate([
                predictor.send(x_set[b * batch_size:(b + 1) * batch_size])
                for b in tqdm.trange(n_batches)
            ]).T
            acc = np.equal(p_set, y_set[:len(p_set)].argmax(-1)).mean()
            if FLAGS.save_debug_dict:
                debug_dict['x_set'] = x_set
                debug_dict['y_set'] = y_set
                ddfn = 'logs/debug_dict_{}.pkl'.format(
                    'adv' if is_adv else 'clean')
                if not os.path.exists(ddfn):
                    with open(ddfn, 'wb') as f:
                        pickle.dump(debug_dict, f)
                debug_dict.clear()
        if is_adv is None:
            report_text = None
        elif is_adv:
            report_text = 'adversarial'
        else:
            report_text = 'legitimate'
        if report_text:
            print('Test accuracy on %s examples %s: %0.4f' %
                  (report_text, 'with correction'
                   if predictor is not None else 'without correction', acc))
            if is_adv is not None:
                label = 'test_acc_{}_{}'.format(
                    report_text, 'corrected' if predictor else 'uncorrected')
                swriter.add_scalar(label, acc)
                if predictor is not None:
                    detect = np.equal(p_det, is_adv).mean()
                    label = 'test_det_{}_{}'.format(
                        report_text,
                        'corrected' if predictor else 'uncorrected')
                    print(label, detect)
                    swriter.add_scalar(label, detect)
                    label = 'test_dac_{}_{}'.format(
                        report_text,
                        'corrected' if predictor else 'uncorrected')
                    swriter.add_scalar(
                        label,
                        np.equal(p_set,
                                 y_set[:len(p_set)].argmax(-1))[np.equal(
                                     p_det, is_adv)].mean())

        return acc

    if clean_train:
        if architecture == 'ConvNet':
            model = ModelAllConvolutional('model1',
                                          nb_classes,
                                          nb_filters,
                                          input_shape=[32, 32, 3])
        elif architecture == 'ResNet':
            model = ResNet(scope='ResNet')
        else:
            raise Exception('Specify valid classifier architecture!')

        preds = model.get_logits(x)
        loss = CrossEntropy(model, smoothing=label_smoothing)

        if load_model:
            model_name = 'naturally_trained'
            if FLAGS.load_adv_trained:
                model_name = 'adv_trained'
            if ckpt_dir != 'None':
                ckpt = tf.train.get_checkpoint_state(
                    os.path.join(os.path.expanduser(ckpt_dir), model_name))
            else:
                ckpt = tf.train.get_checkpoint_state('./models/' + model_name)
            ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

            saver = tf.train.Saver(var_list=dict(
                (v.name.split('/', 1)[1].split(':')[0], v)
                for v in tf.global_variables()))
            saver.restore(sess, ckpt_path)
            print('\nMODEL SUCCESSFULLY LOADED from : {}'.format(ckpt_path))

            initialize_uninitialized_global_variables(sess)

        else:

            def evaluate():
                do_eval(preds, x_test, y_test, 'clean_train_clean_eval', False)

            train(sess,
                  loss,
                  None,
                  None,
                  dataset_train=dataset_train,
                  dataset_size=dataset_size,
                  evaluate=evaluate,
                  args=train_params,
                  rng=rng,
                  var_list=model.get_params())

        logits_op = preds.op
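        # Walk backwards from the logits to the final MatMul; its inputs are
        # the penultimate-layer activations and the last weight matrix, both
        # of which collect_statistics consumes below.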
        while logits_op.type != 'MatMul':
            logits_op = logits_op.inputs[0].op
        latent_x_tensor, weights = logits_op.inputs
        logits_tensor = preds

        nb_classes = weights.shape[-1].value

        if not FLAGS.save_pgd_samples:
            noise_eps = FLAGS.noise_eps.split(',')
            if FLAGS.noise_eps_detect is None:
                FLAGS.noise_eps_detect = FLAGS.noise_eps
            noise_eps_detect = FLAGS.noise_eps_detect.split(',')
            if pgd_train is not None:
                pgd_train = pgd_train[:FLAGS.n_collect]
            if not FLAGS.passthrough:
                predictor = tf_robustify.collect_statistics(
                    x_train[:FLAGS.n_collect],
                    y_train[:FLAGS.n_collect],
                    x,
                    sess,
                    logits_tensor=logits_tensor,
                    latent_x_tensor=latent_x_tensor,
                    weights=weights,
                    nb_classes=nb_classes,
                    p_ratio_cutoff=FLAGS.p_ratio_cutoff,
                    noise_eps=noise_eps,
                    noise_eps_detect=noise_eps_detect,
                    pgd_eps=pgd_params['eps'],
                    pgd_lr=pgd_params['eps_iter'] / pgd_params['eps'],
                    pgd_iters=pgd_params['nb_iter'],
                    save_alignments_dir='logs/stats'
                    if FLAGS.save_alignments else None,
                    load_alignments_dir=os.path.expanduser(
                        '~/data/advhyp/madry/stats')
                    if FLAGS.load_alignments else None,
                    clip_min=pgd_params['clip_min'],
                    clip_max=pgd_params['clip_max'],
                    batch_size=batch_size,
                    num_noise_samples=FLAGS.num_noise_samples,
                    debug_dict=debug_dict,
                    debug=FLAGS.debug,
                    targeted=False,
                    pgd_train=pgd_train,
                    fit_classifier=FLAGS.fit_classifier,
                    clip_alignments=FLAGS.clip_alignments,
                    just_detect=FLAGS.just_detect)
            else:
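                # Passthrough mode: a coroutine with the same send/yield
                # protocol as collect_statistics, returning the raw model
                # prediction plus a constant "not adversarial" flag.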

                def _predictor():
                    _x = yield
                    while (_x is not None):
                        _y = sess.run(preds, {x: _x}).argmax(-1)
                        _x = yield np.stack((_y, np.zeros_like(_y)), -1)

                predictor = _predictor()
            next(predictor)
            if FLAGS.save_alignments:
                exit(0)

            # Evaluate the accuracy of the model on clean examples
            acc_clean = do_eval(preds,
                                x_test,
                                y_test,
                                'clean_train_clean_eval',
                                False,
                                predictor=predictor)

        # Initialize the PGD attack object and graph
        if FLAGS.attack == 'pgd':
            pgd = MadryEtAl(model, sess=sess)
            adv_x = pgd.generate(x, **pgd_params)
        elif FLAGS.attack == 'cw':
            cw = CarliniWagnerL2(model, sess=sess)
            adv_x = cw.generate(x, **cw_params)
        elif FLAGS.attack == 'mean':
            pgd = MadryEtAl(model, sess=sess)
            mean_eps = FLAGS.mean_eps * FLAGS.eps
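            # The 'mean' attack tiles each input mean_samples times, adds
            # uniform noise, runs PGD on every copy, re-projects into the
            # original eps-ball, and finally averages the adversarial copies.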

            def _attack_mean(x):
                x_many = tf.tile(x[None], (FLAGS.mean_samples, 1, 1, 1))
                x_noisy = x_many + tf.random_uniform(x_many.shape, -mean_eps,
                                                     mean_eps)
                x_noisy = tf.clip_by_value(x_noisy, 0, 255)
                x_pgd = pgd.generate(x_noisy, **pgd_params)
                x_clip = tf.minimum(x_pgd, x_many + FLAGS.eps)
                x_clip = tf.maximum(x_clip, x_many - FLAGS.eps)
                x_clip = tf.clip_by_value(x_clip, 0, 255)
                return x_clip

            adv_x = tf.map_fn(_attack_mean, x)
            adv_x = tf.reduce_mean(adv_x, 1)

        preds_adv = model.get_logits(adv_x)

        if FLAGS.save_pgd_samples:
            # note: avoid shadowing the placeholder y; the same loop handles
            # both the train and the test split
            for ds, labels, name in ((x_train, y_train, 'train'),
                                     (x_test, y_test, 'test')):
                n_batches = math.ceil(len(ds) / FLAGS.batch_size)
                ds_pgd = np.concatenate([
                    sess.run(adv_x, {
                        x:
                        ds[b * FLAGS.batch_size:(b + 1) * FLAGS.batch_size]
                    }) for b in tqdm.trange(n_batches)
                ])
                np.save('logs/{}_clean.npy'.format(name), ds / 255.)
                np.save('logs/{}_y.npy'.format(name), labels)
                ds_pgd /= 255.
                np.save('logs/{}_pgd.npy'.format(name), ds_pgd)
            exit(0)

        # Evaluate the accuracy of the model on adversarial examples
        if not FLAGS.load_pgd_test_samples:
            acc_pgd = do_eval(preds_adv,
                              x_test,
                              y_test,
                              'clean_train_adv_eval',
                              True,
                              predictor=predictor,
                              x_adv=adv_x)
        else:
            acc_pgd = do_eval(preds,
                              pgd_test,
                              y_test,
                              'clean_train_adv_eval',
                              True,
                              predictor=predictor)
        swriter.add_scalar('test_acc_mean', (acc_clean + acc_pgd) / 2., 0)

        print('Repeating the process, using adversarial training')

    exit(0)
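    # NOTE: as written, the adversarial-training code below never runs
    # because of the exit(0) above.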
    # Create a new model and train it to be robust to MadryEtAl
    if architecture == 'ConvNet':
        model2 = ModelAllConvolutional('model2',
                                       nb_classes,
                                       nb_filters,
                                       input_shape=[32, 32, 3])
    elif architecture == 'ResNet':
        # presumably the second model was intended here; a separate variable
        # and scope keep it from colliding with the first ResNet
        model2 = ResNet(scope='ResNet2')
    else:
        raise Exception('Specify valid classifier architecture!')

    pgd2 = MadryEtAl(model2, sess=sess)

    def attack(x):
        return pgd2.generate(x, **pgd_params)

    loss2 = CrossEntropy(model2, smoothing=label_smoothing, attack=attack)
    preds2 = model2.get_logits(x)
    adv_x2 = attack(x)

    if not backprop_through_attack:
        # For some attacks, enabling this flag increases the cost of
        # training, but gives the defender the ability to anticipate how
        # the attacker will change their strategy in response to updates to
        # the defender's parameters.
        adv_x2 = tf.stop_gradient(adv_x2)
    preds2_adv = model2.get_logits(adv_x2)

    if load_model:
        if ckpt_dir != 'None':
            ckpt = tf.train.get_checkpoint_state(
                os.path.join(os.path.expanduser(ckpt_dir), 'adv_trained'))
        else:
            ckpt = tf.train.get_checkpoint_state('./models/adv_trained')
        ckpt_path = False if ckpt is None else ckpt.model_checkpoint_path

        assert ckpt_path and tf_model_load(
            sess, file_path=ckpt_path), '\nMODEL LOADING FAILED'
        print('\nMODEL SUCCESSFULLY LOADED from : {}'.format(ckpt_path))

        initialize_uninitialized_global_variables(sess)

    else:

        def evaluate2():
            # Accuracy of adversarially trained model on legitimate test inputs
            do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
            # Accuracy of the adversarially trained model on adversarial
            # examples
            do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

        # Perform and evaluate adversarial training
        train(sess,
              loss2,
              None,
              None,
              dataset_train=dataset_train,
              dataset_size=dataset_size,
              evaluate=evaluate2,
              args=train_params,
              rng=rng,
              var_list=model2.get_params())

    # Evaluate model
    do_eval(preds2, x_test, y_test, 'adv_train_clean_eval', False)
    do_eval(preds2_adv, x_test, y_test, 'adv_train_adv_eval', True)

    return report
Example #31
def main(type="Resnet", dataset="CIFAR10", attack_type="FGM"):

    size = 256
    eval_params = {'batch_size': 128}

    ############################################# Prepare the Data #####################################################

    if dataset == 'CIFAR10':
        (_, _), (x_test, y_test) = prepare_CIFAR10()
        num_classes = 10
        input_dim = 32
    elif dataset == 'CIFAR100':
        (_, _), (x_test, y_test) = prepare_CIFAR100()
        num_classes = 100
        input_dim = 32
    else:
        (_, _), (x_test, y_test) = prepare_SVHN("./Data/")
        num_classes = 10
        input_dim = 32

    x_test = x_test / 255.
    y_test = keras.utils.to_categorical(y_test, num_classes)

    ############################################# Prepare the Data #####################################################


    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:

        # prepare the placeholders
        x = tf.placeholder(tf.float32, [None, input_dim, input_dim, 3])
        y = tf.placeholder(tf.float32, [None, num_classes])

        input_output = []
        def modelBuilder(x, num_classes, dataset, type, sess, input_output):

            if len(input_output) == 0:

                reuse = False

                # Model/Graph
                if type == 'End2End':
                    _, tf_model = \
                        prepare_GBP_End2End(num_classes,
                                            inputT=x, sess=sess,
                                            checkpoint_dir='./{}_{}/'.format(dataset, type), reuse=reuse)
                else:

                    _, tf_model = \
                        prepare_Resnet(num_classes,
                                       inputT=x, sess=sess,
                                       checkpoint_dir='./{}_{}/'.format(dataset, type), reuse=reuse)

                input_output.append(x)
                input_output.append(tf_model.logits)

            else:

                reuse = True

                # Model/Graph
                if type == 'End2End':
                    _, tf_model = \
                        prepare_GBP_End2End(num_classes, inputT=x, reuse=reuse)
                else:
                    _, tf_model = \
                        prepare_Resnet(num_classes, inputT=x, reuse=reuse)

                input_output.append(x)
                input_output.append(tf_model.logits)


            return tf_model.logits

        # create an attackable model for the cleverhans
        model = CallableModelWrapper(lambda placeholder: modelBuilder(placeholder, num_classes, dataset, type, sess, input_output), 'logits')

        # TODO: check the configurations
        if attack_type == "FGM": # pass
            attack = FastGradientMethod(model, back='tf', sess=sess)
            params = {
                'eps' : 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "CWL2": # pass
            attack = CarliniWagnerL2(model, back='tf', sess=sess)
            params = {
                'confidence': 0.9,
                'batch_size': 128,
                'learning_rate': 0.005,
            }
        elif attack_type == "DF": # pass
            attack = DeepFool(model, back='tf', sess=sess)
            params = {
            }
        elif attack_type == "ENM": # configurations checked, quickly tested
            attack = ElasticNetMethod(model, back='tf', sess=sess)
            params = {
                'confidence': 0.9,
                'batch_size': 128,
                'learning_rate': 0.005,
            }
        elif attack_type == "FFA": # configuration checked
            attack = FastFeatureAdversaries(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'eps_iter': 0.005,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "LBFGS":
            attack = LBFGS(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "MEA":
            attack = MadryEtAl(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "MIM":
            attack = MomentumIterativeMethod(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "SMM":
            attack = SaliencyMapMethod(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "SPSA":
            attack = SPSA(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "VATM":
            attack = vatm(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        elif attack_type == "VAM":
            attack = VirtualAdversarialMethod(model, back='tf', sess=sess)
            params = {
                'eps': 0.06,
                'clip_min': 0.,
                'clip_max': 1.
            }
        else:
            raise Exception("Unrecognized attack type: {}".format(attack_type))

        # tf operation
        adv_x = attack.generate(x, **params)

        # generate the adversarial examples
        adv_vals = sess.run(adv_x, feed_dict={x: x_test[:size]})

        # "adv_vals" may contain NaNs when the attack fails, and some inputs
        # may be left completely unperturbed for the same reason
        to_delete = []
        for idx, adv in enumerate(adv_vals):
            # failed attack: NaNs in the output
            if np.isnan(adv).any():
                to_delete.append(idx)
            # failed attack: no perturbation at all
            elif np.array_equiv(adv, x_test[idx]):
                to_delete.append(idx)

        # remove the failed examples
        adv_vals_cleaned = np.delete(adv_vals, to_delete, axis=0)
        ori_cleaned = np.delete(x_test[:size], to_delete, axis=0)
        y_cleaned = np.delete(y_test[:size], to_delete, axis=0)

        if len(adv_vals_cleaned) == 0:
            print("No adversarial examples were generated!")
            return

        print("{} out of {} adversarial examples were generated.".format(len(adv_vals_cleaned), size))

        print("The average L_inf distortion is {}".format(
            np.mean([np.max(np.abs(adv - ori_cleaned[idx])) for idx, adv in enumerate(adv_vals_cleaned)])))

        # TODO: visualize the adv_vals

        accuracy = model_eval(sess, input_output[0], y, tf.nn.softmax(input_output[1]), x_test[:size], y_test[:size],
                              args=eval_params)
        print('Test accuracy on normal examples: %0.4f' % accuracy)

        accuracy = model_eval(sess, input_output[0], y, tf.nn.softmax(input_output[1]), adv_vals_cleaned, y_cleaned,
                              args=eval_params)
        print('Test accuracy on adversarial examples: %0.4f' % accuracy)
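

# Hypothetical invocation, assuming the data-preparation and model helpers
# referenced above are importable from this module:
if __name__ == "__main__":
    main(type="Resnet", dataset="CIFAR10", attack_type="MEA")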
Example #32
class TestMadryEtAl(CleverHansTest):
    def setUp(self):
        super(TestMadryEtAl, self).setUp()

        self.sess = tf.Session()
        self.model = SimpleModel()
        self.attack = MadryEtAl(self.model, sess=self.sess)

    def test_attack_strength(self):
        """
        If clipping is not done at each iteration (not using clip_min and
        clip_max), this attack fails by
        np.mean(orig_labels == new_labels) == .5
        """
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, eps=1.0, eps_iter=0.05,
                                        clip_min=0.5, clip_max=0.7,
                                        nb_iter=5)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)
        self.assertTrue(np.mean(orig_labs == new_labs) < 0.1)

    def test_clip_eta(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, eps=1.0, eps_iter=0.1,
                                        nb_iter=5)

        delta = np.max(np.abs(x_adv - x_val), axis=1)
        self.assertTrue(np.all(delta <= 1.))

    def test_generate_np_gives_clipped_adversarial_examples(self):
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        x_adv = self.attack.generate_np(x_val, eps=1.0, eps_iter=0.1,
                                        nb_iter=5,
                                        clip_min=-0.2, clip_max=0.3)

        self.assertTrue(-0.201 < np.min(x_adv))
        self.assertTrue(np.max(x_adv) < .301)

    def test_multiple_initial_random_step(self):
        """
        This test generates multiple adversarial examples until an adversarial
        example is generated with a different label compared to the original
        label. This is the procedure suggested in Madry et al. (2017).

        This test will fail if an initial random step is not taken (error > 0.5).
        """
        x_val = np.random.rand(100, 2)
        x_val = np.array(x_val, dtype=np.float32)

        orig_labs = np.argmax(self.sess.run(self.model(x_val)), axis=1)
        new_labs_multi = orig_labs.copy()

        # Generate multiple adversarial examples
        for i in range(10):
            x_adv = self.attack.generate_np(x_val, eps=.5, eps_iter=0.05,
                                            clip_min=0.5, clip_max=0.7,
                                            nb_iter=2)
            new_labs = np.argmax(self.sess.run(self.model(x_adv)), axis=1)

            # Examples for which we have not found adversarial examples
            I = (orig_labs == new_labs_multi)
            new_labs_multi[I] = new_labs[I]

        self.assertTrue(np.mean(orig_labs == new_labs_multi) < 0.1)
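

# A minimal sketch for running this test class directly, assuming
# CleverHansTest builds on the standard unittest framework:
if __name__ == '__main__':
    import unittest
    unittest.main()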