Example No. 1
# Assumed imports and class header for this partial snippet; the cleverhans
# base Model class is a guess, and project-local helpers such as OFSM are
# defined elsewhere in the original module.
import cleverhans.attacks as atts
from cleverhans.model import Model


class AttackModel(Model):
    def __init__(self, model, prob=0.0, actionProbThr=1.0):
        super(AttackModel, self).__init__()
        self.sess = model.sess
        self.layer_names = ['inputs', 'logits', 'probs']
        self.layers = [model.inputs, model.logits, model.probs]
        self.input_shape = tuple(model.inputs.shape.as_list())
        self.nb_classes = model.logits.get_shape()[1].value

        self.attackModels = {}
        self.attackModels["lbfgs"] = atts.LBFGS(self, "tf", self.sess)
        self.attackModels["fgsm"] = atts.FastGradientMethod(
            self, "tf", self.sess)
        self.attackModels["basicIt"] = atts.BasicIterativeMethod(
            self, "tf", self.sess)
        self.attackModels["pgd"] = atts.MadryEtAl(self, "tf", self.sess)
        self.attackModels["momenIt"] = atts.MomentumIterativeMethod(
            self, "tf", self.sess)
        self.attackModels["jsma"] = atts.SaliencyMapMethod(
            self, "tf", self.sess)
        self.attackModels["cwl2"] = atts.CarliniWagnerL2(self, "tf", self.sess)
        self.attackModels["ead"] = atts.ElasticNetMethod(self, "tf", self.sess)
        self.attackModels["deepfool"] = atts.DeepFool(self, "tf", self.sess)
        self.attackModels["spsa"] = atts.SPSA(self, "tf", self.sess)
        self.attackModels["featureAdvs"] = atts.FastFeatureAdversaries(
            self, "tf", self.sess)

        self.availableAttacks = list(self.attackModels.keys())
        self.availableAttacks.append("ofsm")
        self.ofsm = OFSM()
        self.availableAttacks.append("gauss")
        self.gaussEps = 0.0
        self.attack = None
        self.lastAttack = None
        self.attackProb = prob
        self.actionProbThr = actionProbThr
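Usage note: the constructor above registers one cleverhans attack object per short name, all bound to the wrapped model's TensorFlow session. A minimal usage sketch, assuming a `model` object that exposes `sess`, `inputs`, `logits`, and `probs`; the batch data and attack parameters below are illustrative, not part of the snippet:

import numpy as np

# Hypothetical usage of the AttackModel wrapper defined above.
attacker = AttackModel(model, prob=0.5)
x_batch = np.random.rand(8, 32, 32, 3).astype('float32')  # illustrative input batch

# generate_np() builds and runs the attack graph in the wrapper's session
# and returns the adversarial batch as a NumPy array.
x_adv = attacker.attackModels["fgsm"].generate_np(
    x_batch, eps=0.03, clip_min=0.0, clip_max=1.0)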
Example No. 2
# Assumed imports for this snippet; FLAGS, make_classifier, _build_dataset and
# _top_1_accuracy are defined elsewhere in the module.
import tensorflow as tf
import cleverhans.model
from cleverhans import attacks
from tensorflow.python.ops import math_grad


def eval_cifar():
    """Evaluate an adversarially trained model."""
    attack_fn_name = FLAGS.attack_fn_name
    total_batches = FLAGS.num_batches
    batch_size = FLAGS.batch_size

    # Note that a `classifier` is a function mapping [0,1]-scaled image Tensors
    # to a logit Tensor. In particular, it includes *both* the preprocessing
    # function, and the neural network.
    classifier = make_classifier()
    cleverhans_model = cleverhans.model.CallableModelWrapper(
        classifier, 'logits')

    _, data_test = tf.keras.datasets.cifar10.load_data()
    data = _build_dataset(data_test, batch_size=batch_size, shuffle=False)

    # Necessary for backwards-compatibility
    # Earlier versions of TF don't have a registered gradient for the AddV2 op
    tf.RegisterGradient('AddV2')(math_grad._AddGrad)  # pylint: disable=protected-access

    # Generate adversarial images.
    if attack_fn_name == 'fgsm':
        # Note: despite the 'fgsm' name, this branch runs the iterative
        # MadryEtAl (PGD) attack from cleverhans.
        attack = attacks.MadryEtAl(cleverhans_model)
        num_cifar_classes = 10
        adv_x = attack.generate(data.image,
                                eps=FLAGS.epsilon_attack,
                                eps_iter=FLAGS.learning_rate,
                                nb_iter=FLAGS.num_steps,
                                y=tf.one_hot(data.label,
                                             depth=num_cifar_classes))
    elif attack_fn_name == 'none':
        adv_x = data.image
    else:
        raise ValueError('Unsupported attack_fn_name: {}'.format(attack_fn_name))

    logits = classifier(adv_x)
    probs = tf.nn.softmax(logits)
    adv_acc = _top_1_accuracy(logits, data.label)

    with tf.train.SingularMonitoredSession() as sess:
        total_acc = 0.
        # Advance the input pipeline past the batches we want to skip.
        for _ in range(FLAGS.skip_batches):
            sess.run(data.image)
        for _ in range(total_batches):
            _, _, adv_acc_val = sess.run([probs, data.label, adv_acc])
            total_acc += adv_acc_val
            print('Batch accuracy: {}'.format(adv_acc_val))
        print('Total accuracy against {}: {}'.format(
            FLAGS.attack_fn_name, total_acc / total_batches))
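The function relies on helpers (`make_classifier`, `_build_dataset`, `_top_1_accuracy`) that are not shown here. For reference, `_top_1_accuracy` could be as simple as the sketch below, assuming integer class labels and a [batch, num_classes] logit tensor; this is an assumption, not the original implementation:

import tensorflow as tf

def _top_1_accuracy(logits, labels):
    """Fraction of examples whose arg-max logit matches the integer label."""
    predictions = tf.argmax(logits, axis=1, output_type=tf.int32)
    correct = tf.equal(predictions, tf.cast(tf.reshape(labels, [-1]), tf.int32))
    return tf.reduce_mean(tf.cast(correct, tf.float32))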
Example No. 3
# Assumed imports for this snippet:
import tensorflow as tf
from cleverhans import attacks


def gen_adv(wrap_model,
            model_input,
            attack_method,
            eps,
            eta,
            def_iter,
            clip_min=0.,
            clip_max=1.):
    """
    Generate adversarial examples using keras wrapper 
    """

    if attack_method == 'MadryEtAl':
        att = attacks.MadryEtAl(wrap_model)
        att_params = {
            'eps': eps,
            'eps_iter': eta,
            'clip_min': clip_min,
            'clip_max': clip_max,
            'nb_iter': def_iter
        }
    elif attack_method == 'MomentumIterativeMethod':
        att = attacks.MomentumIterativeMethod(wrap_model)
        att_params = {
            'eps': eps,
            'eps_iter': eta,
            'clip_min': clip_min,
            'clip_max': clip_max,
            'nb_iter': def_iter
        }
    elif attack_method == 'FastGradientMethod':
        att = attacks.FastGradientMethod(wrap_model)
        att_params = {'eps': eps, 'clip_min': clip_min, 'clip_max': clip_max}
    else:
        raise ValueError('Unsupported attack_method: {}'.format(attack_method))

    print('attack_method: {}'.format(attack_method))
    for k, v in att_params.items():
        print('{}: {}'.format(k, v))
    adv_x = tf.stop_gradient(att.generate(model_input, **att_params))

    return adv_x
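A possible call site for `gen_adv`, assuming a compiled Keras classifier wrapped with cleverhans' `KerasModelWrapper` (the fragment further below appears to use a project-specific wrapper with extra arguments); all names and values here are illustrative:

import tensorflow as tf
from cleverhans.utils_keras import KerasModelWrapper

# `keras_model` is assumed to be a compiled Keras classifier on 32x32x3 inputs.
x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
wrap_model = KerasModelWrapper(keras_model)

# 10-step PGD with an 8/255 budget and a 2/255 step size (illustrative values).
adv_x = gen_adv(wrap_model, x, 'MadryEtAl',
                eps=8. / 255., eta=2. / 255., def_iter=10)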
                                                                +'_advratio'+str(FLAGS.adv_ratio)+BN_name+name_random \
                                                                +use_ball_
else:
    print('Using softmax loss')
    model = original_model
    train_loss = keras.losses.categorical_crossentropy
    filepath_dir = 'advtrained_models/'+FLAGS.dataset+'/resnet32v'+str(version)+'_'+FLAGS.optimizer \
                                                            +'_lr'+str(FLAGS.lr) \
                                                            +'_batchsize'+str(FLAGS.batch_size)+'_'+is_target+FLAGS.attack_method+'_advratio'+str(FLAGS.adv_ratio)+BN_name

wrap_ensemble = KerasModelWrapper(model, num_class=num_class)

eps = 8. / 256.
if FLAGS.attack_method == 'MadryEtAl':
    print('apply ' + is_target + 'PGD' + ' for advtrain')
    att = attacks.MadryEtAl(wrap_ensemble)
    att_params = {
        'eps': eps,
        #'eps_iter': 3.*eps/10.,
        'eps_iter': 2. / 256.,
        'clip_min': clip_min,
        'clip_max': clip_max,
        'nb_iter': 10,
        'y_target': y_target
    }
elif FLAGS.attack_method == 'MomentumIterativeMethod':
    print('apply ' + is_target + 'MIM' + ' for advtrain')
    att = attacks.MomentumIterativeMethod(wrap_ensemble)
    att_params = {
        'eps': eps,
        #'eps_iter': 3.*eps/10.,
                       outputs=model_output_baseline)
model_ensemble_baseline = keras.layers.Average()(model_out_baseline)
model_ensemble_baseline = Model(inputs=model_input_baseline,
                                outputs=model_ensemble_baseline)

# Wrap the ensemble models for cleverhans attacks
wrap_ensemble = KerasModelWrapper(model_ensemble)
wrap_ensemble_baseline = KerasModelWrapper(model_ensemble_baseline)

# Load trained weights for both models
model.load_weights(filepath)
model_baseline.load_weights(filepath_baseline)

# Initialize the attack method
if FLAGS.attack_method == 'MadryEtAl':
    att = attacks.MadryEtAl(wrap_ensemble)
    att_baseline = attacks.MadryEtAl(wrap_ensemble_baseline)
elif FLAGS.attack_method == 'FastGradientMethod':
    att = attacks.FastGradientMethod(wrap_ensemble)
    att_baseline = attacks.FastGradientMethod(wrap_ensemble_baseline)
elif FLAGS.attack_method == 'MomentumIterativeMethod':
    att = attacks.MomentumIterativeMethod(wrap_ensemble)
    att_baseline = attacks.MomentumIterativeMethod(wrap_ensemble_baseline)
elif FLAGS.attack_method == 'BasicIterativeMethod':
    att = attacks.BasicIterativeMethod(wrap_ensemble)
    att_baseline = attacks.BasicIterativeMethod(wrap_ensemble_baseline)

# Evaluation parameters (held constant across attacks)
eval_par = {'batch_size': 100}
eps_ = FLAGS.eps
print('eps is %.3f' % eps_)
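The fragment stops after constructing the attack objects and the evaluation parameters. In cleverhans-style evaluation scripts, the usual next step is to generate adversarial inputs and score them with `cleverhans.utils_tf.model_eval`. A sketch under the assumption that `x`/`y` placeholders and `x_test`/`y_test` arrays exist; none of these names appear in the fragment above:

import keras
import tensorflow as tf
from cleverhans.utils_tf import model_eval

# Illustrative placeholders; shapes assume CIFAR-style 32x32x3 inputs and 10 classes.
x = tf.placeholder(tf.float32, shape=(None, 32, 32, 3))
y = tf.placeholder(tf.float32, shape=(None, 10))
att_params = {'eps': eps_, 'clip_min': 0., 'clip_max': 1.}

adv_x = tf.stop_gradient(att.generate(x, **att_params))
preds_adv = model(adv_x)  # the Keras model whose weights were loaded above

sess = keras.backend.get_session()
acc = model_eval(sess, x, y, preds_adv, x_test, y_test, args=eval_par)
print('Adversarial accuracy under %s: %.4f' % (FLAGS.attack_method, acc))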