def __init__(self, model, prob=0.0, actionProbThr=1.0):
    """Wrap ``model`` and build one CleverHans attack object per attack name.

    Args:
        model: object exposing ``sess``, ``inputs``, ``logits`` and ``probs``;
            these are copied/referenced on this wrapper.
        prob: value stored as ``self.attackProb``.
        actionProbThr: value stored as ``self.actionProbThr``.
    """
    super(AttackModel, self).__init__()

    # Mirror the wrapped model's session and the tensors of interest.
    self.sess = model.sess
    self.layer_names = ['inputs', 'logits', 'probs']
    self.layers = [model.inputs, model.logits, model.probs]
    self.input_shape = tuple(model.inputs.shape.as_list())
    self.nb_classes = model.logits.get_shape()[1].value

    # (lookup key, CleverHans attack class); order defines the order of
    # ``availableAttacks`` below.
    attack_table = (
        ("lbfgs", atts.LBFGS),
        ("fgsm", atts.FastGradientMethod),
        ("basicIt", atts.BasicIterativeMethod),
        ("pgd", atts.MadryEtAl),
        ("momenIt", atts.MomentumIterativeMethod),
        ("jsma", atts.SaliencyMapMethod),
        ("cwl2", atts.CarliniWagnerL2),
        ("ead", atts.ElasticNetMethod),
        ("deepfool", atts.DeepFool),
        ("spsa", atts.SPSA),
        ("featureAdvs", atts.FastFeatureAdversaries),
    )
    self.attackModels = {}
    for attack_key, attack_cls in attack_table:
        # Every attack is built against this wrapper with the TF backend.
        self.attackModels[attack_key] = attack_cls(self, "tf", self.sess)

    # "ofsm" and "gauss" are advertised as available but are not entries of
    # ``attackModels``; they are backed by ``self.ofsm`` / ``self.gaussEps``.
    self.availableAttacks = list(self.attackModels) + ["ofsm", "gauss"]
    self.ofsm = OFSM()
    self.gaussEps = 0.0

    # No attack selected yet.
    self.attack = None
    self.lastAttack = None

    self.attackProb = prob
    self.actionProbThr = actionProbThr
def eval_cifar():
    """Evaluate an adversarially trained model on the CIFAR-10 test split.

    Configuration is read from ``FLAGS``: ``attack_fn_name``, ``num_batches``,
    ``batch_size``, ``skip_batches`` and, for the attack, ``epsilon_attack``,
    ``learning_rate`` and ``num_steps``. Per-batch accuracy and the mean
    accuracy over ``num_batches`` batches are printed.

    Raises:
        ValueError: if ``FLAGS.attack_fn_name`` is neither ``'fgsm'`` nor
            ``'none'``.
    """
    attack_fn_name = FLAGS.attack_fn_name
    total_batches = FLAGS.num_batches
    batch_size = FLAGS.batch_size

    # Fail fast on an unknown attack name; otherwise `adv_x` would never be
    # assigned and the function would die later with an UnboundLocalError,
    # after the dataset has already been loaded.
    if attack_fn_name not in ('fgsm', 'none'):
        raise ValueError(
            "Unsupported attack_fn_name {!r}; expected 'fgsm' or 'none'.".format(
                attack_fn_name))

    # Note that a `classifier` is a function mapping [0,1]-scaled image Tensors
    # to a logit Tensor. In particular, it includes *both* the preprocessing
    # function, and the neural network.
    classifier = make_classifier()
    cleverhans_model = cleverhans.model.CallableModelWrapper(
        classifier, 'logits')

    _, data_test = tf.keras.datasets.cifar10.load_data()
    data = _build_dataset(data_test, batch_size=batch_size, shuffle=False)

    # Necessary for backwards-compatibility
    # Earlier versions of TF don't have a registered gradient for the AddV2 op
    tf.RegisterGradient('AddV2')(math_grad._AddGrad)  # pylint: disable=protected-access

    # Generate adversarial images.
    if attack_fn_name == 'fgsm':
        # NOTE: despite the flag value, this branch runs the iterative
        # MadryEtAl attack for `FLAGS.num_steps` steps.
        attack = attacks.MadryEtAl(cleverhans_model)
        num_cifar_classes = 10
        adv_x = attack.generate(
            data.image,
            eps=FLAGS.epsilon_attack,
            eps_iter=FLAGS.learning_rate,
            nb_iter=FLAGS.num_steps,
            y=tf.one_hot(data.label, depth=num_cifar_classes))
    else:  # attack_fn_name == 'none': evaluate on unperturbed images.
        adv_x = data.image

    logits = classifier(adv_x)
    probs = tf.nn.softmax(logits)
    adv_acc = _top_1_accuracy(logits, data.label)

    with tf.train.SingularMonitoredSession() as sess:
        total_acc = 0.
        # Advance the input pipeline past the batches to be skipped.
        for _ in range(FLAGS.skip_batches):
            sess.run(data.image)
        for _ in range(total_batches):
            _, _, adv_acc_val = sess.run([probs, data.label, adv_acc])
            total_acc += adv_acc_val
            print('Batch accuracy: {}'.format(adv_acc_val))
        print('Total accuracy against {}: {}'.format(
            attack_fn_name, total_acc / total_batches))
def gen_adv(wrap_model, model_input, attack_method, eps, eta, def_iter,
            clip_min=0., clip_max=1.):
    """Generate adversarial examples using keras wrapper.

    Args:
        wrap_model: model wrapper handed to the CleverHans attack constructor.
        model_input: input passed to the attack's ``generate``.
        attack_method: one of ``'MadryEtAl'``, ``'MomentumIterativeMethod'``
            or ``'FastGradientMethod'``.
        eps: passed to the attack as ``eps``.
        eta: passed as ``eps_iter`` (iterative attacks only).
        def_iter: passed as ``nb_iter`` (iterative attacks only).
        clip_min: passed to the attack as ``clip_min``.
        clip_max: passed to the attack as ``clip_max``.

    Returns:
        The attack output wrapped in ``tf.stop_gradient``.

    Raises:
        ValueError: if ``attack_method`` is not one of the supported names.
    """
    iterative_methods = ('MadryEtAl', 'MomentumIterativeMethod')
    supported_methods = iterative_methods + ('FastGradientMethod',)

    # Reject unknown names up front; previously `att`/`att_params` were left
    # unbound and the function failed with an UnboundLocalError.
    if attack_method not in supported_methods:
        raise ValueError(
            'Unsupported attack_method {!r}; expected one of {}.'.format(
                attack_method, ', '.join(supported_methods)))

    # The supported names are exactly the CleverHans attack class names.
    att = getattr(attacks, attack_method)(wrap_model)

    if attack_method in iterative_methods:
        att_params = {
            'eps': eps,
            'eps_iter': eta,
            'clip_min': clip_min,
            'clip_max': clip_max,
            'nb_iter': def_iter,
        }
    else:
        # FastGradientMethod is single-step: no step size / iteration count.
        att_params = {'eps': eps, 'clip_min': clip_min, 'clip_max': clip_max}

    print('attack_method: {}'.format(attack_method))
    for name, value in att_params.items():
        print('{}:{}'.format(name, value))

    adv_x = tf.stop_gradient(att.generate(model_input, **att_params))
    return adv_x
+'_advratio'+str(FLAGS.adv_ratio)+BN_name+name_random \ +use_ball_ else: print('Using softmax loss') model = original_model train_loss = keras.losses.categorical_crossentropy filepath_dir = 'advtrained_models/'+FLAGS.dataset+'/resnet32v'+str(version)+'_'+FLAGS.optimizer \ +'_lr'+str(FLAGS.lr) \ +'_batchsize'+str(FLAGS.batch_size)+'_'+is_target+FLAGS.attack_method+'_advratio'+str(FLAGS.adv_ratio)+BN_name wrap_ensemble = KerasModelWrapper(model, num_class=num_class) eps = 8. / 256. if FLAGS.attack_method == 'MadryEtAl': print('apply ' + is_target + 'PGD' + ' for advtrain') att = attacks.MadryEtAl(wrap_ensemble) att_params = { 'eps': eps, #'eps_iter': 3.*eps/10., 'eps_iter': 2. / 256., 'clip_min': clip_min, 'clip_max': clip_max, 'nb_iter': 10, 'y_target': y_target } elif FLAGS.attack_method == 'MomentumIterativeMethod': print('apply ' + is_target + 'MIM' + ' for advtrain') att = attacks.MomentumIterativeMethod(wrap_ensemble) att_params = { 'eps': eps, #'eps_iter': 3.*eps/10.,
outputs=model_output_baseline) model_ensemble_baseline = keras.layers.Average()(model_out_baseline) model_ensemble_baseline = Model(input=model_input_baseline, output=model_ensemble_baseline) #Get individual models wrap_ensemble = KerasModelWrapper(model_ensemble) wrap_ensemble_baseline = KerasModelWrapper(model_ensemble_baseline) #Load model model.load_weights(filepath) model_baseline.load_weights(filepath_baseline) # Initialize the attack method if FLAGS.attack_method == 'MadryEtAl': att = attacks.MadryEtAl(wrap_ensemble) att_baseline = attacks.MadryEtAl(wrap_ensemble_baseline) elif FLAGS.attack_method == 'FastGradientMethod': att = attacks.FastGradientMethod(wrap_ensemble) att_baseline = attacks.FastGradientMethod(wrap_ensemble_baseline) elif FLAGS.attack_method == 'MomentumIterativeMethod': att = attacks.MomentumIterativeMethod(wrap_ensemble) att_baseline = attacks.MomentumIterativeMethod(wrap_ensemble_baseline) elif FLAGS.attack_method == 'BasicIterativeMethod': att = attacks.BasicIterativeMethod(wrap_ensemble) att_baseline = attacks.BasicIterativeMethod(wrap_ensemble_baseline) # Consider the attack to be constant eval_par = {'batch_size': 100} eps_ = FLAGS.eps print('eps is %.3f' % eps_)