Пример #1
0
    def __init__(self,
                 handle,
                 dataset,
                 train_op,
                 session,
                 epoch_step,
                 batch_step,
                 summary_writer,
                 train_summary_op,
                 img_summary_op,
                 optimizer,
                 GPU_collections,
                 batch_size_placeholder,
                 pretrained=False,
                 adversarial_model=None,
                 adversarial_attacks=None,
                 adversarial_criterion=Misclassification(),
                 saver_path="model.ckpt",
                 num_adversarial_batches=4,
                 batch_size=32,
                 num_epochs=1000,
                 train_summary_period=1000,
                 val_summary_period=1000,
                 adv_summary_period=1000):
        """Wire up training state, summary ops and adversarial evaluation.

        Args:
            handle: feedable iterator-handle placeholder for the dataset.
            dataset: dataset wrapper; must expose ``train_handle`` and
                ``get_train_handle(session)``.
            train_op: op run once per training batch.
            session: the ``tf.Session`` used for all graph execution.
            epoch_step / batch_step: TF variables tracking global progress.
            summary_writer: ``tf.summary.FileWriter`` for all summaries.
            train_summary_op / img_summary_op: pre-built summary ops.
            optimizer: the optimizer instance (kept for checkpoint/debug use).
            GPU_collections: per-GPU tensor collections used by the trainer.
            batch_size_placeholder: placeholder fed with the batch size.
            pretrained: whether weights are restored from a checkpoint.
            adversarial_model: foolbox-wrapped model used for attacks.
            adversarial_attacks: iterable of foolbox attack class names;
                ``None`` means "no adversarial evaluation".
            adversarial_criterion: foolbox criterion shared by all attacks.
                NOTE: default instance is created once at import time;
                foolbox criteria appear stateless, but confirm before
                sharing across trainers.
            saver_path: checkpoint path for ``tf.train.Saver``.
        """
        self.session = session
        self.saver_path = saver_path

        # Python-side epoch/batch counters (mirrors of the TF step vars).
        self.epoch = 0
        self.batch_i = 0
        self.handle = handle
        self.dataset = dataset
        self.train_op = train_op
        self.epoch_step = epoch_step
        self.epoch_step_increment = self.epoch_step.assign_add(1)
        self.batch_step = batch_step
        # Placeholder-driven assign so the batch counter can be restored
        # to an arbitrary value (e.g. when resuming from a checkpoint).
        self.batch_placeholder = tf.placeholder(tf.int32, (), 'b_ph')
        self.batch_step_assign = tf.assign(self.batch_step,
                                           self.batch_placeholder)
        self.num_epochs = num_epochs
        self.batch_size = batch_size

        self.optimizer = optimizer
        self.GPU_collections = GPU_collections
        self.batch_size_placeholder = batch_size_placeholder

        # summary ops
        self.train_summary_op = train_summary_op
        self.img_summary_op = img_summary_op
        self.train_summary_period = train_summary_period
        self.val_summary_period = val_summary_period
        self.adv_summary_period = adv_summary_period
        self.summary_writer = summary_writer

        # validation: scalar accuracies are computed in Python and fed
        # through placeholders into a merged summary op.
        self.val_top_one_mean = tf.placeholder(tf.float32,
                                               name='val_top_one_mean')
        self.val_top_five_mean = tf.placeholder(tf.float32,
                                                name='val_top_five_mean')
        val_summaries = [
            tf.summary.scalar('top_1_accuracy_validation',
                              self.val_top_one_mean),
            tf.summary.scalar('top_5_accuracy_validation',
                              self.val_top_five_mean),
        ]
        self.val_summary_op = tf.summary.merge(val_summaries,
                                               name='val_summaries_op')

        # Adversarial attacks
        self.num_adversarial_batches = num_adversarial_batches
        self.adversarial_criterion = adversarial_criterion

        self.adv_result = tf.placeholder(tf.float32, name='adv_results')
        # Fix: the default of None used to crash the loop below; treat it
        # as "no attacks requested".
        self.adversarial_attacks = (adversarial_attacks
                                    if adversarial_attacks is not None
                                    else [])
        self.adversarial_model = adversarial_model

        # Default distance measure per attack.  Fix: the original dict
        # listed 'LinfinityBasicIterativeAttack' twice.
        default_distances = {
            'GradientAttack': MSE,
            'FGSM': MSE,
            'LinfinityBasicIterativeAttack': Linfinity,
            'L2BasicIterativeAttack': MSE,
            'ProjectedGradientDescentAttack': Linfinity,
            'DeepFoolAttack': MSE,
            'DeepFoolLinfinityAttack': Linfinity
        }

        self.attacks = dict()
        self.distances = dict()  # add support for custom distances
        self.adv_summaries = dict()

        for attack in self.adversarial_attacks:
            # Instantiate the foolbox attack class by name.
            self.attacks[attack] = getattr(fb.attacks, attack)()
            # Fall back to MSE for attacks without a registered default.
            self.distances[attack] = default_distances.get(attack, MSE)
            self.adv_summaries[attack] = tf.summary.scalar(
                attack + '_median_dist', self.adv_result)

        devices = device_lib.list_local_devices()
        GPU_devices = [dev.name for dev in devices if dev.device_type == 'GPU']
        self.num_GPUs = len(GPU_devices)

        self.pretrained = pretrained

        if self.dataset.train_handle is None:
            self.dataset.get_train_handle(self.session)

        self.saver = tf.train.Saver(tf.global_variables())
Пример #2
0
def generate_examples(model, config, pretrained_config, output_root):
    """Attack *model* with the configured foolbox attack and persist results.

    Runs the attack batch-by-batch over the validation split, accumulates
    adversarial examples/targets plus perturbation statistics, writes a JSON
    summary and the example/target arrays under *output_root*, and finally
    prints the model's accuracy on the generated adversarials.
    """
    example_path = os.path.join(output_root, 'examples')
    target_path = os.path.join(output_root, 'targets')

    # Set up adversarial attack.
    adv_model = PyTorchModel(model, (0, 1),
                             pretrained_config.data.class_count,
                             cuda=config.cuda)
    criterion = Misclassification()
    attack = getattr(foolbox.attacks, config.name)(adv_model, criterion)

    # Get data (the pretrained config drives the loader).
    pretrained_config.cuda = config.cuda
    pretrained_config.optim.batch_size = config.data.batch_size
    data = load_data(pretrained_config)
    # print('Test Accuracy:{}'.format(loader_accuracy(model, data['test'])))

    wanted = config['num_examples']
    n_batches = int(
        math.ceil((wanted * 1.0) / pretrained_config.optim.batch_size))

    # Accumulators, grown batch-by-batch below.
    examples = torch.Tensor()
    targets = torch.LongTensor()
    mse_dists = torch.Tensor()
    inf_dists = torch.Tensor()
    success = torch.Tensor()

    # Set up distance for the adversarial attack (MSE by default).
    distance_name = config.get('distance')
    if distance_name is None:
        distance = foolbox.distances.MeanSquaredDistance
    else:
        distance = getattr(foolbox.distances, distance_name)

    # Perform the attack, one validation batch at a time.
    for sample in tqdm(islice(data['validation'], n_batches), total=n_batches):
        batch_x = to_cuda(sample[0], cuda=config.cuda)
        batch_y = sample[1].type(torch.LongTensor)

        adv, adv_t, batch_success, batch_mse, batch_inf = batch_attack(
            attack, adv_model, criterion, batch_x,
            batch_y.cpu().numpy(), config['attack_kwargs'], distance)
        examples = torch.cat([examples, adv], 0)
        targets = torch.cat([targets, adv_t], 0)
        success = torch.cat([success, batch_success], 0)
        mse_dists = torch.cat([mse_dists, batch_mse], 0)
        inf_dists = torch.cat([inf_dists, batch_inf], 0)

    # evaluate_adv_grad_norms(model, examples, targets, config.cuda)
    # Summarize the results; distance means only count successful attacks.
    success_rate = success.mean().item()
    results = {
        "success_rate": success_rate,
        "defense_rate": 1 - success_rate,
        "mean_mse": ((mse_dists * success).sum() / success.sum()).item(),
        "mean_inf": ((inf_dists * success).sum() / success.sum()).item(),
        "mse_quartiles": list(
            np.percentile(mse_dists[success == 1.0].numpy(),
                          [0, 25, 50, 75, 100])),
    }
    results["median_mse"] = results["mse_quartiles"][2]

    print("success rate: {}".format(results["success_rate"]))
    print("defense rate: {}".format(results["defense_rate"]))
    print("mean MSE for successful attacks: {}".format(results["mean_mse"]))
    print("mean L_inf for successful attacks: {}".format(results["mean_inf"]))
    print("MSE quartiles for successful attacks: {}".format(
        results["mse_quartiles"]))

    with open(os.path.join(config['output_root'], 'results.json'), 'w') as f:
        json.dump(results, f, sort_keys=True, indent=4)

    np.save(example_path, examples)
    np.save(target_path, targets)

    print(
        accuracy(model, to_cuda(examples, cuda=config.cuda),
                 to_cuda(targets, cuda=config.cuda)))
Пример #3
0
# Fix: resolved the unresolved git merge conflict that was left in the file
# (kept HEAD's count printout and the remote side's full iteration over all
# paths) and converted the remaining Python-2 print statements to Python 3.
print("{} images found".format(len(paths)))
for path in paths:
    image = imread(path).astype(np.float32)

    # Predict on an untouched copy of the image first.
    test = image.copy()
    preds = kmodel.predict(preprocess_input(np.expand_dims(test, 0)))
    label = np.argmax(preds)
    #print("Top 3 predictions (regular: ", decode_predictions(preds, top=3))

    # run the attack — note the channel reversal (RGB -> BGR) for the model
    print("running the attack")
    attack = MIM(model=fmodel, criterion=Misclassification())
    adversarial = attack(image[:, :, ::-1], label)

    if adversarial is None:
        # MIM could not find an adversarial within its budget; skip image.
        print("Did not find an adversarial")
        continue
    # show results
    print(foolbox.utils.softmax(fmodel.predictions(adversarial))[781])
    adversarial_rgb = adversarial[np.newaxis, :, :, ::-1]
    preds = kmodel.predict(preprocess_input(adversarial_rgb.copy()))
    adv_label = np.argmax(preds)
    if adv_label != label:
        success += 1
    #print("Top 5 predictions (adversarial: ", decode_predictions(preds, top=5))

    diff = (adversarial_rgb[0] - image)
Пример #4
0
def BoundaryAttackPlusPlus(model=None,
                           criterion=Misclassification(),
                           distance=MSE,
                           threshold=None):
    """Deprecated alias: construct a :class:`LimitedHopSkipJumpAttack`.

    Kept for backward compatibility; new code should use
    ``HopSkipJumpAttack`` directly.
    """
    # Fix: emit a proper DeprecationWarning (bare warn() defaults to
    # UserWarning) and attribute it to the caller via stacklevel=2.
    warn("BoundaryAttackPlusPlus is deprecated; use HopSkipJumpAttack.",
         DeprecationWarning, stacklevel=2)
    return LimitedHopSkipJumpAttack(model, criterion, distance, threshold)
Пример #5
0
 def __init__(self, model=None, criterion=Misclassification()):
     """Forward *model* and *criterion* unchanged to the parent attack."""
     super().__init__(model=model, criterion=criterion)
Пример #6
0
    # NOTE(review): fragment — the enclosing function header above and the
    # end of the attack_params dict below are outside this view.
    # Build a deep k-NN (L2) classifier over the given layers; k=75
    # neighbours, 10 classes.
    dknn = DKNNL2(net,
                  x_train,
                  y_train,
                  x_valid,
                  y_valid,
                  layers,
                  k=75,
                  num_classes=10)
    # dknn = DKNNL2Approx(net, x_train, y_train, x_valid, y_valid, layers,
    #                     k=1, num_classes=10)
    y_pred = dknn.classify(x_test)
    # Indices of correctly classified test points (kept for later use).
    ind = np.where(y_pred.argmax(1) == y_test.numpy())[0]
    # Clean test accuracy of the DkNN classifier.
    print((y_pred.argmax(1) == y_test.numpy()).sum() / y_test.size(0))

# Wrap the DkNN in a foolbox-compatible model with [0, 1] input bounds.
dknn_fb = DkNNFoolboxModel(dknn, (0, 1), 1, preprocessing=(0, 1))
criterion = Misclassification()
distance = MeanSquaredDistance

# Decision-based BoundaryAttack: needs no gradients, suits the DkNN.
attack = foolbox.attacks.BoundaryAttack(model=dknn_fb,
                                        criterion=criterion,
                                        distance=distance)

# Keyword arguments forwarded to the attack call (dict continues below).
attack_params = {
    'iterations': 5000,
    'max_directions': 25,
    'starting_point': None,
    'initialization_attack': None,
    'log_every_n_steps': 100,
    'spherical_step': 0.5,
    'source_step': 0.05,
    'step_adaptation': 1.5,
Пример #7
0
import foolbox
from foolbox.models import KerasModel
from foolbox.attacks import LBFGSAttack
from foolbox.criteria import TargetClassProbability, Misclassification
import numpy as np
import keras
from keras.models import load_model
import matplotlib.pyplot as plt

# Load the trained LeNet and wrap it for foolbox.
kmodel = load_model('./LeNet.h5')
# NOTE(review): this preprocessing tuple is built but never passed to
# KerasModel below — confirm whether it was meant to be supplied via the
# `preprocessing=` keyword (the means look like ImageNet BGR, not MNIST).
preprocessing = (np.array([104, 116, 123]), 1)
fmodel = KerasModel(kmodel, bounds=(0, 255))
attack = LBFGSAttack(model=fmodel, criterion=Misclassification())

adversarial_imgs = []
adversarial_labels = []

# MNIST test set (images and labels as float32 arrays).
img_temp = np.load('./mnist_pure/x_test.npy')
img_temp = np.asarray(img_temp, dtype=np.float32)
label_temp = np.load('./mnist_pure/y_test.npy')
label_temp = np.asarray(label_temp, dtype=np.float32)

# Fix: iterate over the actual dataset size instead of a hard-coded 10000,
# and skip failed attacks — LBFGSAttack returns None when it cannot find an
# adversarial, and fmodel.predictions(None) would crash.
for i in range(len(img_temp)):
    adversarial = attack(img_temp[i], label_temp[i])
    if adversarial is None:
        continue
    adversarial_imgs.append(adversarial)
    adv_labels = np.argmax(fmodel.predictions(adversarial))
    adversarial_labels.append(adv_labels)
    def generate_images(self, data_loaders, portion, fraction, epoch):
        """Generate adversarial images for one data-loader split.

        Subsamples *fraction* of each batch in ``data_loaders[portion]``,
        reuses any adversarials already cached on disk, attacks only the
        correctly-classified remainder with ``self.attack``, and keeps only
        images that are verifiably misclassified after the attack.

        Returns:
            (all_images_adversarial, all_adv_preds, adv_image_ids,
             total_possible_adv, created_adv) — adversarial images are
            channels-first and NOT normalized.
        """
        all_images_adversarial, all_adv_preds, adv_image_ids = None, np.array(
            []), np.array([])
        # Counters: how many candidates we attacked vs. how many succeeded.
        total_possible_adv, created_adv = 0, 0
        for idx, (image_ids, inputs, labels,
                  protected_class) in enumerate(data_loaders[portion]):
            print('Epoch: {}'.format(idx))

            image_ids, inputs, labels, protected_class = self.subsample(
                image_ids, inputs, labels, protected_class, fraction)

            # Reuse adversarials cached on disk; keep only the indices that
            # still need to be attacked in this run.
            indices_to_consider, all_images_adversarial, all_adv_preds, adv_image_ids = self.load_from_disk(
                image_ids,
                inputs,
                labels,
                protected_class,
                epsilon=self.attack_call_kwargs['epsilons'],
                epoch=epoch)
            image_ids, inputs, labels, protected_class = (
                image_ids[indices_to_consider], inputs[indices_to_consider],
                labels[indices_to_consider],
                protected_class[indices_to_consider])

            inputs, labels, image_ids = inputs.to(self.device), labels.to(
                self.device), image_ids.to(self.device)

            predicted_classes = self.model.model_ft(inputs.double())
            _, predicted_classes = torch.max(predicted_classes, 1)

            mask = predicted_classes == labels  # only attack correctly classified inputs
            image_ids, inputs, labels, predicted_classes = image_ids[
                mask], inputs[mask], labels[mask], predicted_classes[mask]

            # The input taken by the attack is a channels first image, that is not normalized
            # (It will be mean normalized later on by foolbox. Mean and Std are passed through fmodel)
            inputs_ready_for_attack = hp.inverse_transpose_images(
                inputs, self.ds.data_transform)
            # because it expects channels first images but not preprocessed
            inputs_ready_for_attack = np.moveaxis(inputs_ready_for_attack, -1,
                                                  1)
            inputs_ready_for_attack = torch.tensor(inputs_ready_for_attack,
                                                   device=self.device)

            ## returned tuple contains 3 elements:
            ## (perturbed inuts, perturbed inputs clipped to maximum epsilon,
            ## an array indicating if adversarial attack was a success).

            criterion = Misclassification(
                labels)  # untargeted attacks only (for now)
            tup = self.attack(model=self.fmodel,
                              inputs=inputs_ready_for_attack.double(),
                              criterion=criterion,
                              **self.attack_call_kwargs)

            ## This runs into issues with pickling a pytorch model as defined in foolbox
            #             tup = self.parallel_attack(model=self.fmodel, inputs=inputs_ready_for_attack.double(),
            #                                        labels=labels, kwargs=self.attack_call_kwargs)

            if "deepfool" in self.name:
                ### Epsilon is None for DeepFool, which means that the attacker is
                ### allowed as much perturbation as needed, so first 2 elements have to be the same
                assert (np.all((tup[0] == tup[1]).cpu().numpy()))

            ### these images are NOT normalized and are channels first
            adversarial_images = tup[1]
            ### Sanity Checks
            assert (adversarial_images.shape == inputs.shape)
            for obj in adversarial_images:
                assert obj is not None

            # normalize the attacked image for inference, it's already channels first so no need to move axis
            adversarial_images_for_inference = adversarial_images - self.mean
            adversarial_images_for_inference /= self.std
            predictions_on_attacked = self.model(
                adversarial_images_for_inference.double())
            _, predictions_on_attacked = torch.max(predictions_on_attacked, 1)
            """
            places where adversarial attack was a success 
            (this is returned as third element in the tuple by foolbox) but is not really correct in all cases.
            So just to be completely sure, take a bitwise and with what we observe as adversarial.
            """
            adversarial_mask = tup[2] & (predictions_on_attacked != labels)
            total_possible_adv += len(adversarial_mask)
            created_adv += np.count_nonzero(adversarial_mask.cpu().numpy())
            adversarial_images = adversarial_images[adversarial_mask].cpu(
            ).numpy()
            predictions_on_attacked = predictions_on_attacked[
                adversarial_mask].cpu().numpy()
            inputs = inputs[adversarial_mask].cpu().numpy()
            image_ids = image_ids[adversarial_mask].cpu().numpy()
            labels = labels[adversarial_mask].cpu().numpy()

            # at this point whatever we have should be adversarial
            assert np.all(labels != predictions_on_attacked)

            if len(adversarial_images) == 0:
                continue

            ### Visual Sanity Checks
            # these are not normalized, so just need to move the axis
            image_adv = np.moveaxis(adversarial_images[0], 0, -1)
            #             image_adv = hp.inverse_transpose_images(adversarial_images[0], self.ds.data_transform)
            image_original = hp.inverse_transpose_images(
                inputs[0], self.ds.data_transform)
            stacked_image = np.concatenate((image_adv, image_original), axis=1)
            self.plot_example(stacked_image, labels, predictions_on_attacked)

            # Accumulate this batch's successful adversarials.
            all_images_adversarial = adversarial_images if all_images_adversarial is None else np.concatenate(
                (all_images_adversarial, adversarial_images))
            all_adv_preds = np.concatenate(
                (all_adv_preds, predictions_on_attacked))
            adv_image_ids = np.concatenate((adv_image_ids, image_ids))
        ### adversarial images are channels first, NOT normalized!
        return all_images_adversarial, all_adv_preds, adv_image_ids, total_possible_adv, created_adv
                # NOTE(review): fragment — the opening of this if/elif chain
                # and the surrounding function are outside this view.
                model = convolutional.leNet_pooling(dropout, 0, 0)
            elif args.dropout_type == 'dense':
                model = convolutional.leNet_dense(dropout, 0, 0)
            else:
                raise Exception("Invalid dropout style!")
        else:
            raise Exception("Invalid model!")

        model.fit(x_train, y_train, epochs=50, batch_size=128)
        preds = np.argmax(model.predict(x_test), axis=1)

        # Wrap the trained Keras model for foolbox with the data bounds.
        kmodel = KerasModel(model=model, bounds=(min_, max_))

        # Pick the attack by norm: C&W for L2, random-start PGD for Linf.
        attack = None
        if args.attack_type == 'l2':
            attack = CarliniWagnerL2Attack(kmodel, Misclassification())
        elif args.attack_type == 'linf':
            attack = RandomPGD(kmodel, Misclassification())

        # Attack only the first 1000 test samples.
        x_sample = x_test[:1000]
        y_sample = y_test[:1000]

        adversarial = None
        if args.attack_type == 'l2':
            adversarial = attack(x_sample, np.argmax(y_sample, axis=1), binary_search_steps=5, max_iterations=600)
        else:
            adversarial = attack(x_sample, np.argmax(y_sample, axis=1), iterations=30)

        # Counters for the evaluation below (continues past this view).
        failed = 0
        misclassified = 0
def LinfPGD_attack_func(f_model, inputs, labels):
    """Run the module-level Linf-PGD attack on one batch.

    Moves *inputs* and *labels* onto the model's device, wraps the labels
    in an untargeted ``Misclassification`` criterion, and delegates to the
    module-level ``LinfPGD_attack`` with the module-level ``epsilons``.
    """
    target_device = f_model.device
    device_inputs = inputs.to(target_device)
    device_labels = labels.to(target_device)
    return LinfPGD_attack(model=f_model,
                          inputs=device_inputs,
                          criterion=Misclassification(device_labels),
                          epsilons=epsilons)
Пример #11
0
            # NOTE(review): fragment — the call this line continues, and the
            # start of the elif chain, are outside this view.
            x_train.shape[1:], dropout, dropout)
    elif args.experiment_type == "six_layer_dnn":
        kmodel = neural_networks.asymmetric_six_layer_nn_foolbox(
            x_train.shape[1:], dropout, dropout)
    elif args.experiment_type == "VGG":
        kmodel = convolutional.mini_VGG_foolbox(dropout, dropout, 0, "mnist")
    elif args.experiment_type == "leNet5":
        kmodel = convolutional.leNet_cnn_foolbox(dropout, dropout, "mnist")

    # kmodel.fit(x_train, y_train, epochs=10, batch_size=128)
    kmodel.fit(x_train, y_train, epochs=50, batch_size=128)

    # Clean-test predictions (presumably for baseline accuracy — confirm).
    preds = np.argmax(kmodel.predict(x_test), axis=1)

    # attack = CarliniWagnerL2Attack(kmodel, Misclassification())
    attack = RandomPGD(kmodel, Misclassification())

    # x_sample = x_test[:10]
    # y_sample = y_test[:10]
    # Attack only the first 1000 test samples.
    x_sample = x_test[:1000]
    y_sample = y_test[:1000]

    # adversarial = attack(x_sample, np.argmax(y_sample, axis=1), binary_search_steps=5, max_iterations=600)
    adversarial = attack(x_sample, np.argmax(y_sample, axis=1), iterations=30)

    # For those samples for which the L2 method does not produce an adversarial sample within the attack parameters,
    # we exclude them from the perturbation evaluation.

    failed = 0
    misclassified = 0
Пример #12
0
    # NOTE(review): fragment — loop header defining `i` and the setup of
    # fmodel / surrogate_model_list are outside this view.
    # Single-image batch for this iteration.
    images = images_all[i:i + 1].to(device)
    labels = labels_all[i:i + 1].to(device)

    if args.targeted:
        # Targeted attack: push the image toward a chosen target class,
        # starting the boundary walk from the target image.
        imgTarget = images_tgt[i:i + 1].to(device)
        classVec = labels_tgt[i:i + 1].to(device)
        criterion = TargetedMisclassification(classVec)
        attack = attacksODS.BoundaryAttack(
            tensorboard=False,
            steps=args.num_step,
            surrogate_models=surrogate_model_list,
            ODS=args.ODS)
        advs = attack.run(fmodel, images, criterion, starting_points=imgTarget)
        history = attack.normHistory
    else:
        # Untargeted attack: any misclassification counts.
        criterion = Misclassification(labels)
        attack = attacksODS.BoundaryAttack(
            init_attack=None,
            tensorboard=False,
            steps=args.num_step,
            surrogate_models=surrogate_model_list,
            ODS=args.ODS)
        advs = attack.run(fmodel, images, criterion)
        history = attack.normHistory

    print('image %d: query %d, current dist = %.4f' %
          (i + 1, args.num_step, (advs[0] - images[0]).norm()))

    # Record the final and per-step perturbation norms for this image.
    distList_finalstep[i] = (advs[0] - images[0]).norm()
    distListAll[i] = history
    prefix = '_targeted' if args.targeted else ''
Пример #13
0
def run_adv_hyper(args, hypernet):
    """Evaluate hypernet-sampled model ensembles against BIM adversarials.

    Crafts adversarial batches with BIM against one sampled base model,
    then measures how ensembles of freshly sampled models (10/100/1000
    members) disagree on those adversarials across a sweep of epsilons.
    """
    arch = get_network(args)
    # Base model used both for inference and as the attack target.
    model_base, fmodel_base = sample_fmodel(args, hypernet, arch)
    criterion = Misclassification()
    fgs = foolbox.attacks.BIM(fmodel_base, criterion)
    _, test_loader = datagen.load_mnist(args)
    adv, y = [], []
    for n_models in [10, 100, 1000]:
        print('ensemble of {}'.format(n_models))
        for eps in [0.01, 0.03, 0.08, 0.1, 0.3, 0.5, 1.0]:
            total_adv = 0
            acc, _accs = [], []
            _vars, _stds, _ents = [], [], []
            for idx, (data, target) in enumerate(test_loader):
                data, target = data.cuda(), target.cuda()
                # Craft adversarials against the base model; None means
                # the attack found nothing for this batch.
                adv_batch, target_batch, _ = sample_adv_batch(
                    data, target, fmodel_base, eps, fgs)
                if adv_batch is None:
                    continue
                output = model_base(adv_batch)
                pred = output.data.max(1, keepdim=True)[1]
                correct = pred.eq(
                    target_batch.data.view_as(pred)).long().cpu().sum()
                n_adv = len(target_batch) - correct.item()
                total_adv += n_adv
                padv = np.argmax(
                    fmodel_base.predictions(adv_batch[0].cpu().numpy()))

                # Query n_models freshly sampled models on the same batch.
                sample_adv, pred_labels, logits = [], [], []
                for _ in range(n_models):
                    model, fmodel = sample_fmodel(args, hypernet, arch)
                    output = model(adv_batch)
                    pred = output.data.max(1, keepdim=True)[1]
                    correct = pred.eq(
                        target_batch.data.view_as(pred)).long().cpu().sum()
                    acc.append(correct.item())
                    n_adv_sample = len(target_batch) - correct.item()
                    sample_adv.append(n_adv_sample)
                    pred_labels.append(pred.view(pred.numel()))
                    logits.append(F.softmax(output, dim=1))

                # Disagreement: per-example count of models deviating from
                # the ensemble's modal prediction, plus prediction entropy.
                p_labels = torch.stack(pred_labels).float().transpose(0, 1)
                if len(p_labels) > 1:
                    p_labels_cols = p_labels.transpose(0, 1)
                    modes = mode(p_labels_cols)[0][0]
                    mode_chart = []
                    for i in range(len(modes)):
                        v = len(
                            np.setdiff1d(p_labels[i],
                                         modes[i],
                                         assume_unique=False))
                        mode_chart.append(v)
                    _vars.append(torch.tensor(mode_chart).float().mean())
                    _ents.append(
                        np.apply_along_axis(entropy, 1,
                                            p_labels.detach()).mean())
                acc = torch.tensor(acc, dtype=torch.float)
                _accs.append(torch.mean(acc))
                acc, adv, y = [], [], []

            # plot_entropy(args, _ents, eps)
            # NOTE(review): the 'std:' field below actually prints the mean
            # of _ents (entropies), not a standard deviation — confirm the
            # intended label.
            print('Eps: {}, Adv: {}/{}, var: {}, std: {}'.format(
                eps, total_adv, len(test_loader.dataset),
                torch.tensor(_vars).mean(),
                torch.tensor(_ents).mean()))
Пример #14
0
from scipy.misc import imread, imresize, imsave
import os
import numpy as np
import tensorflow as tf
from matplotlib import gridspec
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

from vgg16 import Vgg16

# Demo image names mapped to class indices (presumably ImageNet ids for
# VGG16 — confirm against the label file used by Vgg16).
image_dict = {'tabby': 281, 'laska': 356, 'mastiff': 243}
# Attack names understood by the downstream attack construction code.
attack_list = ['FGSM', 'IterGS', 'SalMap']
# Criterion registry: top-10 misclassification vs. plain misclassification.
criteria_dict = {
    'topkmis': TopKMisclassification(k=10),
    'mis': Misclassification()
}

# Defaults selected for this run.
name1 = 'tabby'
attack_type = "FGSM"
criterion_type = "topkmis"


def softmax_np(x, axis=None):
    """Numerically stable softmax of *x* along *axis* (None = all elements).

    Fixes two defects of the naive ``exp(x)/sum(exp(x))`` form: it
    overflowed for moderately large inputs, and without ``keepdims`` the
    division broadcast incorrectly for ``axis=1`` on 2-D input.
    """
    # Shifting by the max leaves the result unchanged but bounds exp() args.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=axis, keepdims=True)


def main():
    # NOTE(review): this body appears to be a mis-merged fragment — the
    # indentation jumps into a different snippet's if/elif chain below and
    # `args`, `x_train`, `x_test`, `y_test`, `dropout` are never defined
    # here. Left byte-identical; needs reconstruction against the original
    # sources.

    data_dir = "data_imagenet"
    train_dir = "adv_results_vgg16"
        if args.dropout_type == "pooling":
            kmodel = convolutional.mini_VGG_foolbox(dropout, 0, 0, "mnist")
        else:
            kmodel = convolutional.mini_VGG_foolbox(0, dropout, 0, "mnist")
    elif args.experiment_type == "leNet5":
        if args.dropout_type == "pooling":
            kmodel = convolutional.leNet_cnn_foolbox(dropout, 0, "mnist")
        else:
            kmodel = convolutional.leNet_cnn_foolbox(0, dropout, "mnist")

    # kmodel.fit(x_train, y_train, epochs=1, batch_size=128)
    kmodel.fit(x_train, y_train, epochs=50, batch_size=128)

    preds = np.argmax(kmodel.predict(x_test), axis=1)

    attack = CarliniWagnerL2Attack(kmodel, Misclassification())

    # x_sample = x_test[:10]
    # y_sample = y_test[:10]
    x_sample = x_test[:1000]
    y_sample = y_test[:1000]

    adversarial = attack(x_sample,
                         np.argmax(y_sample, axis=1),
                         binary_search_steps=5,
                         max_iterations=600)

    # For those samples for which the L2 method does not produce an adversarial sample within the attack parameters,
    # we exclude them from the perturbation evaluation.

    failed = 0
Пример #16
0
def run_adv_hyper(args, hypernet):
    """Compare ensemble uncertainty on clean vs. FGSM-adversarial batches.

    Crafts FGSM adversarials against one hypernet-sampled base model, then
    for ensembles of sampled models measures softmax variance, predictive
    entropy and ensemble variance on both the clean and adversarial data,
    printing both after a few batches per epsilon.
    """
    arch = get_network(args)
    models, fmodels = [], []
    #for i in range(10):
    #    model_base, fmodel_base = sample_fmodel(args, hypernet, arch)
    #    models.append(model_base)
    #    fmodels.append(fmodel_base)
    #fmodel_base = attacks.load_model(FusedNet(models))
    model_base, fmodel_base = sample_fmodel(args, hypernet, arch)
    criterion = Misclassification()
    fgs = foolbox.attacks.FGSM(fmodel_base, criterion)
    _, test_loader = datagen.load_mnist(args)
    adv, y = [], []
    for n_models in [10, 100, 800]:
        print('ensemble of {}'.format(n_models))
        for eps in [0.01, 0.03, 0.08, 0.1, 0.3, 0.5, 1.0]:
            total_adv = 0
            acc, _accs = [], []
            # Clean-data statistics vs. their adversarial counterparts.
            _soft, _logs, _vars, _ents = [], [], [], []
            _soft_adv, _logs_adv, _vars_adv, _ents_adv = [], [], [], []
            for idx, (data, target) in enumerate(test_loader):
                data, target = data.cuda(), target.cuda()
                adv_batch, target_batch, _ = sample_adv_batch(
                    data, target, fmodel_base, eps, fgs)
                if adv_batch is None:
                    continue
                # get base hypermodel output, I guess
                output = model_base(adv_batch)
                pred = output.data.max(1, keepdim=True)[1]
                correct = pred.eq(
                    target_batch.data.view_as(pred)).long().cpu().sum()
                n_adv = len(target_batch) - correct.item()
                total_adv += n_adv

                #dis = []
                # Softmax outputs of each sampled model on clean and
                # adversarial versions of the same batch.
                soft_out, pred_out, logits = [], [], []
                soft_out_adv, pred_out_adv, logits_adv = [], [], []
                for n in range(n_models):
                    model, fmodel = sample_fmodel(args, hypernet, arch)
                    output = model(data)
                    soft_out.append(F.softmax(output, dim=1))
                    #pred_out.append(output.data.max(1, keepdim=True)[1])
                    #logits.append(output)

                    output = model(adv_batch)
                    soft_out_adv.append(F.softmax(output, dim=1))
                    #pred_out_adv.append(output.data.max(1, keepdim=True)[1])
                    #logits_adv.append(output)
                    ## correction graph
                    #pred = output.data.max(1, keepdim=True)[1]
                    #correct = pred.eq(target_batch.data.view_as(pred)).long().cpu().sum()
                    #c = len(pred_out) - correct.item()
                    #print ('got {} / {} / {}'.format(correct.item(), len(target_batch), 32))
                    #dis.append(correct.item()/n_adv)
                    ##
                #np.save('/scratch/eecs-share/ratzlafn/acc.npy', np.array(dis))
                #sys.exit(0)

                softs = torch.stack(soft_out).float()
                #preds = torch.stack(pred_out).float()
                #logs = torch.stack(logits).float()
                softs_adv = torch.stack(soft_out_adv).float()
                #preds_adv = torch.stack(pred_out_adv).float()
                #logs_adv = torch.stack(logits_adv).float()
                #np.save('/scratch/eecs-share/ratzlafn/softs.npy', softs.detach().cpu().numpy())
                #np.save('/scratch/eecs-share/ratzlafn/logs.npy', logs.detach().cpu().numpy())
                #sys.exit(0)
                # Measure variance of individual logits across models.
                # HyperGAN ensemble has lower variance across 10 class predictions
                # But a single logit has high variance acorss models
                units_softmax = softs.var(
                    0).mean().item()  # var across models across images
                ent = float(entropy(softs.mean(0).detach()).mean())
                #units_logprob = logs.var(0).mean().item()
                ensemble_var = softs.mean(0).var(1).mean().item()

                units_softmax_adv = softs_adv.var(
                    0).mean().item()  # var across models - images
                ent_adv = float(entropy(softs_adv.mean(0).detach()).mean())

                #units_logprob_adv = logs_adv.var(0).mean().item()
                ensemble_var_adv = softs_adv.mean(0).var(1).mean().item()
                """ Core Debug """
                # print ('softmax var: ', units_softmax)
                # print ('logprob var: ', units_logprob)
                # print ('ensemble var: ', ensemble_var)

                # build lists
                _soft.append(units_softmax)
                #_logs.append(units_logprob)
                _vars.append(ensemble_var)
                _ents.append(ent)
                _soft_adv.append(units_softmax_adv)
                #_logs_adv.append(units_logprob_adv)
                _vars_adv.append(ensemble_var_adv)
                _ents_adv.append(ent_adv)

                # Only a handful of batches per epsilon are evaluated.
                if idx > 5:
                    print(
                        'NAT: Log var: -, Softmax var: {}, Ent: {}, Ens var: {}'
                        .format(
                            #torch.tensor(_logs).mean(),
                            torch.tensor(_soft).mean(),
                            torch.tensor(_ents).mean(),
                            torch.tensor(_vars).mean()))
                    print(
                        'ADV Eps: {}, Log var: -, Softmax var: {}, Ent: {}, Ens var: {}'
                        .format(
                            eps,
                            #torch.tensor(_logs_adv).mean(),
                            torch.tensor(_soft_adv).mean(),
                            torch.tensor(_ents_adv).mean(),
                            torch.tensor(_vars_adv).mean()))
                    break
            """
Пример #17
0
def criterion():
    """Build and return a fresh foolbox ``Misclassification`` criterion."""
    misclassify = Misclassification()
    return misclassify
# Example #18
# score: 0
def run_adv_model(args, models):
    """Probe an ensemble's uncertainty on BIM adversarial MNIST examples.

    For each perturbation budget ``eps``, crafts adversarial batches against
    the fused ensemble and prints diagnostic statistics on both natural and
    adversarial inputs: per-unit softmax/logit variance across ensemble
    members, predictive entropy of the ensemble mean, and per-sample class
    variance of the ensemble prediction.

    Args:
        args: configuration object forwarded to ``datagen.load_mnist``.
        models: sequence of trained torch models forming the ensemble.
            Assumed to already live on the GPU — inputs are moved with
            ``.cuda()`` (TODO confirm against callers).

    Returns:
        None; results are printed to stdout.
    """
    for model in models:
        model.eval()
    model = FusedNet(models)
    fmodel = attacks.load_model(model)
    criterion = Misclassification()
    fgs = foolbox.attacks.BIM(fmodel)
    _, test_loader = datagen.load_mnist(args)
    for eps in [0.01, 0.03, 0.08, .1, .3, .5, 1.0]:
        total_adv = 0
        _soft, _logs, _vars, _ents = [], [], [], []
        _soft_adv, _vars_adv, _ents_adv = [], [], []
        for idx, (data, target) in enumerate(test_loader):
            data, target = data.cuda(), target.cuda()
            adv_batch, target_batch, _ = sample_adv_batch(
                data, target, fmodel, eps, fgs)

            # The attack may find no adversarials in this batch.
            if adv_batch is None:
                continue
            # Initial prediction of the fused ensemble on the adversarial batch.
            output = model(adv_batch)
            pred = output.data.max(1, keepdim=True)[1]
            correct = pred.eq(
                target_batch.data.view_as(pred)).long().cpu().sum()
            n_adv = len(target_batch) - correct.item()

            # Sample each individual ensemble member on natural and
            # adversarial inputs.
            soft_out, logits = [], []
            soft_out_adv = []
            for i in range(len(models)):
                output = models[i](data)
                soft_out.append(F.softmax(output, dim=1))
                logits.append(output)

                # BUG FIX: the original evaluated the fused ensemble
                # (``model``) here, so every stacked adversarial softmax
                # was identical and ``softs_adv.var(0)`` was trivially 0.
                # Evaluate the i-th member, mirroring the natural branch.
                output_adv = models[i](adv_batch)
                soft_out_adv.append(F.softmax(output_adv, dim=1))

            softs = torch.stack(soft_out).float()
            logs = torch.stack(logits).float()
            softs_adv = torch.stack(soft_out_adv).float()
            # Measure variance of individual units across members: the
            # ensemble's class prediction has low variance, while a single
            # logit varies noticeably across members.
            units_softmax = softs.var(
                0).mean().item()  # var across models, averaged over images
            units_logprob = logs.var(0).mean().item()
            ensemble_var = softs.mean(0).var(1).mean().item()
            ent = float(entropy(softs.mean(0).detach()).mean())

            units_softmax_adv = softs_adv.var(
                0).mean().item()  # var across models, averaged over images
            ent_adv = float(entropy(softs_adv.mean(0).detach()).mean())
            ensemble_var_adv = softs_adv.mean(0).var(1).mean().item()

            # Accumulate running statistics for this eps.
            _soft.append(units_softmax)
            _logs.append(units_logprob)
            _vars.append(ensemble_var)
            _ents.append(ent)

            _soft_adv.append(units_softmax_adv)
            _vars_adv.append(ensemble_var_adv)
            _ents_adv.append(ent_adv)

            total_adv += n_adv
            if idx % 10 == 0 and idx > 1:
                # BUG FIX: the NAT format string had 4 placeholders but was
                # handed 5 arguments (a stray leading ``eps``), shifting
                # every printed value one label to the right.
                print(
                    'NAT: Log var: {}, Softmax var: {}, Ent var: {}, Ens var: {}'
                    .format(torch.tensor(_logs).mean(),
                            torch.tensor(_soft).mean(),
                            torch.tensor(_ents).mean(),
                            torch.tensor(_vars).mean()))
                print(
                    'ADV: Eps: {}, Ent var: {}, Softmax var: {}, Ens var: {}'.
                    format(eps,
                           torch.tensor(_ents_adv).mean(),
                           torch.tensor(_soft_adv).mean(),
                           torch.tensor(_vars_adv).mean()))

                break
        """
 def __init__(self, model, min_perturbation=None, max_iterations=100, subsample=10, criterion=Misclassification(),
              distance=MSE):
     super().__init__(attack_method_def=DeepFoolL2Attack, model=model, min_perturbation=min_perturbation,
                      criterion=criterion, distance=distance)
     self._max_iterations = max_iterations
     self._subsample = subsample
 def __init__(self, model, step_size_iter=0.05, max_perturbation=0.3, n_iterations=10, min_perturbation=None,
              binary_search=True, random_start=False, return_early=True, criterion=Misclassification(),
              distance=MSE):
     super().__init__(attack_method_def=LinfinityBasicIterativeAttack, model=model,
                      min_perturbation=min_perturbation, criterion=criterion, distance=distance)
     self._binary_search = binary_search
     self._step_size_iter = step_size_iter
     self._n_iterations = n_iterations
     self._random_start = random_start
     self._return_early = return_early
     self._max_perturbation = max_perturbation