def main_defense_script():

    ########################################################################
    #   SHARED BLOCK                                                       #
    ########################################################################

    # Initialize CIFAR classifier
    classifier_net = cifar_loader.load_pretrained_cifar_resnet(flavor=32,
                                                               use_gpu=False)
    classifier_net.eval()

    # Differentiable normalizer needed for classification
    cifar_normer = utils.DifferentiableNormalize(mean=config.CIFAR10_MEANS,
                                                 std=config.CIFAR10_STDS)

    ######################################################################
    #                     SIMPLE FGSM TRAINING EXAMPLE                   #
    ######################################################################
    if True:  # toggle this training example on/off
        # Steps
        # 0) initialize hyperparams for attack/training
        # 1) setup attack loss object
        # 2) build attack and parameters for attack
        # 3) build training object, training loss, data loader
        # 4) train

        # 0
        FGSM_L_INF = 8.0 / 255.0
        FGSM_TRAINING_ATTACK_PROPORTION = 0.5
        FGSM_TRAINING_EPOCHS = 10

        # 1
        fgsm_attack_loss = plf.VanillaXentropy(classifier_net, cifar_normer)

        # 2
        fgsm_xentropy_attack_obj = aa.FGSM(classifier_net, cifar_normer,
                                           fgsm_attack_loss)
        fgsm_xentropy_attack_params = advtrain.AdversarialAttackParameters(
            fgsm_xentropy_attack_obj, FGSM_TRAINING_ATTACK_PROPORTION,
            {'attack_kwargs': {
                'l_inf_bound': FGSM_L_INF
            }})

        # 3
        half_fgsm_cifar = advtrain.AdversarialTraining(classifier_net,
                                                       cifar_normer,
                                                       'half_fgsm_cifar',
                                                       'cifar_resnet32')
        train_loss = nn.CrossEntropyLoss()
        train_loader = cifar_loader.load_cifar_data('train', normalize=False)

        # 4
        half_fgsm_cifar.train(train_loader,
                              FGSM_TRAINING_EPOCHS,
                              train_loss,
                              attack_parameters=fgsm_xentropy_attack_params,
                              verbosity='snoop')


def main_evaluation_script():
    """ Here's a little script to show how to evaluate a trained model
        against varying attacks (on the fly, without saving adv examples)
    """

    # Steps
    # 0) Initialize a classifier/normalizer/evaluation loader
    # 1) Build some attack objects to try
    # 2) Run the evaluation and print results

    # 0
    classifier_net = cifar_loader.load_pretrained_cifar_resnet(flavor=32,
                                                               use_gpu=False)
    cifar_normer = utils.DifferentiableNormalize(mean=config.CIFAR10_MEANS,
                                                 std=config.CIFAR10_STDS)
    val_loader = cifar_loader.load_cifar_data('val', normalize=False)

    # 1
    L_INF_BOUND = 8.0 / 255.0
    # --- FGSM attack
    fgsm_xentropy_loss = plf.VanillaXentropy(classifier_net,
                                             normalizer=cifar_normer)

    fgsm_attack_obj = aa.FGSM(classifier_net, cifar_normer, fgsm_xentropy_loss)
    fgsm_spec_params = {'attack_kwargs': {'l_inf_bound': L_INF_BOUND}}
    fgsm_attack_params = advtrain.AdversarialAttackParameters(
        fgsm_attack_obj, 0.5, fgsm_spec_params)

    # --- BIM attack
    BIM_L_INF = 8.0 / 255.0
    BIM_STEP_SIZE = 1.0 / 255.0
    BIM_NUM_ITER = 16

    bim_xentropy_loss = plf.VanillaXentropy(classifier_net,
                                            normalizer=cifar_normer)

    bim_attack_obj = aa.BIM(classifier_net, cifar_normer, bim_xentropy_loss)
    bim_spec_params = {
        'attack_kwargs': {
            'l_inf_bound': BIM_L_INF,
            'step_size': BIM_STEP_SIZE,
            'num_iterations': BIM_NUM_ITER
        }
    }
    bim_attack_params = advtrain.AdversarialAttackParameters(
        bim_attack_obj, 0.5, bim_spec_params)

    attack_ensemble = {'fgsm': fgsm_attack_params, 'bim': bim_attack_params}

    # 2
    eval_obj = advtrain.AdversarialEvaluation(classifier_net, cifar_normer)
    eval_out = eval_obj.evaluate(val_loader,
                                 attack_ensemble,
                                 num_minibatches=5)
Example #3
def main(architecture_name,
         experiment_name,
         num_epochs,
         batch_size=128,
         resume=False,
         verbosity='high'):
    use_gpu = torch.cuda.is_available()
    validate_architecture(architecture_name)
    validate_filenaming(experiment_name)

    ##########################################################################
    #   Load the model + data loader                                         #
    ##########################################################################

    if architecture_name.startswith('resnet'):
        flavor = int(re.sub('^resnet', '', architecture_name))
        model, normalizer = cl.load_pretrained_cifar_resnet(
            flavor=flavor, use_gpu=use_gpu, return_normalizer=True)

    elif architecture_name.startswith('wide-resnet'):
        model, normalizer = cl.load_pretrained_cifar_wide_resnet(
            use_gpu=use_gpu, return_normalizer=True)
    else:
        raise ValueError("INVALID ARCHITECTURE: %s" % architecture_name)

    cifar_dataset = cl.load_cifar_data('train',
                                       batch_size=batch_size,
                                       use_gpu=use_gpu)

    #########################################################################
    #   Build the training object + Train                                   #
    #########################################################################

    train_obj = advtrain.AdversarialTraining(model, normalizer,
                                             experiment_name,
                                             architecture_name)

    if resume:
        train_fxn = train_obj.train_from_checkpoint
    else:
        train_fxn = train_obj.train

    attack_params = build_attack_params(model, normalizer, use_gpu)
    criterion = nn.CrossEntropyLoss()
    train_fxn(cifar_dataset,
              num_epochs,
              criterion,
              attack_parameters=attack_params,
              use_gpu=use_gpu,
              verbosity=verbosity)
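
# build_attack_params is called above but not defined in this snippet. A
# minimal sketch of what it might look like (an assumption, following the
# ThreatModel/PGD pattern from the later examples and assuming the plf, ap,
# aa imports used there; the epsilon, step size, and iteration count are
# illustrative values, not taken from the original):
def build_attack_params(model, normalizer, use_gpu):
    attack_loss = plf.VanillaXentropy(model, normalizer)
    linf_threat = ap.ThreatModel(ap.DeltaAddition, {
        'lp_style': 'inf',
        'lp_bound': 8.0 / 255.0
    })
    pgd_attack = aa.PGD(model, normalizer, linf_threat, attack_loss)
    pgd_kwargs = {
        'step_size': 2.0 / 255.0,
        'num_iterations': 10,
        'verbose': False
    }
    return advtrain.AdversarialAttackParameters(
        pgd_attack, 0.5,
        attack_specific_params={'attack_kwargs': pgd_kwargs})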
Example #4
if __name__ == '__main__':
    # Assumes `args` (with a target_model field) comes from an argparse
    # parser defined elsewhere in this script
    use_cuda = True  # set False to force CPU

    # Define what device we are using
    print("CUDA Available: ", torch.cuda.is_available())
    device = torch.device("cuda" if (
        use_cuda and torch.cuda.is_available()) else "cpu")

    # net = resnet.ResNet18()
    # net = net.cuda()
    # net = torch.nn.DataParallel(net)
    # checkpoint = torch.load("H:/adversarial_attacks/pytorch-cifar/checkpoint/DataPackpt.pth")
    # net.load_state_dict(checkpoint['net'])
    # target_model = net

    # resnet32
    if args.target_model == 'resnet32':
        target_model = cifar_loader.load_pretrained_cifar_resnet(flavor=32)
    elif args.target_model == 'resnet20':
        target_model = cifar_loader.load_pretrained_cifar_resnet(flavor=20)
    elif args.target_model == 'wideresnet':
        target_model = cifar_loader.load_pretrained_cifar_wide_resnet()
    elif args.target_model == "mnist_2":
        target_model = models.LeNet5()
        target_model.load_state_dict(torch.load('./trained_lenet5.pkl'))
    # target_model = target_model.cuda()
    # target_model.eval()

    # resnet32_advtrain
    # target_model = resnet32()
    # target_model.load_state_dict(torch.load('./advtrain.resnet32.000100.path.tar'))

    target_model = target_model.to(device)
Example #5
import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision import transforms
import torch.nn.functional as F

import cifar10.cifar_loader as cifar_loader
from cifar10.cifar_resnets import resnet32

import models
import resnet

use_cuda = True
image_nc = 3
batch_size = 64

# Define what device we are using
print("CUDA Available: ", torch.cuda.is_available())
device = torch.device("cuda" if (use_cuda and torch.cuda.is_available()) else "cpu")
target_model = cifar_loader.load_pretrained_cifar_resnet(flavor=32,
                                                         return_normalizer=False)
# target_model = resnet32()
# target_model.load_state_dict(torch.load('./cifar10_resnet32.th')['state_dict'].items())
target_model = target_model.to(device)
target_model.eval()
transform = transforms.Compose([transforms.ToTensor()])
cifar10_dataset = torchvision.datasets.CIFAR10('../cifar-10-batches-py',
                                               train=False,
                                               transform=transform,
                                               download=True)
test_dataloader = DataLoader(cifar10_dataset,
                             batch_size=batch_size,
                             shuffle=False,
                             num_workers=0)
num_correct = 0
for i, data in enumerate(test_dataloader, 0):
    test_img, test_label = data
    test_img, test_label = test_img.to(device), test_label.to(device)
    probs_model = F.softmax(target_model(test_img), dim=1)
    onehot_labels = torch.eye(10, device=device)[test_label]
    print(onehot_labels.size())
    real = torch.sum(onehot_labels * probs_model, dim=1)
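    # `real` above is the probability the model assigns to the true class.
    # A plausible completion of this loop (assumed, not in the original
    # snippet): count top-1 correct predictions to get clean accuracy.
    pred_label = torch.argmax(probs_model, dim=1)
    num_correct += torch.sum(pred_label == test_label).item()

print('accuracy on clean test images: %f' % (num_correct / len(cifar10_dataset)))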
Example #6
    def __init__(self, args):
        # parameters
        self.epoch = args.epoch
        self.sample_num = 100
        self.batch_size = args.batch_size
        self.save_dir = args.save_dir
        self.result_dir = args.result_dir
        self.dataset = args.dataset
        self.log_dir = args.log_dir
        self.gpu_mode = args.gpu_mode
        self.model_name = args.gan_type
        self.input_size = args.input_size
        self.z_dim = 62
        self.lambda_ = 10
        # the number of iterations of the critic per generator iteration
        self.n_critic = 5
        self.checkpoint = args.checkpoint

        self.loss_adv_avg = 1
        self.loss_perturb_avg = 1
        # load dataset
        self.data_loader = dataloader(self.dataset, self.input_size,
                                      self.batch_size)
        data = next(iter(self.data_loader))[0]

        # networks init
        self.G = generator(input_dim=self.z_dim,
                           output_dim=data.shape[1],
                           input_size=self.input_size)
        self.D = discriminator(input_dim=data.shape[1],
                               output_dim=1,
                               input_size=self.input_size)
        self.G_optimizer = optim.Adam(self.G.parameters(),
                                      lr=args.lrG,
                                      betas=(args.beta1, args.beta2))
        self.D_optimizer = optim.Adam(self.D.parameters(),
                                      lr=args.lrD,
                                      betas=(args.beta1, args.beta2))
        # load checkpoint
        if self.checkpoint != '':
            print(self.checkpoint + 'G.pkl')
            self.G.load_state_dict(torch.load(self.checkpoint + 'G.pkl'))
            self.D.load_state_dict(torch.load(self.checkpoint + 'D.pkl'))

        if self.gpu_mode:
            self.G.cuda()
            self.D.cuda()

        print('---------- Networks architecture -------------')
        Utils.print_network(self.G)
        Utils.print_network(self.D)
        print('-----------------------------------------------')
        #cifar10 targeted model
        self.target_model = args.target_model
        if self.target_model == "resnet20":
            self.model = cifar_loader.load_pretrained_cifar_resnet(flavor=20)
        elif self.target_model == "resnet32":
            self.model = cifar_loader.load_pretrained_cifar_resnet(flavor=32)
        elif self.target_model == "wideresnet":
            self.model = cifar_loader.load_pretrained_cifar_wide_resnet()
        elif self.target_model == "mnist_2":
            self.model = mnist.model.LeNet5()
            self.model.load_state_dict(torch.load('trained_lenet5.pkl'))
        #adv train
        # model = cifar_resnets.resnet32()
        # model.load_state_dict(torch.load('./advtrain.resnet32.000100.path.tar'))
        #mnist
        # from mnist import model, dataset
        # self.model = model.mnist(pretrained=os.path.join(os.path.expanduser('~/.torch/models'), 'mnist.pth'))

        self.model = self.model.cuda()
        self.model.eval()
        # fixed noise
        self.sample_z_ = torch.rand((self.batch_size, self.z_dim))
        if self.gpu_mode:
            self.sample_z_ = self.sample_z_.cuda()
        # Make sure the log directory exists before configuring file logging
        log_path = os.path.join(self.save_dir, self.dataset, self.model_name,
                                self.model_name + 'log.txt')
        if not os.path.exists(os.path.dirname(log_path)):
            os.makedirs(os.path.dirname(log_path))
        logging.basicConfig(
            level=logging.INFO,
            filename=log_path,
            filemode='w',
            format='%(asctime)s - %(filename)s[line:%(lineno)d] - '
                   '%(levelname)s: %(message)s')
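
    # `self.lambda_` and `self.n_critic` above are the standard WGAN-GP
    # hyperparameters. For reference, a minimal sketch of the gradient
    # penalty that `lambda_` weights (an assumed helper, not part of the
    # original class); the critic self.D is stepped n_critic times per
    # generator update:
    def gradient_penalty(self, real, fake):
        # Interpolate between real and generated samples
        alpha = torch.rand(real.size(0), 1, 1, 1, device=real.device)
        interp = (alpha * real + (1 - alpha) * fake).requires_grad_(True)
        d_interp = self.D(interp)
        # Penalize deviation of the critic's gradient norm from 1
        grads = torch.autograd.grad(outputs=d_interp, inputs=interp,
                                    grad_outputs=torch.ones_like(d_interp),
                                    create_graph=True)[0]
        grad_norm = grads.view(grads.size(0), -1).norm(2, dim=1)
        return self.lambda_ * ((grad_norm - 1) ** 2).mean()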
Example #7
def main(config):
    defence_method = config.defence
    flavor = config.architecture
    blackbox = config.blackbox
    flavor_blackbox = config.flavor_blackbox
    epoch = config.epoch
    #    assert defence_method in ['PLAIN','FGSM', 'PGD', 'CW'],"INVALID ATTACK: %s" % defence_method
    assert flavor in ['20', '56', 'wide'], "INVALID ARCHITECTURE: %s" % flavor

    # Load the trained model and normalizer
    if flavor in ['20', '56']:
        model, normalizer = cifar_loader.load_pretrained_cifar_resnet(
            flavor=int(flavor), return_normalizer=True)
    elif flavor == 'wide':
        model, normalizer = cifar_loader.load_pretrained_cifar_wide_resnet(
            return_normalizer=True)

    if defence_method in ['FGSM', 'PGD', 'CW', 'PGD40', 'PGD100']:
        model = checkpoints.load_state_dict(defence_method + 'ResNet' + flavor,
                                            'resnet' + flavor, epoch, model)
    elif defence_method != 'PLAIN':
        bad_state_dict = torch.load('./pretrained_models/' + defence_method +
                                    '.pth')
        correct_state_dict = {
            re.sub(r'^.*feature_extractor\.', '', k): v
            for k, v in bad_state_dict.items()
        }
        model.load_state_dict(correct_state_dict)

    # Load the evaluation dataset
    cifar_valset = cifar_loader.load_cifar_data('val',
                                                no_transform=True,
                                                shuffle=False,
                                                batch_size=100)

    # Put this into the AdversarialEvaluation object
    adv_eval_object = adveval.AdversarialEvaluation(model, normalizer)

    # Use blackbox attack or not
    if blackbox:
        surrogate, normalizer_surr = cifar_loader.load_pretrained_cifar_resnet(
            flavor=int(flavor_blackbox), return_normalizer=True)
        surrogate.cuda()
    else:
        surrogate = model
        normalizer_surr = normalizer
    # First let's build the attack parameters for each.
    # we'll reuse the loss function:
    attack_loss = plf.VanillaXentropy(surrogate, normalizer_surr)
    linf_8_threat = ap.ThreatModel(ap.DeltaAddition, {
        'lp_style': 'inf',
        'lp_bound': 8.0 / 255.0
    })

    #------ FGSM Block
    fgsm_attack = aa.FGSM(surrogate, normalizer_surr, linf_8_threat,
                          attack_loss)
    fgsm_attack_kwargs = {'step_size': 8.0 / 255.0, 'verbose': False}
    fgsm_attack_params = advtrain.AdversarialAttackParameters(
        fgsm_attack,
        attack_specific_params={'attack_kwargs': fgsm_attack_kwargs})

    # ------ pgd10 Block
    pgd10_attack = aa.PGD(surrogate, normalizer_surr, linf_8_threat,
                          attack_loss)
    pgd10_attack_kwargs = {
        'step_size': 8.0 / 255.0 / 4.0,
        'num_iterations': 10,
        'keep_best': True,
        'verbose': False
    }
    pgd10_attack_params = advtrain.AdversarialAttackParameters(
        pgd10_attack,
        attack_specific_params={'attack_kwargs': pgd10_attack_kwargs})

    # ------ pgd100 Block
    pgd100_attack = aa.PGD(surrogate, normalizer_surr, linf_8_threat,
                           attack_loss)
    pgd100_attack_kwargs = {
        'step_size': 8.0 / 255.0 / 12.0,
        'num_iterations': 100,
        'keep_best': True,
        'verbose': False
    }
    pgd100_attack_params = advtrain.AdversarialAttackParameters(
        pgd100_attack,
        attack_specific_params={'attack_kwargs': pgd100_attack_kwargs})

    # ------ CarliniWagner100 Block
    cwloss6 = lf.CWLossF6
    distance_fxn = lf.SoftLInfRegularization
    cw100_attack = aa.CarliniWagner(surrogate, normalizer_surr, linf_8_threat,
                                    distance_fxn, cwloss6)
    cw100_attack_kwargs = {'num_optim_steps': 100, 'verbose': False}
    cw100_attack_params = advtrain.AdversarialAttackParameters(
        cw100_attack,
        attack_specific_params={'attack_kwargs': cw100_attack_kwargs})

    # ------ CarliniWagner1000 Block
    cwloss6 = lf.CWLossF6
    distance_fxn = lf.SoftLInfRegularization
    cw1000_attack = aa.CarliniWagner(surrogate, normalizer_surr, linf_8_threat,
                                     distance_fxn, cwloss6)
    cw1000_attack_kwargs = {'num_optim_steps': 1000, 'verbose': False}
    cw1000_attack_params = advtrain.AdversarialAttackParameters(
        cw1000_attack,
        attack_specific_params={'attack_kwargs': cw1000_attack_kwargs})
    '''
    Next we'll build the EvaluationResult objects that wrap these.
    Let's say we'll evaluate the:
    - top1 accuracy
    - average loss
    - average SSIM distance of successful perturbations [don't worry too much about this]

    The 'to_eval' dict passed to the constructor has the structure
        {key: <shorthand fxn>}
    where key is a human-readable handle for what's being evaluated, and
    <shorthand fxn> is either a string naming a prebuilt evaluator or a
    general function you pass in to evaluate.
    '''

    to_eval_dict = {
        'top1': 'top1',
        'avg_loss_value': 'avg_loss_value',
        'avg_successful_ssim': 'avg_successful_ssim'
    }
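
    # The string values above are prebuilt-evaluator shorthands; per the
    # docstring, a general function can be passed instead. A hypothetical
    # sketch (the callable's exact signature is an assumption -- check the
    # adveval documentation):
    #
    #     def my_custom_eval(*attack_results):
    #         ...  # compute and return a scalar from the attack results
    #
    #     to_eval_dict['my_custom_eval'] = my_custom_eval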

    fgsm_eval = adveval.EvaluationResult(fgsm_attack_params,
                                         to_eval=to_eval_dict)

    pgd10_eval = adveval.EvaluationResult(pgd10_attack_params,
                                          to_eval=to_eval_dict)

    pgd100_eval = adveval.EvaluationResult(pgd100_attack_params,
                                           to_eval=to_eval_dict)

    cw100_eval = adveval.EvaluationResult(cw100_attack_params,
                                          to_eval=to_eval_dict)

    cw1000_eval = adveval.EvaluationResult(cw1000_attack_params,
                                           to_eval=to_eval_dict)

    attack_ensemble = {
        'fgsm': fgsm_eval,
        'pgd10': pgd10_eval,
        'pgd100': pgd100_eval,
        'cw100': cw100_eval,
        'cw1000': cw1000_eval
    }
    if blackbox:
        attack_ensemble = {
            'fgsm': fgsm_eval,
            'pgd10': pgd10_eval,
            'pgd100': pgd100_eval
        }
    ensemble_out = adv_eval_object.evaluate_ensemble(cifar_valset,
                                                     attack_ensemble,
                                                     verbose=True,
                                                     num_minibatches=None)

    filename = "result.txt"
    if blackbox:
        filename = "result_blackbox.txt"
    # Now let's build a little helper to print things out cleanly:

    sort_order = {
        'ground': 1,
        'fgsm': 2,
        'pgd10': 3,
        'pgd100': 4,
        'cw100': 5,
        'cw1000': 6
    }
    if blackbox:
        sort_order = {'ground': 1, 'fgsm': 2, 'pgd10': 3, 'pgd100': 4}

    def pretty_printer(eval_ensemble, result_type):
        with open(filename, "a") as f:
            print('~' * 10, result_type, '~' * 10)
            f.write('~' * 10 + result_type + '~' * 10 + "\n")
            for key in sorted(eval_ensemble.keys(),
                              key=lambda k: sort_order[k]):
                eval_result = eval_ensemble[key]
                if result_type not in eval_result.results:
                    continue
                pad = 6 - len(key)
                avg_result = eval_result.results[result_type].avg
                print(key, pad * ' ', ': ', avg_result)
                f.write(key + pad * ' ' + ': ' + str(avg_result) + "\n")

    '''And then we can print out and look at the results:
    This prints the accuracy.
    Ground is the unperturbed accuracy.
    If everything is done right, we should see that iterative PGD is a
    stronger attack against undefended networks than single-step FGSM at
    the same l_inf bound of 8/255.
    '''
    with open(filename, "a") as f:
        f.write('Result for ' + defence_method + ' ResNet{}'.format(flavor) +
                "\n")
        if blackbox:
            f.write('Blackbox ' + flavor_blackbox + "\n")
    pretty_printer(ensemble_out, 'top1')
    # We can examine the loss (noting that we seek to 'maximize' loss in the adversarial example domain)
    pretty_printer(ensemble_out, 'avg_loss_value')
    # This is actually 1-SSIM, which can serve as a makeshift 'similarity
    # index', giving a meterstick for how similar the perturbed images are
    # to the originals
    pretty_printer(ensemble_out, 'avg_successful_ssim')
    with open(filename, "a") as f:
        f.write("\n")
Example #8
def main_attack_script(attack_examples=None, show_images=False):

    # Which attacks to do...
    attack_examples = attack_examples or [
        'FGSM', 'BIM', 'PGD', 'CWL2', 'CWLInf'
    ]

    ########################################################################
    #   SHARED BLOCK                                                       #
    ########################################################################

    # Initialize CIFAR classifier
    classifier_net = cifar_loader.load_pretrained_cifar_resnet(flavor=32)
    classifier_net.eval()

    # Collect one minibatch worth of data/targets
    val_loader = cifar_loader.load_cifar_data('val',
                                              normalize=False,
                                              batch_size=16)
    ex_minibatch, ex_targets = next(iter(val_loader))

    # Differentiable normalizer needed for classification
    cifar_normer = utils.DifferentiableNormalize(mean=config.CIFAR10_MEANS,
                                                 std=config.CIFAR10_STDS)

    #########################################################################
    #   FGSM ATTACK BLOCK                                                   #
    #########################################################################
    if 'FGSM' in attack_examples:
        # Example FGSM attack on a single minibatch
        # steps:
        #   0) initialize hyperparams
        #   1) setup loss object
        #   2) build attack object
        #   3) setup examples to attack
        #   4) perform attack
        #   5) evaluate attack (accuracy + display a few images )

        FGSM_L_INF = 8.0 / 255.0

        delta_threat = ap.ThreatModel(ap.DeltaAddition, {
            'lp_style': 'inf',
            'lp_bound': FGSM_L_INF
        })

        fgsm_xentropy_loss = plf.VanillaXentropy(classifier_net,
                                                 normalizer=cifar_normer)

        fgsm_attack_obj = aa.FGSM(classifier_net, cifar_normer, delta_threat,
                                  fgsm_xentropy_loss)

        fgsm_original_images = ex_minibatch
        fgsm_original_labels = ex_targets

        fgsm_adv_images = fgsm_attack_obj.attack(
            fgsm_original_images, fgsm_original_labels,
            FGSM_L_INF).adversarial_tensors()

        fgsm_accuracy = fgsm_attack_obj.eval(fgsm_original_images,
                                             fgsm_adv_images,
                                             fgsm_original_labels)
        print("FGSM ATTACK ACCURACY: ")
        print("\t Original %% correct:    %s" % fgsm_accuracy[0])
        print("\t Adversarial %% correct: %s" % fgsm_accuracy[1])

        if show_images:
            img_utils.display_adversarial_2row(classifier_net, cifar_normer,
                                               fgsm_original_images,
                                               fgsm_adv_images, 4)

    ##########################################################################
    #   BIM ATTACK BLOCK                                                     #
    ##########################################################################

    if 'BIM' in attack_examples:
        # Example BIM attack on a single minibatch
        # steps:
        #   0) initialize hyperparams
        #   1) setup loss object
        #   2) build attack object
        #   3) setup examples to attack
        #   4) perform attack
        #   5) evaluate attack

        BIM_L_INF = 8.0 / 255.0
        BIM_STEP_SIZE = 1.0 / 255.0
        BIM_NUM_ITER = 16

        bim_xentropy_loss = plf.VanillaXentropy(classifier_net,
                                                normalizer=cifar_normer)

        bim_attack_obj = aa.BIM(classifier_net, cifar_normer,
                                bim_xentropy_loss)

        bim_original_images = ex_minibatch
        bim_original_labels = ex_targets

        bim_adv_images = bim_attack_obj.attack(bim_original_images,
                                               bim_original_labels,
                                               l_inf_bound=BIM_L_INF,
                                               step_size=BIM_STEP_SIZE,
                                               num_iterations=BIM_NUM_ITER)

        bim_accuracy = bim_attack_obj.eval(bim_original_images, bim_adv_images,
                                           bim_original_labels)
        print("BIM ATTACK ACCURACY: ")
        print("\t Original %% correct:    %s" % bim_accuracy[0])
        print("\t Adversarial %% correct: %s" % bim_accuracy[1])

        if show_images:
            img_utils.display_adversarial_2row(classifier_net, cifar_normer,
                                               bim_original_images,
                                               bim_adv_images, 4)

    ##########################################################################
    #   PGD ATTACK BLOCK                                                     #
    ##########################################################################

    if 'PGD' in attack_examples:
        # Example PGD attack on a single minibatch
        # steps:
        #   0) initialize hyperparams
        #   1) setup loss object
        #   2) build attack object
        #   3) setup examples to attack
        #   4) perform attack
        #   5) evaluate attack

        PGD_L_INF = 8.0 / 255.0
        PGD_STEP_SIZE = 1.0 / 255.0
        PGD_NUM_ITER = 16

        pgd_xentropy_loss = plf.VanillaXentropy(classifier_net,
                                                normalizer=cifar_normer)

        delta_threat = ap.ThreatModel(ap.DeltaAddition, {
            'lp_style': 'inf',
            'lp_bound': PGD_L_INF
        })

        pgd_attack_obj = aa.PGD(classifier_net, cifar_normer, delta_threat,
                                pgd_xentropy_loss)

        pgd_original_images = ex_minibatch
        pgd_original_labels = ex_targets

        pgd_adv_images = pgd_attack_obj.attack(
            pgd_original_images,
            pgd_original_labels,
            step_size=PGD_STEP_SIZE,
            num_iterations=PGD_NUM_ITER).adversarial_tensors()

        pgd_accuracy = pgd_attack_obj.eval(pgd_original_images, pgd_adv_images,
                                           pgd_original_labels)
        print("PGD ATTACK ACCURACY: ")
        print("\t Original %% correct:    %s" % pgd_accuracy[0])
        print("\t Adversarial %% correct: %s" % pgd_accuracy[1])

        if show_images:
            img_utils.display_adversarial_2row(classifier_net, cifar_normer,
                                               pgd_original_images,
                                               pgd_adv_images, 4)

    ##########################################################################
    #   CW L2 ATTACK                                                         #
    ##########################################################################

    if 'CWL2' in attack_examples:

        # Example Carlini Wagner L2 attack on a single minibatch
        # steps:
        #   0) initialize hyperparams
        #   1) setup loss object
        #   2) build attack object
        #   3) setup examples to attack
        #   4) perform attack
        #   5) evaluate attack

        CW_NUM_BIN_SEARCH_STEPS = 5
        CW_NUM_OPTIM_STEPS = 1000

        cw_f6loss = lf.CWLossF6
        delta_threat = ap.ThreatModel(ap.DeltaAddition, {
            'lp_style': 2,
            'lp_bound': 3072.0
        })
        cwl2_obj = aa.CarliniWagner(classifier_net, cifar_normer, delta_threat,
                                    lf.L2Regularization, cw_f6loss)

        cwl2_original_images = ex_minibatch
        cwl2_original_labels = ex_targets

        cwl2_output = cwl2_obj.attack(
            ex_minibatch,
            ex_targets,
            num_bin_search_steps=CW_NUM_BIN_SEARCH_STEPS,
            num_optim_steps=CW_NUM_OPTIM_STEPS,
            verbose=True)

        print(cwl2_output['best_dist'])
        cwl2_adv_images = cwl2_output['best_adv_images']

        cwl2_accuracy = cwl2_obj.eval(cwl2_original_images, cwl2_adv_images,
                                      cwl2_original_labels)
        print("CWL2 ATTACK ACCURACY: ")
        print("\t Original %% correct:    %s" % cwl2_accuracy[0])
        print("\t Adversarial %% correct: %s" % cwl2_accuracy[1])

        if show_images:
            img_utils.display_adversarial_2row(classifier_net, cifar_normer,
                                               cwl2_original_images,
                                               cwl2_adv_images, 4)
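Example #9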
def main_attack_script(attack_examples=None, show_images=False, use_gpu=False):

    # Which attacks to do...
    attack_examples = attack_examples or [
        'FGSM', 'BIM', 'PGD', 'CWL2', 'CWLInf'
    ]

    ########################################################################
    #   SHARED BLOCK                                                       #
    ########################################################################

    # Initialize CIFAR classifier
    classifier_net = cifar_loader.load_pretrained_cifar_resnet(flavor=32,
                                                               use_gpu=use_gpu)
    classifier_net.eval()

    # Collect one minibatch worth of data/targets
    val_loader = cifar_loader.load_cifar_data('val',
                                              normalize=False,
                                              batch_size=16,
                                              use_gpu=use_gpu)
    ex_minibatch, ex_targets = next(iter(val_loader))

    # Differentiable normalizer needed for classification
    cifar_normer = utils.DifferentiableNormalize(mean=config.CIFAR10_MEANS,
                                                 std=config.CIFAR10_STDS)

    #########################################################################
    #   FGSM ATTACK BLOCK                                                   #
    #########################################################################
    if 'FGSM' in attack_examples:
        # Example FGSM attack on a single minibatch
        # steps:
        #   0) initialize hyperparams
        #   1) setup loss object
        #   2) build attack object
        #   3) setup examples to attack
        #   4) perform attack
        #   5) evaluate attack (accuracy + display a few images )

        FGSM_L_INF = 8.0 / 255.0

        fgsm_xentropy_loss = plf.VanillaXentropy(classifier_net,
                                                 normalizer=cifar_normer)

        fgsm_attack_obj = aa.FGSM(classifier_net, cifar_normer,
                                  fgsm_xentropy_loss)

        fgsm_original_images = ex_minibatch
        fgsm_original_labels = ex_targets

        fgsm_adv_images = fgsm_attack_obj.attack(fgsm_original_images,
                                                 fgsm_original_labels,
                                                 FGSM_L_INF)

        fgsm_accuracy = fgsm_attack_obj.eval(fgsm_original_images,
                                             fgsm_adv_images,
                                             fgsm_original_labels)
        print "FGSM ATTACK ACCURACY: "
        print "\t Original %% correct:    %s" % fgsm_accuracy[0]
        print "\t Adversarial %% correct: %s" % fgsm_accuracy[1]

        if show_images:
            img_utils.display_adversarial_2row(classifier_net, cifar_normer,
                                               fgsm_original_images,
                                               fgsm_adv_images, 4)

    ##########################################################################
    #   BIM ATTACK BLOCK                                                     #
    ##########################################################################

    if 'BIM' in attack_examples:
        # Example BIM attack on a single minibatch
        # steps:
        #   0) initialize hyperparams
        #   1) setup loss object
        #   2) build attack object
        #   3) setup examples to attack
        #   4) perform attack
        #   5) evaluate attack

        BIM_L_INF = 8.0 / 255.0
        BIM_STEP_SIZE = 1.0 / 255.0
        BIM_NUM_ITER = 16

        bim_xentropy_loss = plf.VanillaXentropy(classifier_net,
                                                normalizer=cifar_normer)

        bim_attack_obj = aa.BIM(classifier_net, cifar_normer,
                                bim_xentropy_loss)

        bim_original_images = ex_minibatch
        bim_original_labels = ex_targets

        bim_adv_images = bim_attack_obj.attack(bim_original_images,
                                               bim_original_labels,
                                               l_inf_bound=BIM_L_INF,
                                               step_size=BIM_STEP_SIZE,
                                               num_iterations=BIM_NUM_ITER)

        bim_accuracy = bim_attack_obj.eval(bim_original_images, bim_adv_images,
                                           bim_original_labels)
        print "BIM ATTACK ACCURACY: "
        print "\t Original %% correct:    %s" % bim_accuracy[0]
        print "\t Adversarial %% correct: %s" % bim_accuracy[1]

        if show_images:
            img_utils.display_adversarial_2row(classifier_net, cifar_normer,
                                               bim_original_images,
                                               bim_adv_images, 4)

    ##########################################################################
    #   PGD ATTACK BLOCK                                                     #
    ##########################################################################

    if 'PGD' in attack_examples:
        # Example PGD attack on a single minibatch
        # steps:
        #   0) initialize hyperparams
        #   1) setup loss object
        #   2) build attack object
        #   3) setup examples to attack
        #   4) perform attack
        #   5) evaluate attack

        PGD_L_INF = 8.0 / 255.0
        PGD_STEP_SIZE = 1.0 / 255.0
        PGD_NUM_ITER = 16

        pgd_xentropy_loss = plf.VanillaXentropy(classifier_net,
                                                normalizer=cifar_normer)

        pgd_attack_obj = aa.LInfPGD(classifier_net, cifar_normer,
                                    pgd_xentropy_loss)

        pgd_original_images = ex_minibatch
        pgd_original_labels = ex_targets

        pgd_adv_images = pgd_attack_obj.attack(pgd_original_images,
                                               pgd_original_labels,
                                               l_inf_bound=PGD_L_INF,
                                               step_size=PGD_STEP_SIZE,
                                               num_iterations=PGD_NUM_ITER)

        pgd_accuracy = pgd_attack_obj.eval(pgd_original_images, pgd_adv_images,
                                           pgd_original_labels)
        print "PGD ATTACK ACCURACY: "
        print "\t Original %% correct:    %s" % pgd_accuracy[0]
        print "\t Adversarial %% correct: %s" % pgd_accuracy[1]

        if show_images:
            img_utils.display_adversarial_2row(classifier_net, cifar_normer,
                                               pgd_original_images,
                                               pgd_adv_images, 4)

    ##########################################################################
    #   CW L2 ATTACK                                                         #
    ##########################################################################

    if 'CWL2' in attack_examples:

        # Example Carlini Wagner L2 attack on a single minibatch
        # steps:
        #   0) initialize hyperparams
        #   1) setup loss object
        #   2) build attack object
        #   3) setup examples to attack
        #   4) perform attack
        #   5) evaluate attack

        CW_INITIAL_SCALE_CONSTANT = 0.1
        CW_NUM_BIN_SEARCH_STEPS = 5
        CW_NUM_OPTIM_STEPS = 1000
        CW_DISTANCE_METRIC = 'l2'
        CW_CONFIDENCE = 0.0

        cwl2_loss = plf.CWL2Loss(classifier_net, cifar_normer, kappa=0.0)
        cwl2_obj = aa.CW(classifier_net,
                         cifar_normer,
                         cwl2_loss,
                         CW_INITIAL_SCALE_CONSTANT,
                         num_bin_search_steps=CW_NUM_BIN_SEARCH_STEPS,
                         num_optim_steps=CW_NUM_OPTIM_STEPS,
                         distance_metric_type=CW_DISTANCE_METRIC,
                         confidence=CW_CONFIDENCE)

        cwl2_original_images = ex_minibatch
        cwl2_original_labels = ex_targets

        cwl2_output = cwl2_obj.attack(ex_minibatch, ex_targets, verbose=True)

        print(cwl2_output['best_dist'])
        cwl2_adv_images = cwl2_output['best_adv_images']

        cwl2_accuracy = cwl2_obj.eval(cwl2_original_images, cwl2_adv_images,
                                      cwl2_original_labels)
        print "CWL2 ATTACK ACCURACY: "
        print "\t Original %% correct:    %s" % cwl2_accuracy[0]
        print "\t Adversarial %% correct: %s" % cwl2_accuracy[1]

        if show_images:
            img_utils.display_adversarial_2row(classifier_net, cifar_normer,
                                               cwl2_original_images,
                                               cwl2_adv_images, 4)

    ##########################################################################
    #   CW LINF ATTACK                                                       #
    ##########################################################################

    if 'CWLInf' in attack_examples:

        # Example Carlini Wagner LInf attack on a single minibatch
        # steps:
        #   0) initialize hyperparams
        #   1) setup loss object
        #   2) build attack object
        #   3) setup examples to attack
        #   4) perform attack
        #   5) evaluate attack

        CW_INITIAL_SCALE_CONSTANT = 0.1
        CW_NUM_BIN_SEARCH_STEPS = 5
        CW_NUM_OPTIM_STEPS = 1000
        CW_DISTANCE_METRIC = 'linf'
        CW_CONFIDENCE = 0.0

        cwlinf_loss = plf.CWLInfLoss(classifier_net, cifar_normer, kappa=0.0)
        cwlinf_obj = aa.CW(classifier_net,
                           cifar_normer,
                           cwlinf_loss,
                           CW_INITIAL_SCALE_CONSTANT,
                           num_bin_search_steps=CW_NUM_BIN_SEARCH_STEPS,
                           num_optim_steps=CW_NUM_OPTIM_STEPS,
                           distance_metric_type=CW_DISTANCE_METRIC,
                           confidence=CW_CONFIDENCE)

        cwlinf_original_images = ex_minibatch
        cwlinf_original_labels = ex_targets

        cwlinf_output = cwlinf_obj.attack(ex_minibatch,
                                          ex_targets,
                                          verbose=True)

        print(cwlinf_output['best_dist'] * 255.0)
        cwlinf_adv_images = cwlinf_output['best_adv_images']

        cwlinf_accuracy = cwlinf_obj.eval(cwlinf_original_images,
                                          cwlinf_adv_images,
                                          cwlinf_original_labels)
        print "CWLinf ATTACK ACCURACY: "
        print "\t Original %% correct:    %s" % cwlinf_accuracy[0]
        print "\t Adversarial %% correct: %s" % cwlinf_accuracy[1]

        if show_images:
            img_utils.display_adversarial_2row(classifier_net, cifar_normer,
                                               cwlinf_original_images,
                                               cwlinf_adv_images, 4)

    ##########################################################################
    #   URM ATTACK                                                           #
    ##########################################################################

    if 'URM' in attack_examples:

        # Example Uniform Random Method
        # steps:
        #   0) initialize hyperparams
        #   1) setup loss object
        #   2) build attack object
        #   3) setup examples to attack
        #   4) perform attack
        #   5) evaluate attack

        URM_BOUND = 8.0 / 255.0
        URM_TRIES = 100

        urm_loss = lf.IncorrectIndicator(classifier_net,
                                         normalizer=cifar_normer)

        urm_attack = aa.URM(classifier_net,
                            cifar_normer,
                            urm_loss,
                            use_gpu=use_gpu)

        urm_original_images = ex_minibatch
        urm_original_labels = ex_targets

        urm_output = urm_attack.attack(ex_minibatch,
                                       ex_targets,
                                       URM_BOUND,
                                       num_tries=URM_TRIES)

        import interact
Example #10
import torch

import config
import prebuilt_loss_functions as plf
import loss_functions as lf
import utils.pytorch_utils as utils
import utils.image_utils as img_utils
import cifar10.cifar_loader as cifar_loader
import cifar10.cifar_resnets as cifar_resnets
import adversarial_attacks as aa
import adversarial_training as advtrain
import adversarial_evaluation as adveval
import utils.checkpoints as checkpoints

# Load up dataLoader, classifier, normer
use_gpu = torch.cuda.is_available()
classifier_net = cifar_loader.load_pretrained_cifar_resnet(flavor=32,
                                                           use_gpu=use_gpu)
classifier_net.eval()

val_loader = cifar_loader.load_cifar_data('val',
                                          normalize=False,
                                          use_gpu=use_gpu)

cifar_normer = utils.DifferentiableNormalize(mean=config.CIFAR10_MEANS,
                                             std=config.CIFAR10_STDS)

examples, labels = next(iter(val_loader))

# build loss fxn and attack object
loss_fxn = plf.VanillaXentropy(classifier_net, normalizer=cifar_normer)

spatial_attack = aa.SpatialPGDLp(classifier_net, cifar_normer, loss_fxn, 'inf')