Example No. 1
    def get_test_attacks(self):
        """
        Get attacks to test.
        """

        pgd = attacks.BatchGradientDescent()
        pgd.max_iterations = 200
        pgd.base_lr = 0.005
        pgd.momentum = 0.9
        pgd.c = 0
        pgd.lr_factor = 1.25
        pgd.normalized = True
        pgd.backtrack = True
        pgd.initialization = attacks.initializations.LInfUniformNormInitialization(
            self.epsilon)
        pgd.projection = attacks.projections.SequentialProjections([
            attacks.projections.LInfProjection(self.epsilon),
            attacks.projections.BoxProjection()
        ])
        pgd.norm = attacks.norms.LInfNorm()
        untargetedf0 = attacks.objectives.UntargetedF0Objective()

        ours = attacks.BatchGradientDescent()
        ours.max_iterations = 1000
        ours.base_lr = 0.001
        ours.momentum = 0.9
        ours.c = 0
        ours.lr_factor = 1.1
        ours.normalized = True
        ours.backtrack = True
        ours.initialization = attacks.initializations.LInfUniformNormInitialization(
            self.epsilon)
        ours.projection = attacks.projections.SequentialProjections([
            attacks.projections.LInfProjection(self.epsilon),
            attacks.projections.BoxProjection()
        ])
        ours.norm = attacks.norms.LInfNorm()
        untargetedf7p = attacks.objectives.UntargetedF7PObjective()

        ours_zero = attacks.BatchGradientDescent()
        ours_zero.max_iterations = 1000
        ours_zero.base_lr = 0.001
        ours_zero.momentum = 0.9
        ours_zero.c = 0
        ours_zero.lr_factor = 1.1
        ours_zero.normalized = True
        ours_zero.backtrack = True
        ours_zero.initialization = attacks.initializations.ZeroInitialization()
        ours_zero.projection = attacks.projections.SequentialProjections([
            attacks.projections.LInfProjection(self.epsilon),
            attacks.projections.BoxProjection()
        ])
        ours_zero.norm = attacks.norms.LInfNorm()

        return [[pgd, untargetedf0, 50], [ours, untargetedf7p, 10],
                [ours_zero, untargetedf7p, 1]]
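
    # Hypothetical usage sketch (not part of the original snippet): each returned
    # triple is assumed to be [attack, objective, attempts], matching how
    # common.test.attack is called in the test examples further below; model,
    # adversarialset and cuda are assumed to be provided by the surrounding class.
    # for attack, objective, attempts in self.get_test_attacks():
    #     perturbations, probabilities, errors = common.test.attack(
    #         self.model, self.adversarialset, attack, objective,
    #         attempts=attempts, writer=common.summary.SummaryWriter(),
    #         cuda=self.cuda)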
Example No. 2
    def testAdversarialTraining(self):
        model = models.LeNet(10, [1, 28, 28], channels=12)

        cuda = True
        if cuda:
            model = model.cuda()

        optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
        writer = torch.utils.tensorboard.SummaryWriter('./logs/')
        augmentation = None

        epsilon = 0.3
        attack = attacks.BatchGradientDescent()
        attack.max_iterations = 2
        attack.base_lr = 0.1
        attack.momentum = 0
        attack.c = 0
        attack.lr_factor = 1.5
        attack.normalized = True
        attack.backtrack = True
        attack.initialization = attacks.initializations.LInfUniformInitialization(epsilon)
        attack.norm = attacks.norms.LInfNorm()
        attack.projection = attacks.projections.SequentialProjections([attacks.projections.LInfProjection(epsilon), attacks.projections.BoxProjection()])
        objective = attacks.objectives.UntargetedF0Objective()

        trainer = common.train.AdversarialTraining(model, self.trainset, self.testset, optimizer, scheduler, attack, objective, fraction=0.5, augmentation=augmentation, writer=writer, cuda=cuda)
        trainer.summary_gradients = True

        epochs = 10
        trainer.test(-1)
        for e in range(epochs):
            trainer.step(e)
            writer.flush()
            print(e)
Example No. 3
    def testConfidenceCalibratedAdversarialTrainingFraction(self):
        optimizer = torch.optim.SGD(self.model.parameters(),
                                    lr=0.1,
                                    momentum=0.9)
        scheduler = common.train.get_exponential_scheduler(
            optimizer, batches_per_epoch=len(self.trainset))
        writer = common.summary.SummaryDictWriter()
        augmentation = None

        epsilon = 0.3
        attack = attacks.BatchGradientDescent()
        attack.max_iterations = 2
        attack.base_lr = 0.1
        attack.momentum = 0
        attack.c = 0
        attack.lr_factor = 1.5
        attack.normalized = True
        attack.backtrack = True
        attack.initialization = attacks.initializations.LInfUniformInitialization(
            epsilon)
        attack.norm = attacks.norms.LInfNorm()
        attack.projection = attacks.projections.SequentialProjections([
            attacks.projections.LInfProjection(epsilon),
            attacks.projections.BoxProjection()
        ])
        objective = attacks.objectives.UntargetedF7PObjective()

        loss = common.torch.cross_entropy_divergence
        transition = common.utils.partial(common.torch.linear_transition,
                                          norm=attack.norm)

        self.assertRaises(AssertionError,
                          common.train.ConfidenceCalibratedAdversarialTraining,
                          self.model,
                          self.trainset,
                          self.testset,
                          optimizer,
                          scheduler,
                          attack,
                          objective,
                          loss,
                          transition,
                          fraction=1,
                          augmentation=augmentation,
                          writer=writer,
                          cuda=self.cuda)
Example No. 4
    def testBatchGradientDescentNormalizedBacktrack(self):
        epsilon = 0.3
        attack = attacks.BatchGradientDescent()
        attack.max_iterations = 10
        attack.base_lr = 0.1
        attack.momentum = 0
        attack.lr_factor = 1
        attack.c = 0
        attack.normalized = True
        attack.backtrack = True
        attack.initialization = attacks.initializations.LInfUniformInitialization(
            epsilon)
        attack.projection = attacks.projections.SequentialProjections([
            attacks.projections.LInfProjection(epsilon),
            attacks.projections.BoxProjection()
        ])
        attack.norm = attacks.norms.LInfNorm()

        roc_auc = self.runTestAttackPerformance(attack)
        self.assertGreaterEqual(0.7, roc_auc)
Example No. 5
    def get_attack(self):
        """
        Get attacks to test.
        """

        pgd = attacks.BatchGradientDescent()
        pgd.max_iterations = 40
        pgd.base_lr = 0.005
        pgd.momentum = 0.9
        pgd.c = 0
        pgd.lr_factor = 1.5
        pgd.normalized = True
        pgd.backtrack = True
        pgd.initialization = attacks.initializations.LInfUniformNormInitialization(self.epsilon)
        pgd.projection = attacks.projections.SequentialProjections([
            attacks.projections.LInfProjection(self.epsilon),
            attacks.projections.BoxProjection()
        ])
        pgd.norm = attacks.norms.LInfNorm()
        objective = attacks.objectives.UntargetedF0Objective()

        return pgd, objective
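
    # Hypothetical usage sketch (not part of the original snippet): the returned
    # pair could be handed to the adversarial trainer as in Example No. 2,
    # assuming model, trainset, testset, optimizer, scheduler, writer and cuda
    # are set up as in that example.
    # pgd, objective = self.get_attack()
    # trainer = common.train.AdversarialTraining(
    #     model, trainset, testset, optimizer, scheduler, pgd, objective,
    #     fraction=0.5, augmentation=None, writer=writer, cuda=cuda)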
Example No. 6
# common.state.State will automatically determine the corresponding architecture
state = common.state.State.load(model_file)
model = state.model

cuda = True
if cuda:
    model = model.cuda()

batch_size = 100
testset = common.datasets.MNISTTestSet()
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False)
adversarialset = common.datasets.MNISTTestSet(indices=range(100))
adversarialloader = torch.utils.data.DataLoader(adversarialset, batch_size=batch_size, shuffle=False)

epsilon = 0.3
attack = attacks.BatchGradientDescent()
attack.max_iterations = 40
attack.base_lr = 0.05
attack.momentum = 0.9  # use momentum
attack.c = 0
attack.lr_factor = 1.5
attack.normalized = True
attack.backtrack = True
attack.initialization = attacks.initializations.LInfUniformNormInitialization(epsilon)
attack.projection = attacks.projections.SequentialProjections([
    attacks.projections.LInfProjection(epsilon),
    attacks.projections.BoxProjection()
])
attack.norm = attacks.norms.LInfNorm()
objective = attacks.objectives.UntargetedF0Objective()
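
# Hypothetical continuation (not part of the original snippet): running the
# configured attack over the adversarial loader, following the calls to
# common.test.attack in the test examples that follow; attempts=1 and the
# choice of writer are assumptions.
perturbations, adversarial_probabilities, errors = common.test.attack(
    model, adversarialloader, attack, objective, attempts=1,
    writer=common.summary.SummaryWriter(), cuda=cuda)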
Example No. 7
    def testNormalTraining(self):
        optimizer = torch.optim.SGD(self.model.parameters(),
                                    lr=0.1,
                                    momentum=0.9)
        scheduler = common.train.get_exponential_scheduler(
            optimizer, batches_per_epoch=len(self.trainset))
        writer = common.summary.SummaryDictWriter()
        augmentation = None

        trainer = common.train.NormalTraining(self.model,
                                              self.trainset,
                                              self.testset,
                                              optimizer,
                                              scheduler,
                                              augmentation=augmentation,
                                              writer=writer,
                                              cuda=self.cuda)
        trainer.summary_gradients = False

        epochs = 10
        for e in range(epochs):
            trainer.step(e)

        probabilities = common.test.test(self.model,
                                         self.testset,
                                         cuda=self.cuda)
        eval = common.eval.CleanEvaluation(probabilities,
                                           self.testset.dataset.labels,
                                           validation=0)
        self.assertGreaterEqual(0.05, eval.test_error())

        epsilon = 0.3
        attack = attacks.BatchGradientDescent()
        attack.max_iterations = 2
        attack.base_lr = 0.1
        attack.momentum = 0
        attack.c = 0
        attack.lr_factor = 1.5
        attack.normalized = True
        attack.backtrack = True
        attack.initialization = attacks.initializations.LInfUniformInitialization(
            epsilon)
        attack.norm = attacks.norms.LInfNorm()
        attack.projection = attacks.projections.SequentialProjections([
            attacks.projections.LInfProjection(epsilon),
            attacks.projections.BoxProjection()
        ])
        objective = attacks.objectives.UntargetedF7PObjective()

        distal_attack = attacks.BatchGradientDescent()
        distal_attack.max_iterations = 2
        distal_attack.base_lr = 0.1
        distal_attack.momentum = 0
        distal_attack.c = 0
        distal_attack.lr_factor = 1.5
        distal_attack.normalized = True
        distal_attack.backtrack = True
        distal_attack.initialization = attacks.initializations.RandomInitializations(
            [
                attacks.initializations.LInfUniformNormInitialization(
                    epsilon),  # !
                attacks.initializations.SequentialInitializations([
                    attacks.initializations.LInfUniformNormInitialization(
                        epsilon),  # !
                    attacks.initializations.SmoothInitialization()
                ])
            ])
        distal_attack.norm = attacks.norms.LInfNorm()
        distal_attack.projection = attacks.projections.SequentialProjections([
            attacks.projections.LInfProjection(epsilon),
            attacks.projections.BoxProjection()
        ])
        distal_objective = attacks.objectives.UntargetedF0Objective(
            loss=common.torch.max_log_loss)

        adversarial_perturbations, adversarial_probabilities, _ = common.test.attack(
            self.model,
            self.adversarialset,
            attack,
            objective,
            attempts=1,
            writer=writer,
            cuda=self.cuda)
        eval = common.eval.AdversarialEvaluation(
            probabilities[:len(self.adversarialset.dataset)],
            adversarial_probabilities,
            self.adversarialset.dataset.labels,
            validation=0)
        self.assertGreaterEqual(0.8,
                                eval.receiver_operating_characteristic_auc())

        distal_perturbations, distal_probabilities, _ = common.test.attack(
            self.model,
            self.randomset,
            distal_attack,
            distal_objective,
            attempts=1,
            writer=writer,
            cuda=self.cuda)
        eval = common.eval.DistalEvaluation(
            probabilities[:len(self.adversarialset.dataset)],
            distal_probabilities,
            self.adversarialset.dataset.labels,
            validation=0)
        self.assertGreaterEqual(0.8,
                                eval.receiver_operating_characteristic_auc())
Example No. 8
    def testConfidenceCalibratedAdversarialTraining(self):
        optimizer = torch.optim.SGD(self.model.parameters(),
                                    lr=0.1,
                                    momentum=0.9)
        scheduler = common.train.get_exponential_scheduler(
            optimizer, batches_per_epoch=len(self.trainset))
        writer = common.summary.SummaryDictWriter()
        augmentation = None

        epsilon = 0.3
        attack = attacks.BatchGradientDescent()
        attack.max_iterations = 2
        attack.base_lr = 0.1
        attack.momentum = 0
        attack.c = 0
        attack.lr_factor = 1.5
        attack.normalized = True
        attack.backtrack = True
        attack.initialization = attacks.initializations.LInfUniformInitialization(
            epsilon)
        attack.norm = attacks.norms.LInfNorm()
        attack.projection = attacks.projections.SequentialProjections([
            attacks.projections.LInfProjection(epsilon),
            attacks.projections.BoxProjection()
        ])
        objective = attacks.objectives.UntargetedF7PObjective()

        loss = common.torch.cross_entropy_divergence
        transition = common.utils.partial(common.torch.linear_transition,
                                          norm=attack.norm)

        trainer = common.train.ConfidenceCalibratedAdversarialTraining(
            self.model,
            self.trainset,
            self.testset,
            optimizer,
            scheduler,
            attack,
            objective,
            loss,
            transition,
            fraction=0.5,
            augmentation=augmentation,
            writer=writer,
            cuda=self.cuda)
        trainer.summary_gradients = False

        epochs = 10
        for e in range(epochs):
            trainer.step(e)

        probabilities = common.test.test(self.model,
                                         self.testset,
                                         cuda=self.cuda)
        eval = common.eval.CleanEvaluation(probabilities,
                                           self.testset.dataset.labels,
                                           validation=0)
        self.assertGreaterEqual(0.05, eval.test_error())

        adversarial_perturbations, adversarial_probabilities, _ = common.test.attack(
            self.model,
            self.adversarialset,
            attack,
            objective,
            attempts=1,
            writer=writer,
            cuda=self.cuda)
        eval = common.eval.AdversarialEvaluation(
            probabilities[:len(self.adversarialset.dataset)],
            adversarial_probabilities,
            self.adversarialset.dataset.labels,
            validation=0)
        self.assertGreaterEqual(eval.receiver_operating_characteristic_auc(),
                                0.95)
Example No. 9
cuda = True
if cuda:
    model = model.cuda()

batch_size = 100
testset = common.datasets.MNISTTestSet()
testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=0)
adversarialset = common.datasets.MNISTTestSet(indices=range(100))
adversarialloader = torch.utils.data.DataLoader(adversarialset,
                                                batch_size=batch_size,
                                                shuffle=False,
                                                num_workers=0)

linf_epsilon = 0.3
linf_attack = attacks.BatchGradientDescent()
linf_attack.max_iterations = 40
linf_attack.base_lr = 0.05
linf_attack.momentum = 0.9
linf_attack.c = 0
linf_attack.lr_factor = 1.5
linf_attack.normalized = True
linf_attack.backtrack = True
linf_attack.initialization = attacks.initializations.LInfUniformNormInitialization(
    linf_epsilon)
linf_attack.projection = attacks.projections.SequentialProjections([
    attacks.projections.LInfProjection(linf_epsilon),
    attacks.projections.BoxProjection()
])
linf_attack.norm = attacks.norms.LInfNorm()
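
# Hypothetical continuation (not part of the original snippet): an objective,
# the attack run and an evaluation along the lines of the unit tests above;
# attempts=1 and the use of adversarialset.labels are assumptions.
objective = attacks.objectives.UntargetedF0Objective()
clean_probabilities = common.test.test(model, testloader, cuda=cuda)
linf_perturbations, linf_probabilities, errors = common.test.attack(
    model, adversarialloader, linf_attack, objective, attempts=1,
    writer=common.summary.SummaryWriter(), cuda=cuda)
evaluation = common.eval.AdversarialEvaluation(
    clean_probabilities[:len(adversarialset)], linf_probabilities,
    adversarialset.labels, validation=0)
print(evaluation.receiver_operating_characteristic_auc())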
Example No. 10
    def testAttack(self):
        model = models.LeNet(10, [1, 28, 28], channels=12)
        #state = common.state.State.load('mnist_lenet.pth.tar')
        #model = state.model

        if self.cuda:
            model = model.cuda()

        epsilon = 0.3
        attack = attacks.BatchGradientDescent()
        attack.max_iterations = 2
        attack.base_lr = 0.1
        attack.momentum = 0
        attack.c = 0
        attack.lr_factor = 1
        attack.normalized = True
        attack.backtrack = False
        attack.initialization = attacks.initializations.LInfUniformInitialization(
            epsilon)
        attack.norm = attacks.norms.LInfNorm()
        attack.projection = attacks.projections.SequentialProjections([
            attacks.projections.LInfProjection(epsilon),
            attacks.projections.BoxProjection()
        ])
        objective = attacks.objectives.UntargetedF0Objective()

        model.eval()
        attempts = 1
        perturbations, adversarial_probabilities, errors = common.test.attack(
            model,
            self.adversarialset,
            attack,
            objective,
            attempts=attempts,
            writer=common.summary.SummaryWriter(),
            cuda=self.cuda)

        self.assertEqual(perturbations.shape[0], attempts)
        self.assertEqual(perturbations.shape[1],
                         self.adversarialset.dataset.images.shape[0])
        self.assertEqual(perturbations.shape[2],
                         self.adversarialset.dataset.images.shape[3])
        self.assertEqual(perturbations.shape[3],
                         self.adversarialset.dataset.images.shape[1])
        self.assertEqual(perturbations.shape[4],
                         self.adversarialset.dataset.images.shape[2])
        self.assertEqual(adversarial_probabilities.shape[0], attempts)
        self.assertEqual(adversarial_probabilities.shape[1],
                         perturbations.shape[1])
        self.assertEqual(adversarial_probabilities.shape[2],
                         numpy.max(self.adversarialset.dataset.labels) + 1)

        perturbations = numpy.transpose(perturbations, (0, 1, 3, 4, 2))
        adversarialloader = torch.utils.data.DataLoader(
            common.datasets.AdversarialDataset(
                self.adversarialset.dataset.images, perturbations,
                self.adversarialset.dataset.labels),
            batch_size=100,
            shuffle=False)
        self.assertEqual(len(adversarialloader),
                         attempts * len(self.adversarialset))
        clean_probabilities = common.test.test(model,
                                               adversarialloader,
                                               cuda=self.cuda)

        adversarial_probabilities = adversarial_probabilities.reshape(
            adversarial_probabilities.shape[0] *
            adversarial_probabilities.shape[1],
            adversarial_probabilities.shape[2])
        self.assertTrue(
            numpy.all(
                numpy.sum(perturbations.reshape(
                    perturbations.shape[0] * perturbations.shape[1], -1),
                          axis=1) > 0))
        numpy.testing.assert_array_almost_equal(clean_probabilities,
                                                adversarial_probabilities)