def get_test_attacks(self):
    """
    Build the attack/objective/attempt triples used for testing.

    :return: list of ``[attack, objective, attempts]`` entries
    """
    def linf_attack(iterations, lr, lr_factor, initialization):
        # Shared L_inf projected-gradient configuration; only the iteration
        # budget, step size, backtracking factor and initialization differ
        # between the tested variants.
        attack = attacks.BatchGradientDescent()
        attack.max_iterations = iterations
        attack.base_lr = lr
        attack.momentum = 0.9
        attack.c = 0
        attack.lr_factor = lr_factor
        attack.normalized = True
        attack.backtrack = True
        attack.initialization = initialization
        attack.projection = attacks.projections.SequentialProjections([
            attacks.projections.LInfProjection(self.epsilon),
            attacks.projections.BoxProjection()
        ])
        attack.norm = attacks.norms.LInfNorm()
        return attack

    pgd = linf_attack(
        200, 0.005, 1.25,
        attacks.initializations.LInfUniformNormInitialization(self.epsilon))
    ours = linf_attack(
        1000, 0.001, 1.1,
        attacks.initializations.LInfUniformNormInitialization(self.epsilon))
    ours_zero = linf_attack(
        1000, 0.001, 1.1,
        attacks.initializations.ZeroInitialization())

    untargetedf0 = attacks.objectives.UntargetedF0Objective()
    # The same objective instance is shared by both "ours" variants,
    # mirroring the original configuration.
    untargetedf7p = attacks.objectives.UntargetedF7PObjective()

    return [
        [pgd, untargetedf0, 50],
        [ours, untargetedf7p, 10],
        [ours_zero, untargetedf7p, 1],
    ]
def testAdversarialTraining(self):
    """
    Smoke test: run adversarial training on a small LeNet for a few epochs,
    logging to a TensorBoard writer.
    """
    cuda = True
    net = models.LeNet(10, [1, 28, 28], channels=12)
    if cuda:
        net = net.cuda()

    optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.1)
    writer = torch.utils.tensorboard.SummaryWriter('./logs/')
    augmentation = None

    # Cheap two-iteration PGD keeps the test fast.
    eps = 0.3
    attack = attacks.BatchGradientDescent()
    attack.max_iterations = 2
    attack.base_lr = 0.1
    attack.momentum = 0
    attack.c = 0
    attack.lr_factor = 1.5
    attack.normalized = True
    attack.backtrack = True
    attack.initialization = attacks.initializations.LInfUniformInitialization(eps)
    attack.norm = attacks.norms.LInfNorm()
    attack.projection = attacks.projections.SequentialProjections([
        attacks.projections.LInfProjection(eps),
        attacks.projections.BoxProjection(),
    ])
    objective = attacks.objectives.UntargetedF0Objective()

    trainer = common.train.AdversarialTraining(
        net, self.trainset, self.testset, optimizer, scheduler,
        attack, objective, fraction=0.5,
        augmentation=augmentation, writer=writer, cuda=cuda)
    trainer.summary_gradients = True

    # Evaluate once before training, then train for ten epochs.
    trainer.test(-1)
    for epoch in range(10):
        trainer.step(epoch)
        writer.flush()
        print(epoch)
def testConfidenceCalibratedAdversarialTrainingFraction(self):
    """
    Constructing confidence-calibrated adversarial training with
    ``fraction=1`` must raise an ``AssertionError``.
    """
    optimizer = torch.optim.SGD(self.model.parameters(), lr=0.1, momentum=0.9)
    scheduler = common.train.get_exponential_scheduler(
        optimizer, batches_per_epoch=len(self.trainset))
    writer = common.summary.SummaryDictWriter()
    augmentation = None

    eps = 0.3
    attack = attacks.BatchGradientDescent()
    attack.max_iterations = 2
    attack.base_lr = 0.1
    attack.momentum = 0
    attack.c = 0
    attack.lr_factor = 1.5
    attack.normalized = True
    attack.backtrack = True
    attack.initialization = attacks.initializations.LInfUniformInitialization(eps)
    attack.norm = attacks.norms.LInfNorm()
    attack.projection = attacks.projections.SequentialProjections([
        attacks.projections.LInfProjection(eps),
        attacks.projections.BoxProjection(),
    ])
    objective = attacks.objectives.UntargetedF7PObjective()

    loss = common.torch.cross_entropy_divergence
    transition = common.utils.partial(common.torch.linear_transition,
                                      norm=attack.norm)

    # fraction=1 is invalid (no clean examples left); the constructor is
    # expected to assert.
    with self.assertRaises(AssertionError):
        common.train.ConfidenceCalibratedAdversarialTraining(
            self.model, self.trainset, self.testset, optimizer, scheduler,
            attack, objective, loss, transition, fraction=1,
            augmentation=augmentation, writer=writer, cuda=self.cuda)
def testBatchGradientDescentNormalizedBacktrack(self):
    """
    Run normalized batch gradient descent with backtracking and check the
    resulting detection ROC AUC stays at or below 0.7.
    """
    eps = 0.3
    attack = attacks.BatchGradientDescent()
    # Scalar hyper-parameters applied in one pass.
    for key, value in (
            ('max_iterations', 10),
            ('base_lr', 0.1),
            ('momentum', 0),
            ('lr_factor', 1),
            ('c', 0),
            ('normalized', True),
            ('backtrack', True),
    ):
        setattr(attack, key, value)
    attack.initialization = attacks.initializations.LInfUniformInitialization(eps)
    attack.projection = attacks.projections.SequentialProjections([
        attacks.projections.LInfProjection(eps),
        attacks.projections.BoxProjection(),
    ])
    attack.norm = attacks.norms.LInfNorm()

    roc_auc = self.runTestAttackPerformance(attack)
    # Note: asserts roc_auc <= 0.7 (assertGreaterEqual(first, second)).
    self.assertGreaterEqual(0.7, roc_auc)
def get_attack(self):
    """
    Build the default L_inf PGD attack and its untargeted objective.

    :return: (attack, objective) pair
    """
    attack = attacks.BatchGradientDescent()
    attack.max_iterations = 40
    attack.base_lr = 0.005
    attack.momentum = 0.9
    attack.c = 0
    attack.lr_factor = 1.5
    attack.normalized = True
    attack.backtrack = True
    attack.initialization = \
        attacks.initializations.LInfUniformNormInitialization(self.epsilon)
    attack.projection = attacks.projections.SequentialProjections([
        attacks.projections.LInfProjection(self.epsilon),
        attacks.projections.BoxProjection(),
    ])
    attack.norm = attacks.norms.LInfNorm()

    return attack, attacks.objectives.UntargetedF0Objective()
# common.state.State will automatically determine the corresponding architecture state = common.state.State.load(model_file) model = state.model cuda = True if cuda: model = model.cuda() batch_size = 100 testset = common.datasets.MNISTTestSet() testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False) adversarialset = common.datasets.MNISTTestSet(indices=range(100)) adversarialloader = torch.utils.data.DataLoader(adversarialset, batch_size=batch_size, shuffle=False) epsilon = 0.3 attack = attacks.BatchGradientDescent() attack.max_iterations = 40 attack.base_lr = 0.05 attack.momentum = 0.9 # use momentum attack.c = 0 attack.lr_factor = 1.5 attack.normalized = True attack.backtrack = True attack.initialization = attacks.initializations.LInfUniformNormInitialization(epsilon) attack.projection = attacks.projections.SequentialProjections([ attacks.projections.LInfProjection(epsilon), attacks.projections.BoxProjection() ]) attack.norm = attacks.norms.LInfNorm() objective = attacks.objectives.UntargetedF0Objective()
def testNormalTraining(self):
    """
    Train the fixture model normally, then check clean accuracy and that
    neither adversarial nor distal examples are strongly detected
    (ROC AUC at most 0.8 in both cases).
    """
    optimizer = torch.optim.SGD(self.model.parameters(), lr=0.1, momentum=0.9)
    scheduler = common.train.get_exponential_scheduler(
        optimizer, batches_per_epoch=len(self.trainset))
    writer = common.summary.SummaryDictWriter()
    augmentation = None
    trainer = common.train.NormalTraining(self.model, self.trainset,
                                          self.testset, optimizer, scheduler,
                                          augmentation=augmentation,
                                          writer=writer, cuda=self.cuda)
    trainer.summary_gradients = False
    epochs = 10
    for e in range(epochs):
        trainer.step(e)

    # Clean evaluation: test error of at most 5%
    # (assertGreaterEqual(first, second) asserts first >= second).
    probabilities = common.test.test(self.model, self.testset, cuda=self.cuda)
    eval = common.eval.CleanEvaluation(probabilities,
                                       self.testset.dataset.labels,
                                       validation=0)
    self.assertGreaterEqual(0.05, eval.test_error())

    # Short two-iteration PGD attack for adversarial examples.
    epsilon = 0.3
    attack = attacks.BatchGradientDescent()
    attack.max_iterations = 2
    attack.base_lr = 0.1
    attack.momentum = 0
    attack.c = 0
    attack.lr_factor = 1.5
    attack.normalized = True
    attack.backtrack = True
    attack.initialization = attacks.initializations.LInfUniformInitialization(
        epsilon)
    attack.norm = attacks.norms.LInfNorm()
    attack.projection = attacks.projections.SequentialProjections([
        attacks.projections.LInfProjection(epsilon),
        attacks.projections.BoxProjection()
    ])
    objective = attacks.objectives.UntargetedF7PObjective()

    # Distal attack: randomly picks per attempt between a plain L_inf-ball
    # initialization and the same initialization followed by smoothing.
    distal_attack = attacks.BatchGradientDescent()
    distal_attack.max_iterations = 2
    distal_attack.base_lr = 0.1
    distal_attack.momentum = 0
    distal_attack.c = 0
    distal_attack.lr_factor = 1.5
    distal_attack.normalized = True
    distal_attack.backtrack = True
    distal_attack.initialization = attacks.initializations.RandomInitializations(
        [
            attacks.initializations.LInfUniformNormInitialization(
                epsilon),  # !
            attacks.initializations.SequentialInitializations([
                attacks.initializations.LInfUniformNormInitialization(
                    epsilon),  # !
                attacks.initializations.SmoothInitialization()
            ])
        ])
    distal_attack.norm = attacks.norms.LInfNorm()
    distal_attack.projection = attacks.projections.SequentialProjections([
        attacks.projections.LInfProjection(epsilon),
        attacks.projections.BoxProjection()
    ])
    distal_objective = attacks.objectives.UntargetedF0Objective(
        loss=common.torch.max_log_loss)

    # Adversarial detection: normally trained model should NOT separate
    # adversarial from clean examples well, i.e. AUC <= 0.8.
    adversarial_perturbations, adversarial_probabilities, _ = common.test.attack(
        self.model,
        self.adversarialset,
        attack,
        objective,
        attempts=1,
        writer=writer,
        cuda=self.cuda)
    eval = common.eval.AdversarialEvaluation(
        probabilities[:len(self.adversarialset.dataset)],
        adversarial_probabilities,
        self.adversarialset.dataset.labels,
        validation=0)
    self.assertGreaterEqual(0.8,
                            eval.receiver_operating_characteristic_auc())

    # Same bound for distal (far-from-data) examples.
    distal_perturbations, distal_probabilities, _ = common.test.attack(
        self.model,
        self.randomset,
        distal_attack,
        distal_objective,
        attempts=1,
        writer=writer,
        cuda=self.cuda)
    eval = common.eval.DistalEvaluation(
        probabilities[:len(self.adversarialset.dataset)],
        distal_probabilities,
        self.adversarialset.dataset.labels,
        validation=0)
    self.assertGreaterEqual(0.8,
                            eval.receiver_operating_characteristic_auc())
def testConfidenceCalibratedAdversarialTraining(self):
    """
    Train with confidence calibration, then check clean test error stays
    at or below 5% and adversarial detection AUC reaches at least 0.95.
    """
    optimizer = torch.optim.SGD(self.model.parameters(), lr=0.1, momentum=0.9)
    scheduler = common.train.get_exponential_scheduler(
        optimizer, batches_per_epoch=len(self.trainset))
    writer = common.summary.SummaryDictWriter()
    augmentation = None

    eps = 0.3
    attack = attacks.BatchGradientDescent()
    attack.max_iterations = 2
    attack.base_lr = 0.1
    attack.momentum = 0
    attack.c = 0
    attack.lr_factor = 1.5
    attack.normalized = True
    attack.backtrack = True
    attack.initialization = attacks.initializations.LInfUniformInitialization(eps)
    attack.norm = attacks.norms.LInfNorm()
    attack.projection = attacks.projections.SequentialProjections([
        attacks.projections.LInfProjection(eps),
        attacks.projections.BoxProjection(),
    ])
    objective = attacks.objectives.UntargetedF7PObjective()

    loss = common.torch.cross_entropy_divergence
    transition = common.utils.partial(common.torch.linear_transition,
                                      norm=attack.norm)

    trainer = common.train.ConfidenceCalibratedAdversarialTraining(
        self.model, self.trainset, self.testset, optimizer, scheduler,
        attack, objective, loss, transition, fraction=0.5,
        augmentation=augmentation, writer=writer, cuda=self.cuda)
    trainer.summary_gradients = False

    for epoch in range(10):
        trainer.step(epoch)

    # Clean accuracy: test error must not exceed 5%.
    probabilities = common.test.test(self.model, self.testset, cuda=self.cuda)
    clean_eval = common.eval.CleanEvaluation(probabilities,
                                             self.testset.dataset.labels,
                                             validation=0)
    self.assertGreaterEqual(0.05, clean_eval.test_error())

    # Calibrated model should yield high adversarial detection AUC.
    _, adversarial_probabilities, _ = common.test.attack(
        self.model, self.adversarialset, attack, objective,
        attempts=1, writer=writer, cuda=self.cuda)
    adv_eval = common.eval.AdversarialEvaluation(
        probabilities[:len(self.adversarialset.dataset)],
        adversarial_probabilities,
        self.adversarialset.dataset.labels,
        validation=0)
    self.assertGreaterEqual(adv_eval.receiver_operating_characteristic_auc(),
                            0.95)
model = model.cuda()

# Full MNIST test set for clean evaluation; first 100 examples for attacks.
batch_size = 100
testset = common.datasets.MNISTTestSet()
testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=batch_size,
                                         shuffle=False,
                                         num_workers=0)
adversarialset = common.datasets.MNISTTestSet(indices=range(100))
adversarialloader = torch.utils.data.DataLoader(adversarialset,
                                                batch_size=batch_size,
                                                shuffle=False,
                                                num_workers=0)

# L_inf PGD: 40 normalized-gradient iterations with momentum and
# backtracking, random L_inf-ball initialization, projected back onto the
# epsilon-ball intersected with the [0, 1] box.
linf_epsilon = 0.3
linf_attack = attacks.BatchGradientDescent()
linf_attack.max_iterations = 40
linf_attack.base_lr = 0.05
linf_attack.momentum = 0.9
linf_attack.c = 0
linf_attack.lr_factor = 1.5
linf_attack.normalized = True
linf_attack.backtrack = True
linf_attack.initialization = attacks.initializations.LInfUniformNormInitialization(
    linf_epsilon)
linf_attack.projection = attacks.projections.SequentialProjections([
    attacks.projections.LInfProjection(linf_epsilon),
    attacks.projections.BoxProjection()
])
linf_attack.norm = attacks.norms.LInfNorm()
def testAttack(self):
    """
    Check shapes and consistency of common.test.attack output:
    perturbations come back as (attempts, N, C, H, W) while dataset images
    are stored (N, H, W, C), and probabilities evaluated through an
    AdversarialDataset must match the probabilities returned by the attack.
    """
    model = models.LeNet(10, [1, 28, 28], channels=12)
    #state = common.state.State.load('mnist_lenet.pth.tar')
    #model = state.model
    if self.cuda:
        model = model.cuda()

    epsilon = 0.3
    attack = attacks.BatchGradientDescent()
    attack.max_iterations = 2
    attack.base_lr = 0.1
    attack.momentum = 0
    attack.c = 0
    attack.lr_factor = 1
    attack.normalized = True
    attack.backtrack = False
    attack.initialization = attacks.initializations.LInfUniformInitialization(
        epsilon)
    attack.norm = attacks.norms.LInfNorm()
    attack.projection = attacks.projections.SequentialProjections([
        attacks.projections.LInfProjection(epsilon),
        attacks.projections.BoxProjection()
    ])
    objective = attacks.objectives.UntargetedF0Objective()

    model.eval()
    attempts = 1
    perturbations, adversarial_probabilities, errors = common.test.attack(
        model,
        self.adversarialset,
        attack,
        objective,
        attempts=attempts,
        writer=common.summary.SummaryWriter(),
        cuda=self.cuda)

    # Perturbations: (attempts, N, C, H, W); dataset images: (N, H, W, C).
    self.assertEqual(perturbations.shape[0], attempts)
    self.assertEqual(perturbations.shape[1],
                     self.adversarialset.dataset.images.shape[0])
    self.assertEqual(perturbations.shape[2],
                     self.adversarialset.dataset.images.shape[3])
    self.assertEqual(perturbations.shape[3],
                     self.adversarialset.dataset.images.shape[1])
    self.assertEqual(perturbations.shape[4],
                     self.adversarialset.dataset.images.shape[2])
    # Probabilities: (attempts, N, classes).
    self.assertEqual(adversarial_probabilities.shape[0], attempts)
    self.assertEqual(adversarial_probabilities.shape[1],
                     perturbations.shape[1])
    self.assertEqual(adversarial_probabilities.shape[2],
                     numpy.max(self.adversarialset.dataset.labels) + 1)

    # Transpose to channels-last so perturbations line up with the images.
    perturbations = numpy.transpose(perturbations, (0, 1, 3, 4, 2))
    adversarialloader = torch.utils.data.DataLoader(
        common.datasets.AdversarialDataset(
            self.adversarialset.dataset.images, perturbations,
            self.adversarialset.dataset.labels),
        batch_size=100,
        shuffle=False)
    self.assertEqual(len(adversarialloader),
                     attempts * len(self.adversarialset))

    clean_probabilities = common.test.test(model,
                                           adversarialloader,
                                           cuda=self.cuda)
    # Flatten (attempts, N, classes) -> (attempts * N, classes) to compare
    # against the loader-based evaluation.
    adversarial_probabilities = adversarial_probabilities.reshape(
        adversarial_probabilities.shape[0] *
        adversarial_probabilities.shape[1],
        adversarial_probabilities.shape[2])
    # Every perturbation must be non-trivial (non-zero absolute sum assumed
    # via positive sum — NOTE(review): relies on perturbation sums being
    # positive; confirm upstream guarantees).
    self.assertTrue(
        numpy.all(
            numpy.sum(perturbations.reshape(
                perturbations.shape[0] * perturbations.shape[1], -1),
                      axis=1) > 0))
    # Probabilities computed from the stored perturbations must match the
    # probabilities returned by the attack itself.
    numpy.testing.assert_array_almost_equal(clean_probabilities,
                                            adversarial_probabilities)