示例#1
0
    def _test_mnist_targeted(self, classifier, x_test):
        """Run a targeted BIM attack on MNIST, targeting each sample's
        second-most-likely class, and verify the attack succeeds on at least
        half the samples without mutating the original input array.
        """
        # Pristine copy used to later verify x_test was not modified in place.
        x_test_original = x_test.copy()

        # Targeted BIM attack (default np.inf norm).  NOTE(review): the
        # original comment said "FGSM", but the attack built here is BIM.
        attack = BasicIterativeMethod(classifier,
                                      eps=1.0,
                                      eps_step=0.01,
                                      targeted=True,
                                      batch_size=128)
        # y_test_adv = to_categorical((np.argmax(y_test, axis=1) + 1)  % 10, 10)
        # Build one-hot targets: for each sample, the class the classifier
        # itself ranks second (index -2 after an ascending argsort).
        pred_sort = classifier.predict(x_test).argsort(axis=1)
        y_test_adv = np.zeros((x_test.shape[0], 10))
        for i in range(x_test.shape[0]):
            y_test_adv[i, pred_sort[i, -2]] = 1.0
        x_test_adv = attack.generate(x_test, y=y_test_adv)

        # The attack must perturb at least one pixel.
        self.assertFalse((x_test == x_test_adv).all())

        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertEqual(y_test_adv.shape, test_y_pred.shape)
        # This doesn't work all the time, especially with small networks
        self.assertGreaterEqual((y_test_adv == test_y_pred).sum(),
                                x_test.shape[0] // 2)

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))),
                               0.0,
                               delta=0.00001)
    def _test_mnist_targeted(self, classifier):
        """Craft targeted BIM adversarial examples on MNIST and check that at
        least half of them are classified as their target class.
        """
        # Get MNIST test data (train split unused here).
        (_, _), (x_test, _) = self.mnist

        # Targeted BIM attack with the default np.inf norm.
        attack = BasicIterativeMethod(classifier,
                                      eps=1.0,
                                      eps_step=0.01,
                                      targeted=True,
                                      batch_size=128)

        # One-hot targets: for every sample, the class the classifier itself
        # ranks second (index -2 of the ascending argsort).
        ranked = classifier.predict(x_test).argsort(axis=1)
        n_samples = x_test.shape[0]
        targets = np.zeros((n_samples, 10))
        targets[np.arange(n_samples), ranked[:, -2]] = 1.0

        x_test_adv = attack.generate(x_test, y=targets)

        # At least one pixel must have changed.
        self.assertFalse((x_test == x_test_adv).all())

        preds_adv = get_labels_np_array(classifier.predict(x_test_adv))

        self.assertEqual(targets.shape, preds_adv.shape)
        # This doesn't work all the time, especially with small networks
        self.assertGreaterEqual((targets == preds_adv).sum(), n_samples // 2)
    def _test_backend_mnist(self, classifier):
        """Run an untargeted BIM attack on MNIST train and test splits and
        verify the perturbation changes the inputs and degrades predictions.
        """
        # Get MNIST
        (x_train, y_train), (x_test, y_test) = self.mnist

        # Untargeted BIM attack with the default np.inf norm.
        attack = BasicIterativeMethod(classifier,
                                      eps=1,
                                      eps_step=0.1,
                                      batch_size=128)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        # The attack must perturb at least one feature in each split.
        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        # Predictions on adversarial data must not all match the labels.
        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        train_acc = np.mean(
            np.argmax(train_y_pred, axis=1) == np.argmax(y_train, axis=1))
        logger.info('Accuracy on adversarial train examples: %.2f%%',
                    (train_acc * 100))

        test_acc = np.mean(
            np.argmax(test_y_pred, axis=1) == np.argmax(y_test, axis=1))
        logger.info('Accuracy on adversarial test examples: %.2f%%',
                    (test_acc * 100))
    def _test_backend_mnist(self, classifier, x_train, y_train, x_test, y_test):
        """Run an untargeted BIM attack on MNIST train and test data, verify
        that the perturbation changes inputs and degrades predictions, and
        that the attack leaves the original test data untouched.
        """
        # Pristine copy used to later verify x_test was not modified in place.
        x_test_original = x_test.copy()

        # Test BIM with np.inf norm
        attack = BasicIterativeMethod(classifier, eps=1, eps_step=0.1, batch_size=128)
        x_train_adv = attack.generate(x_train)
        x_test_adv = attack.generate(x_test)

        # The attack must perturb at least one feature in each split.
        self.assertFalse((x_train == x_train_adv).all())
        self.assertFalse((x_test == x_test_adv).all())

        train_y_pred = get_labels_np_array(classifier.predict(x_train_adv))
        test_y_pred = get_labels_np_array(classifier.predict(x_test_adv))

        # Predictions on adversarial data must not all match the labels.
        self.assertFalse((y_train == train_y_pred).all())
        self.assertFalse((y_test == test_y_pred).all())

        acc = np.sum(np.argmax(train_y_pred, axis=1) == np.argmax(y_train, axis=1)) / y_train.shape[0]
        logger.info("Accuracy on adversarial train examples: %.2f%%", (acc * 100))

        acc = np.sum(np.argmax(test_y_pred, axis=1) == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info("Accuracy on adversarial test examples: %.2f%%", (acc * 100))

        # Check that x_test has not been modified by attack and classifier
        self.assertAlmostEqual(float(np.max(np.abs(x_test_original - x_test))), 0.0, delta=0.00001)
示例#5
0
def atk_BasicIterative(x_train, x_test, y_train, y_test, classifier):
    """Craft BIM adversarial examples for the train and test sets, evaluate
    them via the module-level ``evaluate`` helper, and return them.

    Returns a tuple ``(x_test_adv, x_train_adv)`` — note the test set first.
    """
    attack = BasicIterativeMethod(classifier, eps=1, eps_step=0.1)
    x_train_adv = attack.generate(x_train)
    x_test_adv = attack.generate(x_test)

    print("After BasicIterative Attack  \n")
    evaluate(x_train, x_test, y_train, y_test, x_train_adv, x_test_adv, classifier)
    return x_test_adv, x_train_adv
示例#6
0
def basic_iter(x_test, model, eps, eps_step, max_iter, targeted, batch_size):
    """Wrap *model* in a KerasClassifier and run BIM on *x_test*.

    The result is reshaped to a fixed (32, 32, 3) image, so this assumes a
    single CIFAR-sized input — TODO confirm against callers.
    """
    classifier = KerasClassifier(model=model, clip_values=(0, 1))
    attack = BasicIterativeMethod(classifier=classifier,
                                  eps=eps,
                                  eps_step=eps_step,
                                  max_iter=max_iter,
                                  targeted=targeted,
                                  batch_size=batch_size)
    adversarial = attack.generate(x_test)
    return np.reshape(adversarial, (32, 32, 3))
    def test_keras_iris_unbounded(self):
        """BIM on Iris with a classifier that has no clip values: the
        adversarial samples are allowed to leave the [0, 1] feature range.
        """
        classifier = get_tabular_classifier_kr()

        # Recreate a classifier without clip values
        classifier = KerasClassifier(model=classifier._model, use_logits=False, channel_index=1)
        attack = BasicIterativeMethod(classifier, eps=1, eps_step=0.2, batch_size=128)
        x_test_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        # Without clipping, perturbed features are expected to exceed bounds.
        self.assertTrue((x_test_adv > 1).any())
        self.assertTrue((x_test_adv < 0).any())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Accuracy on Iris with BIM adversarial examples: %.2f%%", (acc * 100))
class BIMAttack(AdversarialAttack):
    """Adapter exposing ART's Basic Iterative Method through the project's
    ``AdversarialAttack`` interface.
    """

    def __init__(self,
                 model,
                 step_size_iter=0.1,
                 max_perturbation=0.3,
                 max_iterations=100,
                 targeted=False,
                 batch_size=16):
        """Configure the underlying BIM attack.

        :param model: classifier passed to the ``AdversarialAttack`` base and
            used as the target of the attack.
        :param step_size_iter: per-iteration step size (ART ``eps_step``).
        :param max_perturbation: maximum total perturbation (ART ``eps``).
        :param max_iterations: number of attack iterations (ART ``max_iter``).
        :param targeted: whether the attack drives towards specific labels.
        :param batch_size: batch size used when generating examples.
        """
        super().__init__(model=model)
        self._targeted = targeted
        self._step_size_iter = step_size_iter
        self._max_perturbation = max_perturbation
        self._max_iterations = max_iterations
        # Pre-build the ART attack once; generate() reuses it per call.
        self._method = BasicIterativeMethod(classifier=self.model,
                                            targeted=self._targeted,
                                            eps=self._max_perturbation,
                                            eps_step=self._step_size_iter,
                                            max_iter=self._max_iterations,
                                            batch_size=batch_size)

    def attack_method(self, x, y=None):
        """Generate adversarial examples for *x*.

        :param x: input samples to perturb.
        :param y: optional labels; forwarded to ART only when provided
            (targets if the attack is targeted, true labels otherwise —
            per ART's ``generate`` contract).
        :return: the adversarial examples produced by ART's ``generate``.
        """
        params = {}
        if y is not None:
            params['y'] = y
        return self._method.generate(x=x, **params)
def general_test(model,
                 optimizer,
                 input_shape,
                 nb_classes,
                 test_loader,
                 method,
                 btrain=False,
                 model_file='last_model_92_sgd.pkl'):
    """Evaluate *model* on adversarial examples crafted from *test_loader*.

    Wraps the PyTorch model in an ART ``PyTorchClassifier``, builds the attack
    selected by *method* ('Deepfool', 'BIM', 'JSMA', 'CW2' or 'CWI'), perturbs
    every test batch, and reports overall and per-class accuracy.

    :param model: PyTorch model to evaluate (moved to eval mode).
    :param optimizer: optimizer handed to the ART classifier wrapper.
    :param input_shape: input shape for the ART classifier.
    :param nb_classes: number of output classes.
    :param test_loader: DataLoader yielding (images, labels) batches.
    :param method: name of the attack to run.
    :param btrain: if False, load weights from *model_file* first.
    :param model_file: checkpoint path used when *btrain* is False.
    :return: overall accuracy (correct / total).
    :raises ValueError: if *method* names an unsupported attack.
    """
    global _classes
    if not btrain:
        model.load_state_dict(torch.load(model_file))
    model.eval()

    loss = nn.CrossEntropyLoss()
    warped_model = PyTorchClassifier(model,
                                     loss,
                                     optimizer,
                                     input_shape,
                                     nb_classes,
                                     clip_values=(.0, 1.))
    if method == 'Deepfool':
        adv_crafter = DeepFool(warped_model)
    elif method == 'BIM':
        adv_crafter = BasicIterativeMethod(warped_model, batch_size=20)
    elif method == 'JSMA':
        adv_crafter = SaliencyMapMethod(warped_model, batch_size=20)
    elif method == 'CW2':
        adv_crafter = CarliniL2Method(warped_model, batch_size=20)
    elif method == 'CWI':
        adv_crafter = CarliniLInfMethod(warped_model, batch_size=20)
    else:
        # Fail fast: previously an unknown method left adv_crafter unbound
        # and raised a confusing NameError inside the loop below.
        raise ValueError('Unsupported attack method: {}'.format(method))

    correct, total = 0, 0
    class_correct = list(0. for _ in range(10))
    class_total = list(0. for _ in range(10))

    for images, labels in test_loader:
        images = adv_crafter.generate(images.numpy())

        images = Variable(torch.from_numpy(images).cuda())
        labels = Variable(labels.cuda())

        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.data).sum()
        c = (predicted == labels.data).squeeze()
        # Use the actual batch size rather than a hard-coded 20: the last
        # batch of a DataLoader may be smaller, which previously indexed
        # past the end of the batch.
        for i in range(labels.size(0)):
            label = labels.data[i]
            class_correct[label] += c[i]
            class_total[label] += 1

    print('Accuracy of the model on the test images: %d %%' %
          (100 * float(correct) / total))
    print('Accuracy of the model on the test images:', float(correct) / total)
    for i in range(10):
        print('Accuracy of %5s : %2d %%' %
              (_classes[i], 100 * class_correct[i] / class_total[i]))
    return correct / total
def general_test_v2(model,
                    optimizer,
                    input_shape,
                    nb_classes,
                    test_loader,
                    method,
                    conf,
                    btrain=False,
                    model_file='last_model_92_sgd.pkl'):
    """Evaluate *model* on an adversarial dataset built ahead of time.

    Unlike ``general_test``, the adversarial examples are generated once via
    ``adv_generalization`` and re-served through a fresh DataLoader
    (batch_size=32, drop_last=True) before accuracy is measured.

    :param model: PyTorch model to evaluate (moved to eval mode).
    :param optimizer: optimizer handed to the ART classifier wrapper.
    :param input_shape: input shape for the ART classifier.
    :param nb_classes: number of output classes.
    :param test_loader: DataLoader with the clean test data.
    :param method: attack name ('Deepfool', 'BIM', 'JSMA', 'CW2', 'CWI', 'FGSM').
    :param conf: configuration forwarded to ``adv_generalization``.
    :param btrain: if False, load weights from *model_file* first.
    :param model_file: checkpoint path; its 'state_dict' entry is loaded.
    :return: overall accuracy (correct / total).
    :raises ValueError: if *method* names an unsupported attack.
    """
    global _classes
    if not btrain:
        checked_state = torch.load(model_file)['state_dict']
        model.load_state_dict(checked_state)
    model.eval()

    loss = nn.CrossEntropyLoss()
    warped_model = PyTorchClassifier(model,
                                     loss,
                                     optimizer,
                                     input_shape,
                                     nb_classes,
                                     clip_values=(.0, 1.))
    if method == 'Deepfool':
        adv_crafter = DeepFool(warped_model)
    elif method == 'BIM':
        adv_crafter = BasicIterativeMethod(warped_model, batch_size=32)
    elif method == 'JSMA':
        adv_crafter = SaliencyMapMethod(warped_model, batch_size=32)
    elif method == 'CW2':
        adv_crafter = CarliniL2Method(warped_model, batch_size=32)
    elif method == 'CWI':
        adv_crafter = CarliniLInfMethod(warped_model, batch_size=32)
    elif method == 'FGSM':
        adv_crafter = FastGradientMethod(warped_model, batch_size=32)
    else:
        # Fail fast: previously an unknown method left adv_crafter unbound
        # and raised a confusing NameError at the adv_generalization call.
        raise ValueError('Unsupported attack method: {}'.format(method))

    correct, total = 0, 0

    adv_dataset = adv_generalization(test_loader, adv_crafter, conf)
    temp_loader = DataLoader(dataset=adv_dataset,
                             batch_size=32,
                             shuffle=False,
                             drop_last=True)
    # temp_loader = test_loader

    for images, labels in temp_loader:
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())

        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels.data).sum()

    print('Accuracy of the model on the test images: %d %%' %
          (100 * float(correct) / total))
    print('Accuracy of the model on the test images:', float(correct) / total)
    return correct / total
 def __init__(self,
              model,
              step_size_iter=0.1,
              max_perturbation=0.3,
              max_iterations=100,
              targeted=False,
              batch_size=16):
     """Configure a Basic Iterative Method (BIM) attack around *model*.

     :param model: classifier passed to the enclosing base class and used as
         the target classifier of ART's ``BasicIterativeMethod``.
     :param step_size_iter: per-iteration step size (ART ``eps_step``).
     :param max_perturbation: maximum total perturbation (ART ``eps``).
     :param max_iterations: number of attack iterations (ART ``max_iter``).
     :param targeted: whether the attack drives towards specific labels.
     :param batch_size: batch size used when generating examples.
     """
     super().__init__(model=model)
     self._targeted = targeted
     self._step_size_iter = step_size_iter
     self._max_perturbation = max_perturbation
     self._max_iterations = max_iterations
     # Pre-build the underlying ART attack with the stored settings.
     self._method = BasicIterativeMethod(classifier=self.model,
                                         targeted=self._targeted,
                                         eps=self._max_perturbation,
                                         eps_step=self._step_size_iter,
                                         max_iter=self._max_iterations,
                                         batch_size=batch_size)
示例#12
0
def GetAttackers(classifier, x_test, attacker_name):
    """
    Function:
        Build the attacker named by *attacker_name* around *classifier*,
        generate adversarial samples for *x_test*, and return them together
        with the elapsed wall-clock time in seconds.
    """
    t_start = time.time()

    # One lazy constructor per supported attacker; only the requested one
    # is actually instantiated.
    builders = {
        "FGSM": lambda: FastGradientMethod(classifier=classifier, eps=0.3),
        "Elastic": lambda: ElasticNet(classifier=classifier, confidence=0.5),
        "BasicIterativeMethod": lambda: BasicIterativeMethod(
            classifier=classifier, eps=0.3),
        "NewtonFool": lambda: NewtonFool(classifier=classifier, max_iter=20),
        "HopSkipJump": lambda: HopSkipJump(classifier=classifier, max_iter=20),
        "ZooAttack": lambda: ZooAttack(classifier=classifier, max_iter=20),
        "VirtualAdversarialMethod": lambda: VirtualAdversarialMethod(
            classifier=classifier, max_iter=20),
        "UniversalPerturbation": lambda: UniversalPerturbation(
            classifier=classifier, max_iter=20),
        "AdversarialPatch": lambda: AdversarialPatch(
            classifier=classifier, max_iter=20),
        "Attack": lambda: Attack(classifier=classifier),
        "BoundaryAttack": lambda: BoundaryAttack(
            classifier=classifier, targeted=False, epsilon=0.05,
            max_iter=20),  #, max_iter=20
        "CarliniL2": lambda: CarliniL2Method(
            classifier=classifier, confidence=0.5, learning_rate=0.001,
            max_iter=15),
        "CarliniLinf": lambda: CarliniLInfMethod(
            classifier=classifier, confidence=0.5, learning_rate=0.001,
            max_iter=15),
        "DeepFool": lambda: DeepFool(classifier),
        "SMM": lambda: SaliencyMapMethod(classifier=classifier, theta=2),
        "PGD": lambda: ProjectedGradientDescent(
            classifier=classifier, norm=2, eps=1, eps_step=0.5),
    }

    if attacker_name not in builders:
        raise ValueError("Please get the right attacker's name for the input.")
    attacker = builders[attacker_name]()

    test_adv = attacker.generate(x_test)
    dt = time.time() - t_start
    return test_adv, dt
示例#13
0
    def test_tensorflow_iris(self):
        """Run untargeted and targeted BIM against a TensorFlow Iris
        classifier with clip values, checking perturbation, bounds, and
        attack effect.
        """
        (_, _), (x_test, y_test) = self.iris
        classifier, _ = get_iris_classifier_tf()

        # Test untargeted attack
        attack = BasicIterativeMethod(classifier, eps=1, eps_step=0.1)
        x_test_adv = attack.generate(x_test)
        self.assertFalse((x_test == x_test_adv).all())
        # Clip values keep the adversarial samples inside [0, 1].
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
        logger.info('Accuracy on Iris with BIM adversarial examples: %.2f%%',
                    (acc * 100))

        # Test targeted attack
        targets = random_targets(y_test, nb_classes=3)
        attack = BasicIterativeMethod(classifier,
                                      targeted=True,
                                      eps=1,
                                      eps_step=0.1)
        x_test_adv = attack.generate(x_test, **{'y': targets})
        self.assertFalse((x_test == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        # At least one sample must hit its target class.
        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
        acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
        logger.info('Success rate of targeted BIM on Iris: %.2f%%',
                    (acc * 100))
    def test_pytorch_iris(self):
        """Run untargeted and targeted BIM against a PyTorch Iris classifier,
        checking perturbation, [0, 1] bounds, and attack effect.
        """
        classifier = get_tabular_classifier_pt()

        # Test untargeted attack
        attack = BasicIterativeMethod(classifier, eps=1, eps_step=0.1)
        x_test_adv = attack.generate(self.x_test_iris)
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        # Clip values keep the adversarial samples inside [0, 1].
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
        acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Accuracy on Iris with BIM adversarial examples: %.2f%%", (acc * 100))

        # Test targeted attack
        targets = random_targets(self.y_test_iris, nb_classes=3)
        attack = BasicIterativeMethod(classifier, targeted=True, eps=1, eps_step=0.1, batch_size=128)
        x_test_adv = attack.generate(self.x_test_iris, **{"y": targets})
        self.assertFalse((self.x_test_iris == x_test_adv).all())
        self.assertTrue((x_test_adv <= 1).all())
        self.assertTrue((x_test_adv >= 0).all())

        # At least one sample must hit its target class.
        preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
        self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
        acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
        logger.info("Success rate of targeted BIM on Iris: %.2f%%", (acc * 100))
示例#15
0
    def test_classifier_type_check_fail_classifier(self):
        """BasicIterativeMethod must raise TypeError, with a specific message,
        when given an object that does not implement the art Classifier API.
        """
        # Use a useless test classifier to test basic classifier properties
        class ClassifierNoAPI:
            pass

        classifier = ClassifierNoAPI
        with self.assertRaises(TypeError) as context:
            _ = BasicIterativeMethod(classifier=classifier)

        self.assertIn(
            'For `BasicIterativeMethod` classifier must be an instance of '
            '`art.classifiers.classifier.Classifier`, the provided classifier is instance of '
            '(<class \'object\'>,).', str(context.exception))
示例#16
0
    def test_classifier_type_check_fail_gradients(self):
        """BasicIterativeMethod must raise TypeError, with a specific message,
        when given a classifier that cannot supply gradients (white-box
        attacks require a ClassifierGradients instance).
        """
        # Use a test classifier not providing gradients required by white-box attack
        from art.classifiers.scikitlearn import ScikitlearnDecisionTreeClassifier
        from sklearn.tree import DecisionTreeClassifier

        classifier = ScikitlearnDecisionTreeClassifier(
            model=DecisionTreeClassifier())
        with self.assertRaises(TypeError) as context:
            _ = BasicIterativeMethod(classifier=classifier)

        self.assertIn(
            'For `BasicIterativeMethod` classifier must be an instance of '
            '`art.classifiers.classifier.ClassifierGradients`, the provided classifier is instance of '
            '(<class \'art.classifiers.scikitlearn.ScikitlearnClassifier\'>,).',
            str(context.exception))
    def test_scikitlearn(self):
        """Run untargeted and targeted BIM against several scikit-learn
        classifiers (logistic regression, SVC, LinearSVC) wrapped in their
        ART counterparts, trained on the Iris test split.
        """
        from sklearn.linear_model import LogisticRegression
        from sklearn.svm import SVC, LinearSVC

        from art.classifiers.scikitlearn import ScikitlearnLogisticRegression, ScikitlearnSVC

        # Map sklearn model class -> ART wrapper class.
        scikitlearn_test_cases = {
            LogisticRegression: ScikitlearnLogisticRegression,
            SVC: ScikitlearnSVC,
            LinearSVC: ScikitlearnSVC
        }

        (_, _), (x_test, y_test) = self.iris

        for (model_class, classifier_class) in scikitlearn_test_cases.items():
            model = model_class()
            classifier = classifier_class(model=model, clip_values=(0, 1))
            classifier.fit(x=x_test, y=y_test)

            # Test untargeted attack
            attack = BasicIterativeMethod(classifier, eps=1, eps_step=0.1)
            x_test_adv = attack.generate(x_test)
            self.assertFalse((x_test == x_test_adv).all())
            # Clip values keep the adversarial samples inside [0, 1].
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertFalse((np.argmax(y_test, axis=1) == preds_adv).all())
            acc = np.sum(
                preds_adv == np.argmax(y_test, axis=1)) / y_test.shape[0]
            logger.info(
                'Accuracy of ' + classifier.__class__.__name__ +
                ' on Iris with BIM adversarial examples: '
                '%.2f%%', (acc * 100))

            # Test targeted attack
            targets = random_targets(y_test, nb_classes=3)
            attack = BasicIterativeMethod(classifier,
                                          targeted=True,
                                          eps=1,
                                          eps_step=0.1,
                                          batch_size=128)
            x_test_adv = attack.generate(x_test, **{'y': targets})
            self.assertFalse((x_test == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            # At least one sample must hit its target class.
            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
            acc = np.sum(
                preds_adv == np.argmax(targets, axis=1)) / y_test.shape[0]
            logger.info(
                'Success rate of ' + classifier.__class__.__name__ +
                ' on targeted BIM on Iris: %.2f%%', (acc * 100))
    def test_scikitlearn(self):
        """Run untargeted and targeted BIM against several scikit-learn model
        instances via the generic ART ``SklearnClassifier`` wrapper, and
        verify the attack does not mutate the original test data.
        """
        from sklearn.linear_model import LogisticRegression
        from sklearn.svm import SVC, LinearSVC

        from art.classifiers.scikitlearn import SklearnClassifier

        scikitlearn_test_cases = [
            LogisticRegression(solver="lbfgs", multi_class="auto"),
            SVC(gamma="auto"),
            LinearSVC(),
        ]

        # Pristine copy used to later verify x_test was not modified in place.
        x_test_original = self.x_test_iris.copy()

        for model in scikitlearn_test_cases:
            classifier = SklearnClassifier(model=model, clip_values=(0, 1))
            classifier.fit(x=self.x_test_iris, y=self.y_test_iris)

            # Test untargeted attack
            attack = BasicIterativeMethod(classifier, eps=1, eps_step=0.1, max_iter=5)
            x_test_adv = attack.generate(self.x_test_iris)
            self.assertFalse((self.x_test_iris == x_test_adv).all())
            # Clip values keep the adversarial samples inside [0, 1].
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertFalse((np.argmax(self.y_test_iris, axis=1) == preds_adv).all())
            acc = np.sum(preds_adv == np.argmax(self.y_test_iris, axis=1)) / self.y_test_iris.shape[0]
            logger.info(
                "Accuracy of " + classifier.__class__.__name__ + " on Iris with BIM adversarial examples: " "%.2f%%",
                (acc * 100),
            )

            # Test targeted attack
            targets = random_targets(self.y_test_iris, nb_classes=3)
            attack = BasicIterativeMethod(classifier, targeted=True, eps=1, eps_step=0.1, batch_size=128, max_iter=5)
            x_test_adv = attack.generate(self.x_test_iris, **{"y": targets})
            self.assertFalse((self.x_test_iris == x_test_adv).all())
            self.assertTrue((x_test_adv <= 1).all())
            self.assertTrue((x_test_adv >= 0).all())

            # At least one sample must hit its target class.
            preds_adv = np.argmax(classifier.predict(x_test_adv), axis=1)
            self.assertTrue((np.argmax(targets, axis=1) == preds_adv).any())
            acc = np.sum(preds_adv == np.argmax(targets, axis=1)) / self.y_test_iris.shape[0]
            logger.info(
                "Success rate of " + classifier.__class__.__name__ + " on targeted BIM on Iris: %.2f%%", (acc * 100)
            )

            # Check that x_test has not been modified by attack and classifier
            self.assertAlmostEqual(float(np.max(np.abs(x_test_original - self.x_test_iris))), 0.0, delta=0.00001)
示例#19
0
文件: utils.py 项目: voidstrike/AIAML
def build_adversarial(model, optimizer, loss, input_shape, nb_class, method, batch_size=32, pgd_eps=0.3):
    """Wrap *model* in an ART PyTorchClassifier and return the attack crafter
    named by *method*.

    :param model: PyTorch model (switched to eval mode here).
    :param optimizer: optimizer handed to the ART classifier wrapper.
    :param loss: loss function handed to the ART classifier wrapper.
    :param input_shape: input shape for the ART classifier.
    :param nb_class: number of output classes.
    :param method: one of 'deepfool', 'bim', 'jsma', 'cw2', 'cwi', 'fgsm', 'pgd'.
    :param batch_size: batch size for every attack except DeepFool.
    :param pgd_eps: maximum perturbation for the PGD attack only.
    :return: the constructed ART attack object.
    :raises NotImplementedError: for an unrecognized *method*.
    """
    model.eval()
    wmodel = PyTorchClassifier(model, loss, optimizer, input_shape, nb_class)

    # One lazy constructor per supported attack.
    crafters = {
        'deepfool': lambda: DeepFool(wmodel),
        'bim': lambda: BasicIterativeMethod(wmodel, batch_size=batch_size),
        'jsma': lambda: SaliencyMapMethod(wmodel, batch_size=batch_size),
        'cw2': lambda: CarliniL2Method(wmodel, batch_size=batch_size),
        'cwi': lambda: CarliniLInfMethod(wmodel, batch_size=batch_size),
        'fgsm': lambda: FastGradientMethod(wmodel, batch_size=batch_size),
        'pgd': lambda: ProjectedGradientDescent(wmodel, batch_size=batch_size, eps=pgd_eps),
    }

    if method not in crafters:
        raise NotImplementedError('Unsupported Attack Method: {}'.format(method))

    return crafters[method]()
                                         clip_values=(-0.5, 0.5),
                                         use_logits=False)
        elif args.model == 'densenet201':
            model = DenseNet201(weights='imagenet')
            classifier = KerasClassifier(model=model, use_logits=False)

    if args.attack == 'fgsm':
        attack = FastGradientMethod(classifier=classifier,
                                    eps=0.6,
                                    eps_step=0.6,
                                    batch_size=64)

    if args.attack == 'bim':
        if args.d == 'imagenet':
            attack = BasicIterativeMethod(classifier=classifier,
                                          eps=0.6,
                                          batch_size=64,
                                          max_iter=25)
        else:
            attack = BasicIterativeMethod(classifier=classifier,
                                          eps=0.6,
                                          batch_size=64)

    if args.attack == 'jsma':
        attack = SaliencyMapMethod(classifier=classifier, batch_size=64)

    if args.attack == 'c+w':
        attack = CarliniL2Method(classifier=classifier, batch_size=64)

    # generating adversarial of the testing dataset and save it to the folder './adv'
    if args.d == 'mnist' or args.d == 'cifar':
        x_adv = attack.generate(x=x_test)
            )  # only use without modifying batch size (default: 1)
            classifier = KerasClassifier(model=model, use_logits=False)

        for i in range(args.val_start, args.val_end):
            x_test, y_test = pickle.load(
                open(
                    './dataset_imagenet/%s_%s_val_%i.p' %
                    (args.d, args.model, int(i)), 'rb'))

            if args.attack == 'fgsm':
                attack = FastGradientMethod(classifier=classifier,
                                            eps=0.6,
                                            eps_step=0.6)
            if args.attack == 'bim':
                attack = BasicIterativeMethod(classifier=classifier,
                                              eps=0.6,
                                              max_iter=5)
            if args.attack == 'jsma':
                # attack = ProjectedGradientDescent(classifier=classifier, eps=0.6, max_iter=5)
                attack = SaliencyMapMethod(classifier=classifier)
            if args.attack == 'c+w':
                attack = CarliniL2Method(classifier=classifier)
                # attack = CarliniLInfMethod(classifier=classifier, batch_size=1, max_iter=2)
                # attack = FastGradientMethod(classifier=classifier)
            from datetime import datetime
            now = datetime.now()
            current_time_after = now.strftime("%H:%M:%S")
            print("Current Time After=", current_time_after)

            print('Generating adversarial examples----------------')
            print(i, x_test.shape, y_test.shape)
示例#22
0
def run():
    """Evaluate ESKD student models against Gaussian noise and adversarial attacks.

    Loads every student-model checkpoint found under ``cfg.MODEL_DIR4``,
    parses size / interval / accuracy metadata out of the checkpoint file
    names, then for each checkpoint measures:

    * adversarial accuracy on FGM or BIM examples (one column per epsilon
      in ``cfg.EPS_VALS4``), when ``cfg.USE_ADV_ATTACK`` is set, and
    * clean-model accuracy on Gaussian-noised copies of the test set (one
      column per sigma in ``cfg.SIGMA_VALS4``), when ``cfg.USE_GAUSS_NOISE``
      is set.

    Results are accumulated in a pandas dataframe and written to
    ``results.csv`` inside a freshly created, timestamped log directory.
    The file is rewritten after each checkpoint so partial results survive
    an interrupted run.
    """
    # Create a unique, timestamped log directory for this evaluation run.
    log_dir = cfg.log_dir
    now = datetime.now()
    now_datetime = now.strftime("%d-%m-%y_%H:%M:%S")
    log_dir = os.path.join(
        log_dir,
        f"ESKD_baseline_noise_and_adv_evaluation_{cfg.dataset}_{cfg.student_model_size}_{now_datetime}"
    )
    os.mkdir(log_dir)
    RESULTS_FILE = os.path.join(log_dir, "results.csv")

    # Collect every student checkpoint and strip the directory prefix to get
    # the bare file names used for metadata parsing.
    DIR_QUERY = os.path.join(cfg.MODEL_DIR4, "*.h5")
    STUDENT_MODEL_WEIGHT_PATHS = glob.glob(DIR_QUERY)
    # generate a list of parsed student model information
    rm_path = cfg.MODEL_DIR4 + "/"
    STUDENT_MODEL_NAMES = [
        x[len(rm_path):] for x in STUDENT_MODEL_WEIGHT_PATHS
    ]
    # Parse values out of model names shaped like
    # "model_<size>_<interval>|<total>_<test_acc>_<train_acc>.h5".
    sizes = []
    intervals = []
    test_accs = []
    train_accs = []
    for name in STUDENT_MODEL_NAMES:
        # Dots are escaped so the accuracy groups only match a literal
        # decimal point (the previous bare '.' matched any character).
        size, interval, test_acc, train_acc = re.findall(
            rf"model_(\d+)_(\d+)\|\d+_(\d+\.\d+)_(\d+\.\d+)\.", name)[0]
        sizes.append(int(size))
        intervals.append(int(interval))
        test_accs.append(float(test_acc))
        train_accs.append(float(train_acc))

    # create dataframe with the parsed data; rows line up with
    # STUDENT_MODEL_WEIGHT_PATHS by construction
    df = pd.DataFrame(list(zip(sizes, intervals, test_accs, train_accs)),
                      columns=['size', 'interval', 'test_acc', 'train_acc'])

    # loading dataset and "centering" data samples
    X_train, Y_train, X_test, Y_test = load_dataset.load_cifar_100(None)
    X_train, X_test = load_dataset.z_standardization(X_train, X_test)

    # Pre-build one Gaussian-noised copy of the train and test sets per sigma
    # value so the noise is identical for every model evaluated below.
    X_test_gauss_noised_sets = []
    X_train_gauss_noised_sets = []
    for i in range(len(cfg.SIGMA_VALS4)):
        X_test_gauss_noised = np.zeros_like(X_test)
        X_train_gauss_noised = np.zeros_like(X_train)
        for j in range(len(X_test)):
            X_test_gauss_noised[j] = X_test[j] + np.random.normal(
                cfg.MEAN4, cfg.SIGMA_VALS4[i],
                (X_test[j].shape[0], X_test[j].shape[1], X_test[j].shape[2]))
        for j in range(len(X_train)):
            X_train_gauss_noised[j] = X_train[j] + np.random.normal(
                cfg.MEAN4, cfg.SIGMA_VALS4[i],
                (X_train[j].shape[0], X_train[j].shape[1],
                 X_train[j].shape[2]))
        X_test_gauss_noised_sets.append(X_test_gauss_noised)
        X_train_gauss_noised_sets.append(X_train_gauss_noised)

    # min and max values of the test set, used as the classifier's clip range
    # during adversarial example generation
    dataset_min = np.min(X_test)
    dataset_max = np.max(X_test)

    # create one zero-initialized column per epsilon / sigma value; these are
    # filled in with accuracies during the evaluation loop below
    zeros = [0 for name in STUDENT_MODEL_NAMES]
    for eps in cfg.EPS_VALS4:
        df[("eps_" + str(format(eps, '.3f')))] = zeros
    for sig in cfg.SIGMA_VALS4:
        df[("sig_" + str(format(sig, '.3f')))] = zeros

    print("[INFO] Loading student model...")
    # NOTE(review): `size` here is whatever the *last* iteration of the
    # name-parsing loop left behind — this assumes every checkpoint in
    # MODEL_DIR4 shares one architecture size; confirm if sizes can differ.
    curr_student_model = knowledge_distillation_models.get_model(
        cfg.dataset, 100, X_train, int(size), cfg.model_type)
    optimizer = SGD(lr=0.01, momentum=0.9, nesterov=True)
    curr_student_model.compile(optimizer=optimizer,
                               loss="categorical_crossentropy",
                               metrics=["accuracy"])
    # curr_student_model.summary()

    for j in range(len(STUDENT_MODEL_WEIGHT_PATHS)):
        print(
            "\n--------------------------Starting new AR step--------------------------"
        )
        # load weights for the student model (architecture is reused; only
        # the weights change between checkpoints)
        print("[INFO] Loading student model weights...")
        curr_student_model.load_weights(STUDENT_MODEL_WEIGHT_PATHS[j])
        # Walk the longer of the two value lists; each branch guards its own
        # index so the shorter list simply stops contributing.
        for i in range(max(len(cfg.EPS_VALS4), len(cfg.SIGMA_VALS4))):
            if cfg.USE_ADV_ATTACK:
                if i < len(cfg.EPS_VALS4):
                    # evaluating adversarial attack robustness
                    curr_eps = cfg.EPS_VALS4[i]
                    print(
                        f"[INFO] Evaluating {STUDENT_MODEL_NAMES[j]} with attack at epsilon {format(curr_eps, '.3f')}..."
                    )
                    student_art_model = KerasClassifier(
                        model=curr_student_model,
                        clip_values=(dataset_min, dataset_max),
                        use_logits=False)
                    print(
                        "[INFO] Generating adversarial examples for the current model..."
                    )
                    # BUGFIX: compare strings with `==`, not `is` — identity
                    # comparison against a literal is implementation-defined
                    # and could silently fall through to the warning branch.
                    if cfg.attack_type4 == "fgm":
                        attack_student_model = FastGradientMethod(
                            classifier=student_art_model, eps=curr_eps)
                    elif cfg.attack_type4 == "bim":
                        attack_student_model = BasicIterativeMethod(
                            classifier=student_art_model,
                            eps_step=0.025,
                            eps=curr_eps,
                            max_iter=4,
                            targeted=False,
                            batch_size=1)
                    else:
                        print("[WARNING] attack type not supported!")
                        break
                    X_test_adv = attack_student_model.generate(x=X_test)
                    print(
                        "[INFO] Evaluating student model's adversarial accuracy..."
                    )
                    predictions = student_art_model.predict(X_test_adv)
                    adv_acc = np.sum(
                        np.argmax(predictions, axis=1) == np.argmax(
                            Y_test, axis=1)) / len(Y_test)
                    df.iloc[j,
                            df.columns.
                            get_loc("eps_" +
                                    str(format(curr_eps, '.3f')))] = adv_acc
                    print(f"Adversarial accuracy: {adv_acc}")
            if cfg.USE_GAUSS_NOISE:
                if i < len(cfg.SIGMA_VALS4):
                    # evaluating gaussian noise robustness on the pre-built
                    # noised test set for this sigma
                    curr_sig = cfg.SIGMA_VALS4[i]
                    print(
                        f"[INFO] Evaluating {STUDENT_MODEL_NAMES[j]} with Gaussian Noise at sigma {format(curr_sig, '.3f')}..."
                    )
                    predictions2 = curr_student_model.predict(
                        X_test_gauss_noised_sets[i])
                    gauss_acc = np.sum(
                        np.argmax(predictions2, axis=1) == np.argmax(
                            Y_test, axis=1)) / len(Y_test)
                    df.iloc[j,
                            df.columns.
                            get_loc("sig_" +
                                    str(format(curr_sig, '.3f')))] = gauss_acc
                    print("[INFO] Completed adversarial evaluation...")
                    print(f"Gaussian noise accuracy: {gauss_acc}")

        # Rewrite the CSV after every checkpoint so partial results are kept
        # if the run is interrupted.
        print(
            f"[INFO] Recording adversarial robustness results to {RESULTS_FILE}..."
        )
        df.to_csv(RESULTS_FILE, sep=',')